1 /*
2  *  Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
3  *  Copyright (C) 2007-2013 Sourcefire, Inc.
4  *
5  *  Authors: Tomasz Kojm
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License version 2 as
9  *  published by the Free Software Foundation.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  *  MA 02110-1301, USA.
20  */
21 
22 #if HAVE_CONFIG_H
23 #include "clamav-config.h"
24 #endif
25 
26 #ifndef _WIN32
27 #include <sys/time.h>
28 #endif
29 #include <stdio.h>
30 #include <string.h>
31 #include <stdlib.h>
32 #include <libgen.h>
33 #include <errno.h>
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <stdbool.h>
37 #ifdef HAVE_UNISTD_H
38 #include <unistd.h>
39 #endif
40 #ifdef HAVE_SYS_PARAM_H
41 #include <sys/param.h>
42 #endif
43 #include <fcntl.h>
44 #include <dirent.h>
45 #ifdef HAVE_SYS_TIMES_H
46 #include <sys/times.h>
47 #endif
48 
49 #define DCONF_ARCH ctx->dconf->archive
50 #define DCONF_DOC ctx->dconf->doc
51 #define DCONF_MAIL ctx->dconf->mail
52 #define DCONF_OTHER ctx->dconf->other
53 
54 #include <zlib.h>
55 
56 #include "clamav.h"
57 #include "others.h"
58 #include "dconf.h"
59 #include "scanners.h"
60 #include "matcher-ac.h"
61 #include "matcher-bm.h"
62 #include "matcher.h"
63 #include "ole2_extract.h"
64 #include "vba_extract.h"
65 #include "xlm_extract.h"
66 #include "msexpand.h"
67 #include "mbox.h"
68 #include "libmspack.h"
69 #include "pe.h"
70 #include "elf.h"
71 #include "filetypes.h"
72 #include "htmlnorm.h"
73 #include "untar.h"
74 #include "special.h"
75 #include "binhex.h"
76 /* #include "uuencode.h" */
77 #include "tnef.h"
78 #include "sis.h"
79 #include "pdf.h"
80 #include "str.h"
81 #include "entconv.h"
82 #include "rtf.h"
83 #include "unarj.h"
84 #include "nsis/nulsft.h"
85 #include "autoit.h"
86 #include "textnorm.h"
87 #include "unzip.h"
88 #include "dlp.h"
89 #include "default.h"
90 #include "cpio.h"
91 #include "macho.h"
92 #include "ishield.h"
93 #include "7z_iface.h"
94 #include "fmap.h"
95 #include "cache.h"
96 #include "events.h"
97 #include "swf.h"
98 #include "jpeg.h"
99 #include "gif.h"
100 #include "png.h"
101 #include "iso9660.h"
102 #include "dmg.h"
103 #include "xar.h"
104 #include "hfsplus.h"
105 #include "xz_iface.h"
106 #include "mbr.h"
107 #include "gpt.h"
108 #include "apm.h"
109 #include "ooxml.h"
110 #include "xdp.h"
111 #include "json_api.h"
112 #include "msxml.h"
113 #include "tiff.h"
114 #include "hwp.h"
115 #include "msdoc.h"
116 #include "execs.h"
117 #include "egg.h"
118 
119 // libclamunrar_iface
120 #include "unrar_iface.h"
121 
122 #ifdef HAVE_BZLIB_H
123 #include <bzlib.h>
124 #endif
125 
126 #include <fcntl.h>
127 #include <string.h>
128 
cli_magic_scan_dir(const char * dirname,cli_ctx * ctx)129 cl_error_t cli_magic_scan_dir(const char *dirname, cli_ctx *ctx)
130 {
131     cl_error_t status = CL_CLEAN;
132     DIR *dd           = NULL;
133     struct dirent *dent;
134     STATBUF statbuf;
135     char *fname                      = NULL;
136     unsigned int viruses_found       = 0;
137     bool processing_normalized_files = ctx->next_layer_is_normalized;
138 
139     if ((dd = opendir(dirname)) != NULL) {
140         while ((dent = readdir(dd))) {
141             if (dent->d_ino) {
142                 if (strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..")) {
143                     /* build the full name */
144                     fname = cli_malloc(strlen(dirname) + strlen(dent->d_name) + 2);
145                     if (!fname) {
146                         cli_dbgmsg("cli_magic_scan_dir: Unable to allocate memory for filename\n");
147                         status = CL_EMEM;
148                         goto done;
149                     }
150 
151                     sprintf(fname, "%s" PATHSEP "%s", dirname, dent->d_name);
152 
153                     /* stat the file */
154                     if (LSTAT(fname, &statbuf) != -1) {
155                         if (S_ISDIR(statbuf.st_mode) && !S_ISLNK(statbuf.st_mode)) {
156                             if (cli_magic_scan_dir(fname, ctx) == CL_VIRUS) {
157                                 if (SCAN_ALLMATCHES) {
158                                     viruses_found++;
159                                     continue;
160                                 }
161 
162                                 status = CL_VIRUS;
163                                 goto done;
164                             }
165                         } else {
166                             if (S_ISREG(statbuf.st_mode)) {
167                                 ctx->next_layer_is_normalized = processing_normalized_files; // This flag ingested by cli_recursion_stack_push().
168                                 if (cli_magic_scan_file(fname, ctx, dent->d_name) == CL_VIRUS) {
169                                     if (SCAN_ALLMATCHES) {
170                                         viruses_found++;
171                                         continue;
172                                     }
173 
174                                     status = CL_VIRUS;
175                                     goto done;
176                                 }
177                             }
178                         }
179                     }
180                     free(fname);
181                     fname = NULL;
182                 }
183             }
184         }
185     } else {
186         cli_dbgmsg("cli_magic_scan_dir: Can't open directory %s.\n", dirname);
187         status = CL_EOPEN;
188         goto done;
189     }
190 
191 done:
192     ctx->next_layer_is_normalized = false;
193     if (NULL != dd) {
194         closedir(dd);
195     }
196     if (NULL != fname) {
197         free(fname);
198     }
199 
200     if (SCAN_ALLMATCHES && viruses_found)
201         status = CL_VIRUS;
202 
203     return status;
204 }
205 
206 /**
207  * @brief  Scan the metadata using cli_matchmeta()
208  *
209  * @param metadata  unrar metadata structure
210  * @param ctx       scanning context structure
211  * @param files
212  * @return cl_error_t  Returns CL_CLEAN if nothing found, CL_VIRUS if something found, CL_EUNPACK if encrypted.
213  */
cli_unrar_scanmetadata(unrar_metadata_t * metadata,cli_ctx * ctx,unsigned int files)214 static cl_error_t cli_unrar_scanmetadata(unrar_metadata_t *metadata, cli_ctx *ctx, unsigned int files)
215 {
216     cl_error_t status = CL_CLEAN;
217 
218     cli_dbgmsg("RAR: %s, crc32: 0x%x, encrypted: %u, compressed: %u, normal: %u, method: %u, ratio: %u\n",
219                metadata->filename, metadata->crc, metadata->encrypted, (unsigned int)metadata->pack_size,
220                (unsigned int)metadata->unpack_size, metadata->method,
221                metadata->pack_size ? (unsigned int)(metadata->unpack_size / metadata->pack_size) : 0);
222 
223     if (CL_VIRUS == cli_matchmeta(ctx, metadata->filename, metadata->pack_size, metadata->unpack_size, metadata->encrypted, files, metadata->crc, NULL)) {
224         status = CL_VIRUS;
225     } else if (SCAN_HEURISTIC_ENCRYPTED_ARCHIVE && metadata->encrypted) {
226         cli_dbgmsg("RAR: Encrypted files found in archive.\n");
227         status = CL_EUNPACK;
228     }
229 
230     return status;
231 }
232 
cli_scanrar_file(const char * filepath,int desc,cli_ctx * ctx)233 static cl_error_t cli_scanrar_file(const char *filepath, int desc, cli_ctx *ctx)
234 {
235     cl_error_t status          = CL_EPARSE;
236     cl_unrar_error_t unrar_ret = UNRAR_ERR;
237 
238     unsigned int file_count    = 0;
239     unsigned int viruses_found = 0;
240 
241     uint32_t nEncryptedFilesFound = 0;
242     uint32_t nTooLargeFilesFound  = 0;
243 
244     void *hArchive = NULL;
245 
246     char *comment         = NULL;
247     uint32_t comment_size = 0;
248 
249     unrar_metadata_t metadata;
250     char *filename_base    = NULL;
251     char *extract_fullpath = NULL;
252     char *comment_fullpath = NULL;
253 
254     UNUSEDPARAM(desc);
255 
256     if (filepath == NULL || ctx == NULL) {
257         cli_dbgmsg("RAR: Invalid arguments!\n");
258         return CL_EARG;
259     }
260 
261     cli_dbgmsg("in scanrar()\n");
262 
263     /* Zero out the metadata struct before we read the header */
264     memset(&metadata, 0, sizeof(unrar_metadata_t));
265 
266     /*
267      * Open the archive.
268      */
269     if (UNRAR_OK != (unrar_ret = cli_unrar_open(filepath, &hArchive, &comment, &comment_size, cli_debug_flag))) {
270         if (unrar_ret == UNRAR_ENCRYPTED) {
271             cli_dbgmsg("RAR: Encrypted main header\n");
272             status = CL_EUNPACK;
273             goto done;
274         }
275         if (unrar_ret == UNRAR_EMEM) {
276             status = CL_EMEM;
277             goto done;
278         } else if (unrar_ret == UNRAR_EOPEN) {
279             status = CL_EOPEN;
280             goto done;
281         } else {
282             status = CL_EFORMAT;
283             goto done;
284         }
285     }
286 
287     /* If the archive header had a comment, write it to the comment dir. */
288     if ((comment != NULL) && (comment_size > 0)) {
289 
290         if (ctx->engine->keeptmp) {
291             int comment_fd = -1;
292             if (!(comment_fullpath = cli_gentemp_with_prefix(ctx->sub_tmpdir, "comments"))) {
293                 status = CL_EMEM;
294                 goto done;
295             }
296 
297             comment_fd = open(comment_fullpath, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0600);
298             if (comment_fd < 0) {
299                 cli_dbgmsg("RAR: ERROR: Failed to open output file\n");
300             } else {
301                 cli_dbgmsg("RAR: Writing the archive comment to temp file: %s\n", comment_fullpath);
302                 if (0 == write(comment_fd, comment, comment_size)) {
303                     cli_dbgmsg("RAR: ERROR: Failed to write to output file\n");
304                 }
305                 close(comment_fd);
306             }
307         }
308 
309         /* Scan the comment */
310         status = cli_magic_scan_buff(comment, comment_size, ctx, NULL);
311 
312         if ((status == CL_VIRUS) && SCAN_ALLMATCHES) {
313             status = CL_CLEAN;
314             viruses_found++;
315         }
316         if ((status == CL_VIRUS) || (status == CL_BREAK)) {
317             goto done;
318         }
319     }
320 
321     /*
322      * Read & scan each file header.
323      * Extract & scan each file.
324      *
325      * Skip files if they will exceed max filesize or max scansize.
326      * Count the number of encrypted file headers and encrypted files.
327      *  - Alert if there are encrypted files,
328      *      if the Heuristic for encrypted archives is enabled,
329      *      and if we have not detected a signature match.
330      */
331     do {
332         status = CL_CLEAN;
333 
334         /* Zero out the metadata struct before we read the header */
335         memset(&metadata, 0, sizeof(unrar_metadata_t));
336 
337         /*
338          * Get the header information for the next file in the archive.
339          */
340         unrar_ret = cli_unrar_peek_file_header(hArchive, &metadata);
341         if (unrar_ret != UNRAR_OK) {
342             if (unrar_ret == UNRAR_ENCRYPTED) {
343                 /* Found an encrypted file header, must skip. */
344                 cli_dbgmsg("RAR: Encrypted file header, unable to reading file metadata and file contents. Skipping file...\n");
345                 nEncryptedFilesFound += 1;
346 
347                 if (UNRAR_OK != cli_unrar_skip_file(hArchive)) {
348                     /* Failed to skip!  Break extraction loop. */
349                     cli_dbgmsg("RAR: Failed to skip file. RAR archive extraction has failed.\n");
350                     break;
351                 }
352             } else if (unrar_ret == UNRAR_BREAK) {
353                 /* No more files. Break extraction loop. */
354                 cli_dbgmsg("RAR: No more files in archive.\n");
355                 break;
356             } else {
357                 /* Memory error or some other error reading the header info. */
358                 cli_dbgmsg("RAR: Error (%u) reading file header!\n", unrar_ret);
359                 break;
360             }
361         } else {
362             file_count += 1;
363 
364             /*
365             * Scan the metadata for the file in question since the content was clean, or we're running in all-match.
366             */
367             status = cli_unrar_scanmetadata(&metadata, ctx, file_count);
368             if ((status == CL_VIRUS) && SCAN_ALLMATCHES) {
369                 status = CL_CLEAN;
370                 viruses_found++;
371             }
372             if ((status == CL_VIRUS) || (status == CL_BREAK)) {
373                 break;
374             }
375 
376             /* Check if we've already exceeded the scan limit */
377             if (cli_checklimits("RAR", ctx, 0, 0, 0))
378                 break;
379 
380             if (metadata.is_dir) {
381                 /* Entry is a directory. Skip. */
382                 cli_dbgmsg("RAR: Found directory. Skipping to next file.\n");
383 
384                 if (UNRAR_OK != cli_unrar_skip_file(hArchive)) {
385                     /* Failed to skip!  Break extraction loop. */
386                     cli_dbgmsg("RAR: Failed to skip directory. RAR archive extraction has failed.\n");
387                     break;
388                 }
389             } else if (cli_checklimits("RAR", ctx, metadata.unpack_size, 0, 0)) {
390                 /* File size exceeds maxfilesize, must skip extraction.
391                 * Although we may be able to scan the metadata */
392                 nTooLargeFilesFound += 1;
393 
394                 cli_dbgmsg("RAR: Next file is too large (%" PRIu64 " bytes); it would exceed max scansize.  Skipping to next file.\n", metadata.unpack_size);
395 
396                 if (UNRAR_OK != cli_unrar_skip_file(hArchive)) {
397                     /* Failed to skip!  Break extraction loop. */
398                     cli_dbgmsg("RAR: Failed to skip file. RAR archive extraction has failed.\n");
399                     break;
400                 }
401             } else if (metadata.encrypted != 0) {
402                 /* Found an encrypted file, must skip. */
403                 cli_dbgmsg("RAR: Encrypted file, unable to extract file contents. Skipping file...\n");
404                 nEncryptedFilesFound += 1;
405 
406                 if (UNRAR_OK != cli_unrar_skip_file(hArchive)) {
407                     /* Failed to skip!  Break extraction loop. */
408                     cli_dbgmsg("RAR: Failed to skip file. RAR archive extraction has failed.\n");
409                     break;
410                 }
411             } else {
412                 /*
413                  * Extract the file...
414                  */
415                 if (NULL != metadata.filename) {
416                     (void)cli_basename(metadata.filename, strlen(metadata.filename), &filename_base);
417                 }
418 
419                 if (!(ctx->engine->keeptmp) ||
420                     (NULL == filename_base)) {
421                     extract_fullpath = cli_gentemp(ctx->sub_tmpdir);
422                 } else {
423                     extract_fullpath = cli_gentemp_with_prefix(ctx->sub_tmpdir, filename_base);
424                 }
425                 if (NULL == extract_fullpath) {
426                     cli_dbgmsg("RAR: Memory error allocating filename for extracted file.");
427                     status = CL_EMEM;
428                     break;
429                 }
430                 cli_dbgmsg("RAR: Extracting file: %s to %s\n", metadata.filename, extract_fullpath);
431 
432                 unrar_ret = cli_unrar_extract_file(hArchive, extract_fullpath, NULL);
433                 if (unrar_ret != UNRAR_OK) {
434                     /*
435                      * Some other error extracting the file
436                      */
437                     cli_dbgmsg("RAR: Error extracting file: %s\n", metadata.filename);
438 
439                     /* TODO:
440                      *   may need to manually skip the file depending on what, specifically, cli_unrar_extract_file() returned.
441                      */
442                 } else {
443                     /*
444                      * File should be extracted...
445                      * ... make sure we have read permissions to the file.
446                      */
447 #ifdef _WIN32
448                     if (0 != _access_s(extract_fullpath, R_OK)) {
449 #else
450                     if (0 != access(extract_fullpath, R_OK)) {
451 #endif
452                         cli_dbgmsg("RAR: Don't have read permissions, attempting to change file permissions to make it readable..\n");
453 #ifdef _WIN32
454                         if (0 != _chmod(extract_fullpath, _S_IREAD)) {
455 #else
456                         if (0 != chmod(extract_fullpath, S_IRUSR | S_IRGRP)) {
457 #endif
458                             cli_dbgmsg("RAR: Failed to change permission bits so the extracted file is readable..\n");
459                         }
460                     }
461 
462                     /*
463                      * ... scan the extracted file.
464                      */
465                     cli_dbgmsg("RAR: Extraction complete.  Scanning now...\n");
466                     status = cli_magic_scan_file(extract_fullpath, ctx, filename_base);
467                     if (status == CL_EOPEN) {
468                         cli_dbgmsg("RAR: File not found, Extraction failed!\n");
469                         status = CL_CLEAN;
470                     } else {
471                         /* Delete the tempfile if not --leave-temps */
472                         if (!ctx->engine->keeptmp)
473                             if (cli_unlink(extract_fullpath))
474                                 cli_dbgmsg("RAR: Failed to unlink the extracted file: %s\n", extract_fullpath);
475 
476                         if (status == CL_VIRUS) {
477                             cli_dbgmsg("RAR: infected with %s\n", cli_get_last_virus(ctx));
478                             status = CL_VIRUS;
479                             viruses_found++;
480                         }
481                     }
482                 }
483 
484                 /* Free up that the filepath */
485                 if (NULL != extract_fullpath) {
486                     free(extract_fullpath);
487                     extract_fullpath = NULL;
488                 }
489             }
490         }
491 
492         if (status == CL_VIRUS) {
493             if (SCAN_ALLMATCHES)
494                 status = CL_SUCCESS;
495             else
496                 break;
497         }
498 
499         if (ctx->engine->maxscansize && ctx->scansize >= ctx->engine->maxscansize) {
500             status = CL_CLEAN;
501             break;
502         }
503 
504         /*
505          * Free up any malloced metadata...
506          */
507         if (metadata.filename != NULL) {
508             free(metadata.filename);
509             metadata.filename = NULL;
510         }
511         if (NULL != filename_base) {
512             free(filename_base);
513             filename_base = NULL;
514         }
515 
516     } while (status == CL_CLEAN);
517 
518     if (status == CL_BREAK)
519         status = CL_CLEAN;
520 
521 done:
522     if (NULL != comment) {
523         free(comment);
524         comment = NULL;
525     }
526 
527     if (NULL != comment_fullpath) {
528         if (!ctx->engine->keeptmp) {
529             cli_rmdirs(comment_fullpath);
530         }
531         free(comment_fullpath);
532         comment_fullpath = NULL;
533     }
534 
535     if (NULL != hArchive) {
536         cli_unrar_close(hArchive);
537         hArchive = NULL;
538     }
539 
540     if (NULL != filename_base) {
541         free(filename_base);
542         filename_base = NULL;
543     }
544 
545     if (metadata.filename != NULL) {
546         free(metadata.filename);
547         metadata.filename = NULL;
548     }
549 
550     if (NULL != extract_fullpath) {
551         free(extract_fullpath);
552         extract_fullpath = NULL;
553     }
554 
555     if ((CL_VIRUS != status) && ((CL_EUNPACK == status) || (nEncryptedFilesFound > 0))) {
556         /* If user requests enabled the Heuristic for encrypted archives... */
557         if (SCAN_HEURISTIC_ENCRYPTED_ARCHIVE) {
558             if (CL_VIRUS == cli_append_virus(ctx, "Heuristics.Encrypted.RAR")) {
559                 status = CL_VIRUS;
560             }
561         }
562         if (status != CL_VIRUS) {
563             status = CL_CLEAN;
564         }
565     }
566 
567     cli_dbgmsg("RAR: Exit code: %d\n", status);
568 
569     if (SCAN_ALLMATCHES && viruses_found)
570         status = CL_VIRUS;
571 
572     return status;
573 }
574 
575 static cl_error_t cli_scanrar(cli_ctx *ctx)
576 {
577     cl_error_t status = CL_SUCCESS;
578 
579     const char *filepath = NULL;
580     int fd               = -1;
581 
582     char *tmpname = NULL;
583     int tmpfd     = -1;
584 
585 #ifdef _WIN32
586     if ((SCAN_UNPRIVILEGED) || (NULL == ctx->sub_filepath) || (0 != _access_s(ctx->sub_filepath, R_OK))) {
587 #else
588     if ((SCAN_UNPRIVILEGED) || (NULL == ctx->sub_filepath) || (0 != access(ctx->sub_filepath, R_OK))) {
589 #endif
590         /* If map is not file-backed have to dump to file for scanrar. */
591         status = fmap_dump_to_file(ctx->fmap, ctx->sub_filepath, ctx->sub_tmpdir, &tmpname, &tmpfd, 0, SIZE_MAX);
592         if (status != CL_SUCCESS) {
593             cli_dbgmsg("cli_magic_scan: failed to generate temporary file.\n");
594             goto done;
595         }
596         filepath = tmpname;
597         fd       = tmpfd;
598     } else {
599         /* Use the original file and file descriptor. */
600         filepath = ctx->sub_filepath;
601         fd       = fmap_fd(ctx->fmap);
602     }
603 
604     /* scan file */
605     status = cli_scanrar_file(filepath, fd, ctx);
606 
607     if ((NULL == tmpname) && (CL_EOPEN == status)) {
608         /*
609          * Failed to open the file using the original filename.
610          * Try writing the file descriptor to a temp file and try again.
611          */
612         status = fmap_dump_to_file(ctx->fmap, ctx->sub_filepath, ctx->sub_tmpdir, &tmpname, &tmpfd, 0, SIZE_MAX);
613         if (status != CL_SUCCESS) {
614             cli_dbgmsg("cli_magic_scan: failed to generate temporary file.\n");
615             goto done;
616         }
617         filepath = tmpname;
618         fd       = tmpfd;
619 
620         /* try to scan again */
621         status = cli_scanrar_file(filepath, fd, ctx);
622     }
623 
624 done:
625     if (tmpfd != -1) {
626         /* If dumped tempfile, need to cleanup */
627         close(tmpfd);
628         if (!ctx->engine->keeptmp) {
629             if (cli_unlink(tmpname)) {
630                 status = CL_EUNLINK;
631             }
632         }
633     }
634 
635     if (tmpname != NULL) {
636         free(tmpname);
637     }
638     return status;
639 }
640 
641 /**
642  * @brief  Scan the metadata using cli_matchmeta()
643  *
644  * @param metadata  egg metadata structure
645  * @param ctx       scanning context structure
646  * @param files     number of files
647  * @return cl_error_t  Returns CL_CLEAN if nothing found, CL_VIRUS if something found, CL_EUNPACK if encrypted.
648  */
649 static cl_error_t cli_egg_scanmetadata(cl_egg_metadata *metadata, cli_ctx *ctx, unsigned int files)
650 {
651     cl_error_t status = CL_CLEAN;
652 
653     cli_dbgmsg("EGG: %s, encrypted: %u, compressed: %u, normal: %u, ratio: %u\n",
654                metadata->filename, metadata->encrypted, (unsigned int)metadata->pack_size,
655                (unsigned int)metadata->unpack_size,
656                metadata->pack_size ? (unsigned int)(metadata->unpack_size / metadata->pack_size) : 0);
657 
658     if (CL_VIRUS == cli_matchmeta(ctx, metadata->filename, metadata->pack_size, metadata->unpack_size, metadata->encrypted, files, 0, NULL)) {
659         status = CL_VIRUS;
660     } else if (SCAN_HEURISTIC_ENCRYPTED_ARCHIVE && metadata->encrypted) {
661         cli_dbgmsg("EGG: Encrypted files found in archive.\n");
662         status = CL_EUNPACK;
663     }
664 
665     return status;
666 }
667 
668 static cl_error_t cli_scanegg(cli_ctx *ctx)
669 {
670     cl_error_t status  = CL_EPARSE;
671     cl_error_t egg_ret = CL_EPARSE;
672 
673     unsigned int file_count    = 0;
674     unsigned int viruses_found = 0;
675 
676     uint32_t nEncryptedFilesFound = 0;
677     uint32_t nTooLargeFilesFound  = 0;
678 
679     void *hArchive = NULL;
680 
681     char **comments    = NULL;
682     uint32_t nComments = 0;
683 
684     cl_egg_metadata metadata;
685     char *filename_base    = NULL;
686     char *extract_fullpath = NULL;
687     char *comment_fullpath = NULL;
688 
689     if (ctx == NULL) {
690         cli_dbgmsg("EGG: Invalid arguments!\n");
691         return CL_EARG;
692     }
693 
694     cli_dbgmsg("in scanegg()\n");
695 
696     /* Zero out the metadata struct before we read the header */
697     memset(&metadata, 0, sizeof(cl_egg_metadata));
698 
699     /*
700      * Open the archive.
701      */
702     if (CL_SUCCESS != (egg_ret = cli_egg_open(ctx->fmap, &hArchive, &comments, &nComments))) {
703         if (egg_ret == CL_EUNPACK) {
704             cli_dbgmsg("EGG: Encrypted main header\n");
705             status = CL_EUNPACK;
706             goto done;
707         }
708         if (egg_ret == CL_EMEM) {
709             status = CL_EMEM;
710             goto done;
711         } else {
712             status = CL_EFORMAT;
713             goto done;
714         }
715     }
716 
717     /* If the archive header had a comment, write it to the comment dir. */
718     if (comments != NULL) {
719         uint32_t i;
720         for (i = 0; i < nComments; i++) {
721             /*
722             * Drop the comment to a temp file, if requested
723             */
724             if (ctx->engine->keeptmp) {
725                 int comment_fd   = -1;
726                 size_t prefixLen = strlen("comments_") + 5;
727                 char *prefix     = (char *)malloc(prefixLen + 1);
728 
729                 snprintf(prefix, prefixLen, "comments_%u", i);
730                 prefix[prefixLen] = '\0';
731 
732                 if (!(comment_fullpath = cli_gentemp_with_prefix(ctx->sub_tmpdir, prefix))) {
733                     free(prefix);
734                     status = CL_EMEM;
735                     goto done;
736                 }
737                 free(prefix);
738 
739                 comment_fd = open(comment_fullpath, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0600);
740                 if (comment_fd < 0) {
741                     cli_dbgmsg("EGG: ERROR: Failed to open output file\n");
742                 } else {
743                     cli_dbgmsg("EGG: Writing the archive comment to temp file: %s\n", comment_fullpath);
744                     if (0 == write(comment_fd, comments[i], nComments)) {
745                         cli_dbgmsg("EGG: ERROR: Failed to write to output file\n");
746                     }
747                     close(comment_fd);
748                 }
749                 free(comment_fullpath);
750                 comment_fullpath = NULL;
751             }
752 
753             /*
754             * Scan the comment.
755             */
756             status = cli_magic_scan_buff(comments[i], strlen(comments[i]), ctx, NULL);
757 
758             if ((status == CL_VIRUS) && SCAN_ALLMATCHES) {
759                 status = CL_CLEAN;
760                 viruses_found++;
761             }
762             if ((status == CL_VIRUS) || (status == CL_BREAK)) {
763                 goto done;
764             }
765         }
766     }
767 
768     /*
769      * Read & scan each file header.
770      * Extract & scan each file.
771      *
772      * Skip files if they will exceed max filesize or max scansize.
773      * Count the number of encrypted file headers and encrypted files.
774      *  - Alert if there are encrypted files,
775      *      if the Heuristic for encrypted archives is enabled,
776      *      and if we have not detected a signature match.
777      */
778     do {
779         status = CL_CLEAN;
780 
781         /* Zero out the metadata struct before we read the header */
782         memset(&metadata, 0, sizeof(cl_egg_metadata));
783 
784         /*
785          * Get the header information for the next file in the archive.
786          */
787         egg_ret = cli_egg_peek_file_header(hArchive, &metadata);
788         if (egg_ret != CL_SUCCESS) {
789             if (egg_ret == CL_EUNPACK) {
790                 /* Found an encrypted file header, must skip. */
791                 cli_dbgmsg("EGG: Encrypted file header, unable to reading file metadata and file contents. Skipping file...\n");
792                 nEncryptedFilesFound += 1;
793 
794                 if (CL_SUCCESS != cli_egg_skip_file(hArchive)) {
795                     /* Failed to skip!  Break extraction loop. */
796                     cli_dbgmsg("EGG: Failed to skip file. EGG archive extraction has failed.\n");
797                     break;
798                 }
799             } else if (egg_ret == CL_BREAK) {
800                 /* No more files. Break extraction loop. */
801                 cli_dbgmsg("EGG: No more files in archive.\n");
802                 break;
803             } else {
804                 /* Memory error or some other error reading the header info. */
805                 cli_dbgmsg("EGG: Error (%u) reading file header!\n", egg_ret);
806                 break;
807             }
808         } else {
809             file_count += 1;
810 
811             /*
812             * Scan the metadata for the file in question since the content was clean, or we're running in all-match.
813             */
814             status = cli_egg_scanmetadata(&metadata, ctx, file_count);
815             if ((status == CL_VIRUS) && SCAN_ALLMATCHES) {
816                 status = CL_CLEAN;
817                 viruses_found++;
818             }
819             if ((status == CL_VIRUS) || (status == CL_BREAK)) {
820                 break;
821             }
822             /* Check if we've already exceeded the scan limit */
823             if (cli_checklimits("EGG", ctx, 0, 0, 0))
824                 break;
825 
826             if (metadata.is_dir) {
827                 /* Entry is a directory. Skip. */
828                 cli_dbgmsg("EGG: Found directory. Skipping to next file.\n");
829 
830                 if (CL_SUCCESS != cli_egg_skip_file(hArchive)) {
831                     /* Failed to skip!  Break extraction loop. */
832                     cli_dbgmsg("EGG: Failed to skip directory. EGG archive extraction has failed.\n");
833                     break;
834                 }
835             } else if (cli_checklimits("EGG", ctx, metadata.unpack_size, 0, 0)) {
836                 /* File size exceeds maxfilesize, must skip extraction.
837                 * Although we may be able to scan the metadata */
838                 nTooLargeFilesFound += 1;
839 
840                 cli_dbgmsg("EGG: Next file is too large (%" PRIu64 " bytes); it would exceed max scansize.  Skipping to next file.\n", metadata.unpack_size);
841 
842                 if (CL_SUCCESS != cli_egg_skip_file(hArchive)) {
843                     /* Failed to skip!  Break extraction loop. */
844                     cli_dbgmsg("EGG: Failed to skip file. EGG archive extraction has failed.\n");
845                     break;
846                 }
847             } else if (metadata.encrypted != 0) {
848                 /* Found an encrypted file, must skip. */
849                 cli_dbgmsg("EGG: Encrypted file, unable to extract file contents. Skipping file...\n");
850                 nEncryptedFilesFound += 1;
851 
852                 if (CL_SUCCESS != cli_egg_skip_file(hArchive)) {
853                     /* Failed to skip!  Break extraction loop. */
854                     cli_dbgmsg("EGG: Failed to skip file. EGG archive extraction has failed.\n");
855                     break;
856                 }
857             } else {
858                 /*
859                 * Extract the file...
860                 */
861                 char *extract_filename    = NULL;
862                 char *extract_buffer      = NULL;
863                 size_t extract_buffer_len = 0;
864 
865                 cli_dbgmsg("EGG: Extracting file: %s\n", metadata.filename);
866 
867                 egg_ret = cli_egg_extract_file(hArchive, (const char **)&extract_filename, (const char **)&extract_buffer, &extract_buffer_len);
868                 if (egg_ret != CL_SUCCESS) {
869                     /*
870                      * Some other error extracting the file
871                      */
872                     cli_dbgmsg("EGG: Error extracting file: %s\n", metadata.filename);
873                 } else if (!extract_buffer || 0 == extract_buffer_len) {
874                     /*
875                      * Empty file. Skip.
876                      */
877                     cli_dbgmsg("EGG: Skipping empty file: %s\n", metadata.filename);
878 
879                     if (NULL != extract_filename) {
880                         free(extract_filename);
881                         extract_filename = NULL;
882                     }
883                     if (NULL != extract_buffer) {
884                         free(extract_buffer);
885                         extract_buffer = NULL;
886                     }
887                 } else {
888                     /*
889                      * Drop to a temp file, if requested.
890                      */
891                     if (NULL != metadata.filename) {
892                         (void)cli_basename(metadata.filename, strlen(metadata.filename), &filename_base);
893                     }
894 
895                     if (ctx->engine->keeptmp) {
896                         int extracted_fd = -1;
897                         if (NULL == filename_base) {
898                             extract_fullpath = cli_gentemp(ctx->sub_tmpdir);
899                         } else {
900                             extract_fullpath = cli_gentemp_with_prefix(ctx->sub_tmpdir, filename_base);
901                         }
902                         if (NULL == extract_fullpath) {
903                             cli_dbgmsg("EGG: Memory error allocating filename for extracted file.");
904                             status = CL_EMEM;
905                             break;
906                         }
907 
908                         extracted_fd = open(extract_fullpath, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0600);
909                         if (extracted_fd < 0) {
910                             cli_dbgmsg("EGG: ERROR: Failed to open output file\n");
911                         } else {
912                             cli_dbgmsg("EGG: Writing the extracted file contents to temp file: %s\n", extract_fullpath);
913                             if (0 == write(extracted_fd, extract_buffer, extract_buffer_len)) {
914                                 cli_dbgmsg("EGG: ERROR: Failed to write to output file\n");
915                             } else {
916                                 close(extracted_fd);
917                                 extracted_fd = -1;
918                             }
919                         }
920                     }
921 
922                     /*
923                      * Scan the extracted file...
924                      */
925                     cli_dbgmsg("EGG: Extraction complete.  Scanning now...\n");
926                     status = cli_magic_scan_buff(extract_buffer, extract_buffer_len, ctx, filename_base);
927                     if (status == CL_VIRUS) {
928                         cli_dbgmsg("EGG: infected with %s\n", cli_get_last_virus(ctx));
929                         status = CL_VIRUS;
930                         viruses_found++;
931                     }
932 
933                     if (NULL != filename_base) {
934                         free(filename_base);
935                         filename_base = NULL;
936                     }
937                     if (NULL != extract_filename) {
938                         free(extract_filename);
939                         extract_filename = NULL;
940                     }
941                     if (NULL != extract_buffer) {
942                         free(extract_buffer);
943                         extract_buffer = NULL;
944                     }
945                 }
946 
947                 /* Free up that the filepath */
948                 if (NULL != extract_fullpath) {
949                     free(extract_fullpath);
950                     extract_fullpath = NULL;
951                 }
952             }
953         }
954 
955         if (status == CL_VIRUS) {
956             if (SCAN_ALLMATCHES)
957                 status = CL_SUCCESS;
958             else
959                 break;
960         }
961 
962         if (ctx->engine->maxscansize && ctx->scansize >= ctx->engine->maxscansize) {
963             status = CL_CLEAN;
964             break;
965         }
966 
967         /*
968          * TODO: Free up any malloced metadata...
969          */
970         if (metadata.filename != NULL) {
971             free(metadata.filename);
972             metadata.filename = NULL;
973         }
974 
975     } while (status == CL_CLEAN);
976 
977     if (status == CL_BREAK)
978         status = CL_CLEAN;
979 
980 done:
981 
982     if (NULL != comment_fullpath) {
983         free(comment_fullpath);
984         comment_fullpath = NULL;
985     }
986 
987     if (NULL != hArchive) {
988         cli_egg_close(hArchive);
989         hArchive = NULL;
990     }
991 
992     if (NULL != filename_base) {
993         free(filename_base);
994         filename_base = NULL;
995     }
996 
997     if (metadata.filename != NULL) {
998         free(metadata.filename);
999         metadata.filename = NULL;
1000     }
1001 
1002     if (NULL != extract_fullpath) {
1003         free(extract_fullpath);
1004         extract_fullpath = NULL;
1005     }
1006 
1007     if ((CL_VIRUS != status) && ((CL_EUNPACK == status) || (nEncryptedFilesFound > 0))) {
1008         /* If user requests enabled the Heuristic for encrypted archives... */
1009         if (SCAN_HEURISTIC_ENCRYPTED_ARCHIVE) {
1010             if (CL_VIRUS == cli_append_virus(ctx, "Heuristics.Encrypted.EGG")) {
1011                 status = CL_VIRUS;
1012             }
1013         }
1014         if (status != CL_VIRUS) {
1015             status = CL_CLEAN;
1016         }
1017     }
1018 
1019     cli_dbgmsg("EGG: Exit code: %d\n", status);
1020 
1021     if (SCAN_ALLMATCHES && viruses_found)
1022         status = CL_VIRUS;
1023 
1024     return status;
1025 }
1026 
1027 static cl_error_t cli_scanarj(cli_ctx *ctx)
1028 {
1029     cl_error_t ret = CL_CLEAN;
1030     cl_error_t status;
1031     int file = 0;
1032     arj_metadata_t metadata;
1033     char *dir;
1034     int virus_found = 0;
1035 
1036     cli_dbgmsg("in cli_scanarj()\n");
1037 
1038     memset(&metadata, 0, sizeof(arj_metadata_t));
1039 
1040     /* generate the temporary directory */
1041     if (!(dir = cli_gentemp_with_prefix(ctx->sub_tmpdir, "arj-tmp")))
1042         return CL_EMEM;
1043 
1044     if (mkdir(dir, 0700)) {
1045         cli_dbgmsg("ARJ: Can't create temporary directory %s\n", dir);
1046         free(dir);
1047         return CL_ETMPDIR;
1048     }
1049 
1050     ret = cli_unarj_open(ctx->fmap, dir, &metadata);
1051     if (ret != CL_SUCCESS) {
1052         if (!ctx->engine->keeptmp)
1053             cli_rmdirs(dir);
1054         free(dir);
1055         cli_dbgmsg("ARJ: Error: %s\n", cl_strerror(ret));
1056         return ret;
1057     }
1058 
1059     do {
1060 
1061         metadata.filename = NULL;
1062         ret               = cli_unarj_prepare_file(dir, &metadata);
1063         if (ret != CL_SUCCESS) {
1064             cli_dbgmsg("ARJ: cli_unarj_prepare_file Error: %s\n", cl_strerror(ret));
1065             break;
1066         }
1067         file++;
1068         if (cli_matchmeta(ctx, metadata.filename, metadata.comp_size, metadata.orig_size, metadata.encrypted, file, 0, NULL) == CL_VIRUS) {
1069             if (!SCAN_ALLMATCHES) {
1070                 cli_rmdirs(dir);
1071                 free(dir);
1072                 return CL_VIRUS;
1073             }
1074             virus_found = 1;
1075             ret         = CL_SUCCESS;
1076         }
1077 
1078         if ((ret = cli_checklimits("ARJ", ctx, metadata.orig_size, metadata.comp_size, 0)) != CL_CLEAN) {
1079             ret = CL_SUCCESS;
1080             if (metadata.filename)
1081                 free(metadata.filename);
1082             continue;
1083         }
1084         ret = cli_unarj_extract_file(dir, &metadata);
1085         if (ret != CL_SUCCESS) {
1086             cli_dbgmsg("ARJ: cli_unarj_extract_file Error: %s\n", cl_strerror(ret));
1087         }
1088         if (metadata.ofd >= 0) {
1089             if (lseek(metadata.ofd, 0, SEEK_SET) == -1) {
1090                 cli_dbgmsg("ARJ: call to lseek() failed\n");
1091             }
1092             status = cli_magic_scan_desc(metadata.ofd, NULL, ctx, metadata.filename);
1093             close(metadata.ofd);
1094             if (status == CL_VIRUS) {
1095                 cli_dbgmsg("ARJ: infected with %s\n", cli_get_last_virus(ctx));
1096                 if (!SCAN_ALLMATCHES) {
1097                     ret = CL_VIRUS;
1098                     if (metadata.filename) {
1099                         free(metadata.filename);
1100                         metadata.filename = NULL;
1101                     }
1102                     break;
1103                 }
1104                 virus_found = 1;
1105                 ret         = CL_SUCCESS;
1106             }
1107         }
1108         if (metadata.filename) {
1109             free(metadata.filename);
1110             metadata.filename = NULL;
1111         }
1112 
1113     } while (ret == CL_SUCCESS);
1114 
1115     if (!ctx->engine->keeptmp)
1116         cli_rmdirs(dir);
1117 
1118     free(dir);
1119     if (metadata.filename) {
1120         free(metadata.filename);
1121     }
1122 
1123     if (virus_found != 0)
1124         ret = CL_VIRUS;
1125     cli_dbgmsg("ARJ: Exit code: %d\n", ret);
1126     if (ret == CL_BREAK)
1127         ret = CL_CLEAN;
1128 
1129     return ret;
1130 }
1131 
1132 static cl_error_t cli_scangzip_with_zib_from_the_80s(cli_ctx *ctx, unsigned char *buff)
1133 {
1134     int fd;
1135     cl_error_t ret;
1136     size_t outsize = 0;
1137     int bytes;
1138     fmap_t *map = ctx->fmap;
1139     char *tmpname;
1140     gzFile gz;
1141 
1142     ret = fmap_fd(map);
1143     if (ret < 0)
1144         return CL_EDUP;
1145     fd = dup(ret);
1146     if (fd < 0)
1147         return CL_EDUP;
1148 
1149     if (!(gz = gzdopen(fd, "rb"))) {
1150         close(fd);
1151         return CL_EOPEN;
1152     }
1153 
1154     if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tmpname, &fd)) != CL_SUCCESS) {
1155         cli_dbgmsg("GZip: Can't generate temporary file.\n");
1156         gzclose(gz);
1157         close(fd);
1158         return ret;
1159     }
1160 
1161     while ((bytes = gzread(gz, buff, FILEBUFF)) > 0) {
1162         outsize += bytes;
1163         if (cli_checklimits("GZip", ctx, outsize, 0, 0) != CL_CLEAN)
1164             break;
1165         if (cli_writen(fd, buff, (size_t)bytes) != (size_t)bytes) {
1166             close(fd);
1167             gzclose(gz);
1168             if (cli_unlink(tmpname)) {
1169                 free(tmpname);
1170                 return CL_EUNLINK;
1171             }
1172             free(tmpname);
1173             return CL_EWRITE;
1174         }
1175     }
1176 
1177     gzclose(gz);
1178 
1179     if ((ret = cli_magic_scan_desc(fd, tmpname, ctx, NULL)) == CL_VIRUS) {
1180         cli_dbgmsg("GZip: Infected with %s\n", cli_get_last_virus(ctx));
1181         close(fd);
1182         if (!ctx->engine->keeptmp) {
1183             if (cli_unlink(tmpname)) {
1184                 free(tmpname);
1185                 return CL_EUNLINK;
1186             }
1187         }
1188         free(tmpname);
1189         return CL_VIRUS;
1190     }
1191     close(fd);
1192     if (!ctx->engine->keeptmp)
1193         if (cli_unlink(tmpname))
1194             ret = CL_EUNLINK;
1195     free(tmpname);
1196     return ret;
1197 }
1198 
1199 static cl_error_t cli_scangzip(cli_ctx *ctx)
1200 {
1201     int fd;
1202     cl_error_t ret = CL_CLEAN;
1203     unsigned char buff[FILEBUFF];
1204     char *tmpname;
1205     z_stream z;
1206     size_t at = 0, outsize = 0;
1207     fmap_t *map = ctx->fmap;
1208 
1209     cli_dbgmsg("in cli_scangzip()\n");
1210 
1211     memset(&z, 0, sizeof(z));
1212     if ((ret = inflateInit2(&z, MAX_WBITS + 16)) != Z_OK) {
1213         cli_dbgmsg("GZip: InflateInit failed: %d\n", ret);
1214         return cli_scangzip_with_zib_from_the_80s(ctx, buff);
1215     }
1216 
1217     if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tmpname, &fd)) != CL_SUCCESS) {
1218         cli_dbgmsg("GZip: Can't generate temporary file.\n");
1219         inflateEnd(&z);
1220         return ret;
1221     }
1222 
1223     while (at < map->len) {
1224         unsigned int bytes = MIN(map->len - at, map->pgsz);
1225         if (!(z.next_in = (void *)fmap_need_off_once(map, at, bytes))) {
1226             cli_dbgmsg("GZip: Can't read %u bytes @ %lu.\n", bytes, (long unsigned)at);
1227             inflateEnd(&z);
1228             close(fd);
1229             if (cli_unlink(tmpname)) {
1230                 free(tmpname);
1231                 return CL_EUNLINK;
1232             }
1233             free(tmpname);
1234             return CL_EREAD;
1235         }
1236         at += bytes;
1237         z.avail_in = bytes;
1238         do {
1239             int inf;
1240             z.avail_out = sizeof(buff);
1241             z.next_out  = buff;
1242             inf         = inflate(&z, Z_NO_FLUSH);
1243             if (inf != Z_OK && inf != Z_STREAM_END && inf != Z_BUF_ERROR) {
1244                 if (sizeof(buff) == z.avail_out) {
1245                     cli_dbgmsg("GZip: Bad stream, nothing in output buffer.\n");
1246                     at = map->len;
1247                     break;
1248                 } else {
1249                     cli_dbgmsg("GZip: Bad stream, data in output buffer.\n");
1250                     /* no break yet, flush extracted bytes to file */
1251                 }
1252             }
1253             if (cli_writen(fd, buff, sizeof(buff) - z.avail_out) == (size_t)-1) {
1254                 inflateEnd(&z);
1255                 close(fd);
1256                 if (cli_unlink(tmpname)) {
1257                     free(tmpname);
1258                     return CL_EUNLINK;
1259                 }
1260                 free(tmpname);
1261                 return CL_EWRITE;
1262             }
1263             outsize += sizeof(buff) - z.avail_out;
1264             if (cli_checklimits("GZip", ctx, outsize, 0, 0) != CL_CLEAN) {
1265                 at = map->len;
1266                 break;
1267             }
1268             if (inf == Z_STREAM_END) {
1269                 at -= z.avail_in;
1270                 inflateReset(&z);
1271                 break;
1272             } else if (inf != Z_OK && inf != Z_BUF_ERROR) {
1273                 at = map->len;
1274                 break;
1275             }
1276         } while (z.avail_out == 0);
1277     }
1278 
1279     inflateEnd(&z);
1280 
1281     if ((ret = cli_magic_scan_desc(fd, tmpname, ctx, NULL)) == CL_VIRUS) {
1282         cli_dbgmsg("GZip: Infected with %s\n", cli_get_last_virus(ctx));
1283         close(fd);
1284         if (!ctx->engine->keeptmp) {
1285             if (cli_unlink(tmpname)) {
1286                 free(tmpname);
1287                 return CL_EUNLINK;
1288             }
1289         }
1290         free(tmpname);
1291         return CL_VIRUS;
1292     }
1293     close(fd);
1294     if (!ctx->engine->keeptmp)
1295         if (cli_unlink(tmpname))
1296             ret = CL_EUNLINK;
1297     free(tmpname);
1298     return ret;
1299 }
1300 
1301 #ifndef HAVE_BZLIB_H
1302 static cl_error_t cli_scanbzip(cli_ctx *ctx)
1303 {
1304     cli_warnmsg("cli_scanbzip: bzip2 support not compiled in\n");
1305     return CL_CLEAN;
1306 }
1307 
1308 #else
1309 
1310 #ifdef NOBZ2PREFIX
1311 #define BZ2_bzDecompressInit bzDecompressInit
1312 #define BZ2_bzDecompress bzDecompress
1313 #define BZ2_bzDecompressEnd bzDecompressEnd
1314 #endif
1315 
1316 static cl_error_t cli_scanbzip(cli_ctx *ctx)
1317 {
1318     cl_error_t ret = CL_CLEAN;
1319     int fd, rc;
1320     uint64_t size = 0;
1321     char *tmpname;
1322     bz_stream strm;
1323     size_t off = 0;
1324     size_t avail;
1325     char buf[FILEBUFF];
1326 
1327     memset(&strm, 0, sizeof(strm));
1328     strm.next_out = buf;
1329     strm.avail_out = sizeof(buf);
1330     rc = BZ2_bzDecompressInit(&strm, 0, 0);
1331     if (BZ_OK != rc) {
1332         cli_dbgmsg("Bzip: DecompressInit failed: %d\n", rc);
1333         return CL_EOPEN;
1334     }
1335 
1336     if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tmpname, &fd))) {
1337         cli_dbgmsg("Bzip: Can't generate temporary file.\n");
1338         BZ2_bzDecompressEnd(&strm);
1339         return ret;
1340     }
1341 
1342     do {
1343         if (!strm.avail_in) {
1344             strm.next_in = (void *)fmap_need_off_once_len(ctx->fmap, off, FILEBUFF, &avail);
1345             strm.avail_in = avail;
1346             off += avail;
1347             if (!strm.avail_in) {
1348                 cli_dbgmsg("Bzip: premature end of compressed stream\n");
1349                 break;
1350             }
1351         }
1352 
1353         rc = BZ2_bzDecompress(&strm);
1354         if (BZ_OK != rc && BZ_STREAM_END != rc) {
1355             cli_dbgmsg("Bzip: decompress error: %d\n", rc);
1356             break;
1357         }
1358 
1359         if (!strm.avail_out || BZ_STREAM_END == rc) {
1360 
1361             size += sizeof(buf) - strm.avail_out;
1362 
1363             if (cli_writen(fd, buf, sizeof(buf) - strm.avail_out) != sizeof(buf) - strm.avail_out) {
1364                 cli_dbgmsg("Bzip: Can't write to file.\n");
1365                 BZ2_bzDecompressEnd(&strm);
1366                 close(fd);
1367                 if (!ctx->engine->keeptmp) {
1368                     if (cli_unlink(tmpname)) {
1369                         free(tmpname);
1370                         return CL_EUNLINK;
1371                     }
1372                 }
1373                 free(tmpname);
1374                 return CL_EWRITE;
1375             }
1376 
1377             if (cli_checklimits("Bzip", ctx, size, 0, 0) != CL_CLEAN)
1378                 break;
1379 
1380             strm.next_out = buf;
1381             strm.avail_out = sizeof(buf);
1382         }
1383     } while (BZ_STREAM_END != rc);
1384 
1385     BZ2_bzDecompressEnd(&strm);
1386 
1387     if ((ret = cli_magic_scan_desc(fd, tmpname, ctx, NULL)) == CL_VIRUS) {
1388         cli_dbgmsg("Bzip: Infected with %s\n", cli_get_last_virus(ctx));
1389         close(fd);
1390         if (!ctx->engine->keeptmp) {
1391             if (cli_unlink(tmpname)) {
1392                 ret = CL_EUNLINK;
1393                 free(tmpname);
1394                 return ret;
1395             }
1396         }
1397         free(tmpname);
1398         return CL_VIRUS;
1399     }
1400     close(fd);
1401     if (!ctx->engine->keeptmp)
1402         if (cli_unlink(tmpname))
1403             ret = CL_EUNLINK;
1404     free(tmpname);
1405 
1406     return ret;
1407 }
1408 #endif
1409 
1410 static cl_error_t cli_scanxz(cli_ctx *ctx)
1411 {
1412     cl_error_t ret = CL_CLEAN;
1413     int fd, rc;
1414     unsigned long int size = 0;
1415     char *tmpname;
1416     struct CLI_XZ strm;
1417     size_t off = 0;
1418     size_t avail;
1419     unsigned char *buf;
1420 
1421     buf = cli_malloc(CLI_XZ_OBUF_SIZE);
1422     if (buf == NULL) {
1423         cli_errmsg("cli_scanxz: nomemory for decompress buffer.\n");
1424         return CL_EMEM;
1425     }
1426     memset(&strm, 0x00, sizeof(struct CLI_XZ));
1427     strm.next_out  = buf;
1428     strm.avail_out = CLI_XZ_OBUF_SIZE;
1429     rc             = cli_XzInit(&strm);
1430     if (rc != XZ_RESULT_OK) {
1431         cli_errmsg("cli_scanxz: DecompressInit failed: %i\n", rc);
1432         free(buf);
1433         return CL_EOPEN;
1434     }
1435 
1436     if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tmpname, &fd))) {
1437         cli_errmsg("cli_scanxz: Can't generate temporary file.\n");
1438         cli_XzShutdown(&strm);
1439         free(buf);
1440         return ret;
1441     }
1442     cli_dbgmsg("cli_scanxz: decompressing to file %s\n", tmpname);
1443 
1444     do {
1445         /* set up input buffer */
1446         if (!strm.avail_in) {
1447             strm.next_in  = (void *)fmap_need_off_once_len(ctx->fmap, off, CLI_XZ_IBUF_SIZE, &avail);
1448             strm.avail_in = avail;
1449             off += avail;
1450             if (!strm.avail_in) {
1451                 cli_errmsg("cli_scanxz: premature end of compressed stream\n");
1452                 ret = CL_EFORMAT;
1453                 goto xz_exit;
1454             }
1455         }
1456 
1457         /* xz decompress a chunk */
1458         rc = cli_XzDecode(&strm);
1459         if (XZ_RESULT_OK != rc && XZ_STREAM_END != rc) {
1460             if (rc == XZ_DIC_HEURISTIC) {
1461                 ret = cli_append_virus(ctx, "Heuristics.XZ.DicSizeLimit");
1462                 goto xz_exit;
1463             }
1464             cli_errmsg("cli_scanxz: decompress error: %d\n", rc);
1465             ret = CL_EFORMAT;
1466             goto xz_exit;
1467         }
1468         //cli_dbgmsg("cli_scanxz: xz decompressed %li of %li available bytes\n",
1469         //           avail - strm.avail_in, avail);
1470 
1471         /* write decompress buffer */
1472         if (!strm.avail_out || rc == XZ_STREAM_END) {
1473             size_t towrite = CLI_XZ_OBUF_SIZE - strm.avail_out;
1474             size += towrite;
1475 
1476             //cli_dbgmsg("Writing %li bytes to XZ decompress temp file(%li byte total)\n",
1477             //           towrite, size);
1478 
1479             if (cli_writen(fd, buf, towrite) != towrite) {
1480                 cli_errmsg("cli_scanxz: Can't write to file.\n");
1481                 ret = CL_EWRITE;
1482                 goto xz_exit;
1483             }
1484             if (cli_checklimits("cli_scanxz", ctx, size, 0, 0) != CL_CLEAN) {
1485                 cli_warnmsg("cli_scanxz: decompress file size exceeds limits - "
1486                             "only scanning %li bytes\n",
1487                             size);
1488                 break;
1489             }
1490             strm.next_out  = buf;
1491             strm.avail_out = CLI_XZ_OBUF_SIZE;
1492         }
1493     } while (XZ_STREAM_END != rc);
1494 
1495     /* scan decompressed file */
1496     if ((ret = cli_magic_scan_desc(fd, tmpname, ctx, NULL)) == CL_VIRUS) {
1497         cli_dbgmsg("cli_scanxz: Infected with %s\n", cli_get_last_virus(ctx));
1498     }
1499 
1500 xz_exit:
1501     cli_XzShutdown(&strm);
1502     close(fd);
1503     if (!ctx->engine->keeptmp)
1504         if (cli_unlink(tmpname) && ret == CL_CLEAN)
1505             ret = CL_EUNLINK;
1506     free(tmpname);
1507     free(buf);
1508     return ret;
1509 }
1510 
1511 static cl_error_t cli_scanszdd(cli_ctx *ctx)
1512 {
1513     int ofd;
1514     cl_error_t ret;
1515     char *tmpname;
1516 
1517     cli_dbgmsg("in cli_scanszdd()\n");
1518 
1519     if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tmpname, &ofd))) {
1520         cli_dbgmsg("MSEXPAND: Can't generate temporary file/descriptor\n");
1521         return ret;
1522     }
1523 
1524     ret = cli_msexpand(ctx, ofd);
1525 
1526     if (ret != CL_SUCCESS) { /* CL_VIRUS or some error */
1527         close(ofd);
1528         if (!ctx->engine->keeptmp)
1529             if (cli_unlink(tmpname))
1530                 ret = CL_EUNLINK;
1531         free(tmpname);
1532         return ret;
1533     }
1534 
1535     cli_dbgmsg("MSEXPAND: Decompressed into %s\n", tmpname);
1536     ret = cli_magic_scan_desc(ofd, tmpname, ctx, NULL);
1537     close(ofd);
1538     if (!ctx->engine->keeptmp)
1539         if (cli_unlink(tmpname))
1540             ret = CL_EUNLINK;
1541     free(tmpname);
1542 
1543     return ret;
1544 }
1545 
1546 static cl_error_t vba_scandata(const unsigned char *data, size_t len, cli_ctx *ctx)
1547 {
1548     cl_error_t ret            = CL_SUCCESS;
1549     struct cli_matcher *groot = ctx->engine->root[0];
1550     struct cli_matcher *troot = ctx->engine->root[2];
1551     struct cli_ac_data gmdata, tmdata;
1552     bool gmdata_initialized = false;
1553     bool tmdata_initialized = false;
1554     struct cli_ac_data *mdata[2];
1555     unsigned int viruses_found = 0;
1556 
1557     cl_fmap_t *new_map = NULL;
1558 
1559     if ((ret = cli_ac_initdata(&tmdata, troot->ac_partsigs, troot->ac_lsigs, troot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))) {
1560         goto done;
1561     }
1562     tmdata_initialized = true;
1563 
1564     if ((ret = cli_ac_initdata(&gmdata, groot->ac_partsigs, groot->ac_lsigs, groot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))) {
1565         goto done;
1566     }
1567     gmdata_initialized = true;
1568 
1569     mdata[0] = &tmdata;
1570     mdata[1] = &gmdata;
1571 
1572     ret = cli_scan_buff(data, len, 0, ctx, CL_TYPE_MSOLE2, mdata);
1573     if (ret == CL_VIRUS) {
1574         viruses_found++;
1575     }
1576 
1577     if (ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALLMATCHES)) {
1578         /*
1579          * Evaluate logical & yara rules given the new matches to see if anything alerts.
1580          */
1581         new_map = fmap_open_memory(data, len, NULL);
1582         if (new_map == NULL) {
1583             cli_dbgmsg("Failed to create fmap for evaluating logical/yara rules after call to cli_scan_buff()\n");
1584             ret = CL_EMEM;
1585             goto done;
1586         }
1587 
1588         ctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push().
1589 
1590         ret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_MSOLE2, true); /* Perform exp_eval with child fmap */
1591         if (CL_SUCCESS != ret) {
1592             cli_dbgmsg("Failed to scan fmap.\n");
1593             goto done;
1594         }
1595 
1596         ret = cli_exp_eval(ctx, troot, &tmdata, NULL, NULL);
1597         if (ret == CL_VIRUS) {
1598             viruses_found++;
1599         }
1600 
1601         if (ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALLMATCHES)) {
1602             ret = cli_exp_eval(ctx, groot, &gmdata, NULL, NULL);
1603         }
1604 
1605         (void)cli_recursion_stack_pop(ctx); /* Restore the parent fmap */
1606     }
1607 
1608 done:
1609     if (NULL != new_map) {
1610         funmap(new_map);
1611     }
1612 
1613     if (tmdata_initialized) {
1614         cli_ac_freedata(&tmdata);
1615     }
1616 
1617     if (gmdata_initialized) {
1618         cli_ac_freedata(&gmdata);
1619     }
1620 
1621     if (ret == CL_CLEAN && viruses_found) {
1622         ret = CL_VIRUS;
1623     }
1624     return ret;
1625 }
1626 
/* Yields the smaller of x and y. Function-like macro: whichever argument is
 * selected is evaluated twice, so do not pass expressions with side effects. */
#define min(x, y) ((x) < (y) ? (x) : (y))
1628 
1629 /**
1630  * Find a file in a directory tree.
1631  * \param filename Name of the file to find
1632  * \param dir Directory path where to find the file
1633  * \param A pointer to the string to store the result into
1634  * \param Size of the string to store the result in
1635  */
1636 cl_error_t find_file(const char *filename, const char *dir, char *result, size_t result_size)
1637 {
1638     DIR *dd;
1639     struct dirent *dent;
1640     char fullname[PATH_MAX];
1641     cl_error_t ret;
1642     size_t len;
1643     STATBUF statbuf;
1644 
1645     if (!result) {
1646         return CL_ENULLARG;
1647     }
1648 
1649     if ((dd = opendir(dir)) != NULL) {
1650         while ((dent = readdir(dd))) {
1651             if (dent->d_ino) {
1652                 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0) {
1653 
1654                     snprintf(fullname, sizeof(fullname), "%s" PATHSEP "%s", dir, dent->d_name);
1655                     fullname[sizeof(fullname) - 1] = '\0';
1656 
1657                     /* stat the file */
1658                     if (LSTAT(fullname, &statbuf) != -1) {
1659                         if (S_ISDIR(statbuf.st_mode) && !S_ISLNK(statbuf.st_mode)) {
1660                             ret = find_file(filename, fullname, result, result_size);
1661                             if (ret == CL_SUCCESS) {
1662                                 closedir(dd);
1663                                 return ret;
1664                             }
1665                         } else if (S_ISREG(statbuf.st_mode)) {
1666                             if (strcmp(dent->d_name, filename) == 0) {
1667                                 len = min(strlen(dir) + 1, result_size);
1668                                 memcpy(result, dir, len);
1669                                 result[len - 1] = '\0';
1670                                 closedir(dd);
1671                                 return CL_SUCCESS;
1672                             }
1673                         }
1674                     }
1675                 }
1676             }
1677         }
1678         closedir(dd);
1679     }
1680 
1681     return CL_EOPEN;
1682 }
1683 
1684 /**
1685  * Scan an OLE directory for a VBA project.
1686  * Contrary to cli_vba_scandir, this function uses the dir file to locate VBA modules.
1687  */
1688 static cl_error_t cli_vba_scandir_new(const char *dirname, cli_ctx *ctx, struct uniq *U, int *has_macros)
1689 {
1690     cl_error_t ret   = CL_SUCCESS;
1691     uint32_t hashcnt = 0;
1692     char *hash       = NULL;
1693     char path[PATH_MAX];
1694     char filename[PATH_MAX];
1695     int tempfd        = -1;
1696     int viruses_found = 0;
1697 
1698     if (CL_SUCCESS != (ret = uniq_get(U, "dir", 3, &hash, &hashcnt))) {
1699         cli_dbgmsg("cli_vba_scandir_new: uniq_get('dir') failed with ret code (%d)!\n", ret);
1700         return ret;
1701     }
1702 
1703     while (hashcnt) {
1704         //Find the directory containing the extracted dir file. This is complicated
1705         //because ClamAV doesn't use the file names from the OLE file, but temporary names,
1706         //and we have neither the complete path of the dir file in the OLE container,
1707         //nor the mapping of the temporary directory names to their OLE names.
1708         snprintf(filename, sizeof(filename), "%s_%u", hash, hashcnt);
1709         filename[sizeof(filename) - 1] = '\0';
1710 
1711         if (CL_SUCCESS == find_file(filename, dirname, path, sizeof(path))) {
1712             cli_dbgmsg("cli_vba_scandir_new: Found dir file: %s\n", path);
1713             if ((ret = cli_vba_readdir_new(ctx, path, U, hash, hashcnt, &tempfd, has_macros)) != CL_SUCCESS) {
1714                 //FIXME: Since we only know the stream name of the OLE2 stream, but not its path inside the
1715                 //       OLE2 archive, we don't know if we have the right file. The only thing we can do is
1716                 //       iterate all of them until one succeeds.
1717                 cli_dbgmsg("cli_vba_scandir_new: Failed to read dir from %s, trying others (error: %s (%d))\n", path, cl_strerror(ret), (int)ret);
1718                 ret = CL_SUCCESS;
1719                 hashcnt--;
1720                 continue;
1721             }
1722 
1723 #if HAVE_JSON
1724             if (*has_macros && SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
1725                 cli_jsonbool(ctx->wrkproperty, "HasMacros", 1);
1726                 json_object *macro_languages = cli_jsonarray(ctx->wrkproperty, "MacroLanguages");
1727                 if (macro_languages) {
1728                     cli_jsonstr(macro_languages, NULL, "VBA");
1729                 } else {
1730                     cli_dbgmsg("[cli_vba_scandir_new] Failed to add \"VBA\" entry to MacroLanguages JSON array\n");
1731                 }
1732             }
1733 #endif
1734             if (SCAN_HEURISTIC_MACROS && *has_macros) {
1735                 ret = cli_append_virus(ctx, "Heuristics.OLE2.ContainsMacros.VBA");
1736                 if (ret == CL_VIRUS) {
1737                     viruses_found++;
1738                     if (!SCAN_ALLMATCHES) {
1739                         goto done;
1740                     }
1741                 }
1742             }
1743 
1744             /*
1745              * Now rewind the extracted vba-project output FD and scan it!
1746              */
1747             if (lseek(tempfd, 0, SEEK_SET) != 0) {
1748                 cli_dbgmsg("cli_vba_scandir_new: Failed to seek to beginning of temporary VBA project file\n");
1749                 ret = CL_ESEEK;
1750                 goto done;
1751             }
1752 
1753             ret = cli_scan_desc(tempfd, ctx, CL_TYPE_SCRIPT, 0, NULL, AC_SCAN_VIR, NULL, NULL);
1754 
1755             close(tempfd);
1756             tempfd = -1;
1757 
1758             if (CL_VIRUS == ret) {
1759                 viruses_found++;
1760                 if (!SCAN_ALLMATCHES) {
1761                     goto done;
1762                 }
1763             }
1764         }
1765 
1766         hashcnt--;
1767     }
1768 
1769 done:
1770     if (tempfd != -1) {
1771         close(tempfd);
1772         tempfd = -1;
1773     }
1774 
1775     if (viruses_found > 0)
1776         ret = CL_VIRUS;
1777     return ret;
1778 }
1779 
1780 static cl_error_t cli_vba_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U, int *has_macros)
1781 {
1782     cl_error_t status = CL_CLEAN;
1783     cl_error_t ret;
1784     int i, j;
1785     size_t data_len;
1786     vba_project_t *vba_project;
1787     DIR *dd = NULL;
1788     struct dirent *dent;
1789     STATBUF statbuf;
1790     char *fullname, vbaname[1024];
1791     unsigned char *data;
1792     char *hash;
1793     uint32_t hashcnt           = 0;
1794     unsigned int viruses_found = 0;
1795 
1796     cli_dbgmsg("VBADir: %s\n", dirname);
1797     if (CL_SUCCESS != (ret = uniq_get(U, "_vba_project", 12, NULL, &hashcnt))) {
1798         cli_dbgmsg("VBADir: uniq_get('_vba_project') failed with ret code (%d)!\n", ret);
1799         status = ret;
1800         goto done;
1801     }
1802     while (hashcnt) {
1803         if (!(vba_project = (vba_project_t *)cli_vba_readdir(dirname, U, hashcnt))) {
1804             hashcnt--;
1805             continue;
1806         }
1807 
1808         for (i = 0; i < vba_project->count; i++) {
1809             for (j = 1; (unsigned int)j <= vba_project->colls[i]; j++) {
1810                 int fd = -1;
1811 
1812                 snprintf(vbaname, 1024, "%s" PATHSEP "%s_%u", vba_project->dir, vba_project->name[i], j);
1813                 vbaname[sizeof(vbaname) - 1] = '\0';
1814 
1815                 fd = open(vbaname, O_RDONLY | O_BINARY);
1816                 if (fd == -1) {
1817                     continue;
1818                 }
1819                 cli_dbgmsg("VBADir: Decompress VBA project '%s_%u'\n", vba_project->name[i], j);
1820                 data = (unsigned char *)cli_vba_inflate(fd, vba_project->offset[i], &data_len);
1821                 close(fd);
1822                 *has_macros = *has_macros + 1;
1823                 if (!data) {
1824                 } else {
1825                     /* cli_dbgmsg("Project content:\n%s", data); */
1826                     if (ctx->scanned)
1827                         *ctx->scanned += data_len / CL_COUNT_PRECISION;
1828                     if (ctx->engine->keeptmp) {
1829                         char *tempfile;
1830                         int of;
1831 
1832                         if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tempfile, &of)) != CL_SUCCESS) {
1833                             cli_warnmsg("VBADir: WARNING: VBA project '%s_%u' cannot be dumped to file\n", vba_project->name[i], j);
1834                             status = ret;
1835                             goto done;
1836                         }
1837                         if (cli_writen(of, data, data_len) != data_len) {
1838                             cli_warnmsg("VBADir: WARNING: VBA project '%s_%u' failed to write to file\n", vba_project->name[i], j);
1839                             close(of);
1840                             free(tempfile);
1841                             status = CL_EWRITE;
1842                             goto done;
1843                         }
1844 
1845                         cli_dbgmsg("VBADir: VBA project '%s_%u' dumped to %s\n", vba_project->name[i], j, tempfile);
1846                         free(tempfile);
1847                     }
1848 
1849                     if (vba_scandata(data, data_len, ctx) == CL_VIRUS) {
1850                         viruses_found++;
1851                         if (!SCAN_ALLMATCHES) {
1852                             free(data);
1853                             status = CL_VIRUS;
1854                             break;
1855                         }
1856                     }
1857                     free(data);
1858                 }
1859             }
1860 
1861             if (status == CL_VIRUS)
1862                 break;
1863         }
1864 
1865         cli_free_vba_project(vba_project);
1866         vba_project = NULL;
1867 
1868         if (status == CL_VIRUS)
1869             break;
1870 
1871         hashcnt--;
1872     }
1873 
1874     if (status == CL_CLEAN || (status == CL_VIRUS && SCAN_ALLMATCHES)) {
1875         if (CL_SUCCESS != (ret = uniq_get(U, "powerpoint document", 19, &hash, &hashcnt))) {
1876             cli_dbgmsg("VBADir: uniq_get('powerpoint document') failed with ret code (%d)!\n", ret);
1877             status = ret;
1878             goto done;
1879         }
1880         while (hashcnt) {
1881             int fd = -1;
1882 
1883             snprintf(vbaname, 1024, "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
1884             vbaname[sizeof(vbaname) - 1] = '\0';
1885 
1886             fd = open(vbaname, O_RDONLY | O_BINARY);
1887             if (fd == -1) {
1888                 hashcnt--;
1889                 continue;
1890             }
1891             if ((fullname = cli_ppt_vba_read(fd, ctx))) {
1892                 ret = cli_magic_scan_dir(fullname, ctx);
1893 
1894                 if (!ctx->engine->keeptmp)
1895                     cli_rmdirs(fullname);
1896                 free(fullname);
1897 
1898                 if (ret == CL_VIRUS) {
1899                     status = CL_VIRUS;
1900                     viruses_found++;
1901                     if (!SCAN_ALLMATCHES) {
1902                         close(fd);
1903                         break;
1904                     }
1905                 }
1906             }
1907             close(fd);
1908             hashcnt--;
1909         }
1910     }
1911 
1912     if (status == CL_CLEAN || (status == CL_VIRUS && SCAN_ALLMATCHES)) {
1913         if (CL_SUCCESS != (ret = uniq_get(U, "worddocument", 12, &hash, &hashcnt))) {
1914             cli_dbgmsg("VBADir: uniq_get('worddocument') failed with ret code (%d)!\n", ret);
1915             status = ret;
1916             goto done;
1917         }
1918         while (hashcnt) {
1919             int fd = -1;
1920 
1921             snprintf(vbaname, sizeof(vbaname), "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
1922             vbaname[sizeof(vbaname) - 1] = '\0';
1923 
1924             fd = open(vbaname, O_RDONLY | O_BINARY);
1925             if (fd == -1) {
1926                 hashcnt--;
1927                 continue;
1928             }
1929 
1930             if (!(vba_project = (vba_project_t *)cli_wm_readdir(fd))) {
1931                 close(fd);
1932                 hashcnt--;
1933                 continue;
1934             }
1935 
1936             for (i = 0; i < vba_project->count; i++) {
1937                 cli_dbgmsg("VBADir: Decompress WM project macro:%d key:%d length:%d\n", i, vba_project->key[i], vba_project->length[i]);
1938                 data = (unsigned char *)cli_wm_decrypt_macro(fd, vba_project->offset[i], vba_project->length[i], vba_project->key[i]);
1939                 if (!data) {
1940                     cli_dbgmsg("VBADir: WARNING: WM project '%s' macro %d decrypted to NULL\n", vba_project->name[i], i);
1941                 } else {
1942                     cli_dbgmsg("Project content:\n%s", data);
1943                     if (ctx->scanned)
1944                         *ctx->scanned += vba_project->length[i] / CL_COUNT_PRECISION;
1945                     if (vba_scandata(data, vba_project->length[i], ctx) == CL_VIRUS) {
1946                         viruses_found++;
1947                         if (!SCAN_ALLMATCHES) {
1948                             free(data);
1949                             status = CL_VIRUS;
1950                             break;
1951                         }
1952                     }
1953                     free(data);
1954                 }
1955             }
1956 
1957             close(fd);
1958             cli_free_vba_project(vba_project);
1959             vba_project = NULL;
1960 
1961             if (status == CL_VIRUS && !SCAN_ALLMATCHES) {
1962                 break;
1963             }
1964             hashcnt--;
1965         }
1966     }
1967 
1968 #if HAVE_JSON
1969     /* JSON Output Summary Information */
1970     if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
1971         if (CL_SUCCESS != (ret = uniq_get(U, "_5_summaryinformation", 21, &hash, &hashcnt))) {
1972             cli_dbgmsg("VBADir: uniq_get('_5_summaryinformation') failed with ret code (%d)!\n", ret);
1973             status = ret;
1974             goto done;
1975         }
1976         while (hashcnt) {
1977             int fd = -1;
1978 
1979             snprintf(vbaname, sizeof(vbaname), "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
1980             vbaname[sizeof(vbaname) - 1] = '\0';
1981 
1982             fd = open(vbaname, O_RDONLY | O_BINARY);
1983             if (fd >= 0) {
1984                 cli_dbgmsg("VBADir: detected a '_5_summaryinformation' stream\n");
1985                 /* JSONOLE2 - what to do if something breaks? */
1986                 cli_ole2_summary_json(ctx, fd, 0);
1987                 close(fd);
1988             }
1989             hashcnt--;
1990         }
1991 
1992         if (CL_SUCCESS != (ret = uniq_get(U, "_5_documentsummaryinformation", 29, &hash, &hashcnt))) {
1993             cli_dbgmsg("VBADir: uniq_get('_5_documentsummaryinformation') failed with ret code (%d)!\n", ret);
1994             status = ret;
1995             goto done;
1996         }
1997         while (hashcnt) {
1998             int fd = -1;
1999 
2000             snprintf(vbaname, sizeof(vbaname), "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
2001             vbaname[sizeof(vbaname) - 1] = '\0';
2002 
2003             fd = open(vbaname, O_RDONLY | O_BINARY);
2004             if (fd >= 0) {
2005                 cli_dbgmsg("VBADir: detected a '_5_documentsummaryinformation' stream\n");
2006                 /* JSONOLE2 - what to do if something breaks? */
2007                 cli_ole2_summary_json(ctx, fd, 1);
2008                 close(fd);
2009             }
2010             hashcnt--;
2011         }
2012     }
2013 #endif
2014 
2015     if (status != CL_CLEAN && !(status == CL_VIRUS && SCAN_ALLMATCHES)) {
2016         goto done;
2017     }
2018 
2019     /* Check directory for embedded OLE objects */
2020     if (CL_SUCCESS != (ret = uniq_get(U, "_1_ole10native", 14, &hash, &hashcnt))) {
2021         cli_dbgmsg("VBADir: uniq_get('_1_ole10native') failed with ret code (%d)!\n", ret);
2022         status = ret;
2023         goto done;
2024     }
2025     while (hashcnt) {
2026         int fd = -1;
2027 
2028         snprintf(vbaname, sizeof(vbaname), "%s" PATHSEP "%s_%u", dirname, hash, hashcnt);
2029         vbaname[sizeof(vbaname) - 1] = '\0';
2030 
2031         fd = open(vbaname, O_RDONLY | O_BINARY);
2032         if (fd >= 0) {
2033             ret = cli_scan_ole10(fd, ctx);
2034             close(fd);
2035             if (CL_VIRUS == ret) {
2036                 viruses_found++;
2037                 if (!SCAN_ALLMATCHES) {
2038                     status = ret;
2039                     goto done;
2040                 }
2041             }
2042         }
2043         hashcnt--;
2044     }
2045 
2046     /* ACAB: since we now hash filenames and handle collisions we
2047      * could avoid recursion by removing the block below and by
2048      * flattening the paths in ole2_walk_property_tree (case 1) */
2049 
2050     if ((dd = opendir(dirname)) != NULL) {
2051         while ((dent = readdir(dd))) {
2052             if (dent->d_ino) {
2053                 if (strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..")) {
2054                     /* build the full name */
2055                     fullname = cli_malloc(strlen(dirname) + strlen(dent->d_name) + 2);
2056                     if (!fullname) {
2057                         cli_dbgmsg("cli_vba_scandir: Unable to allocate memory for fullname\n");
2058                         status = CL_EMEM;
2059                         break;
2060                     }
2061                     sprintf(fullname, "%s" PATHSEP "%s", dirname, dent->d_name);
2062 
2063                     /* stat the file */
2064                     if (LSTAT(fullname, &statbuf) != -1) {
2065                         if (S_ISDIR(statbuf.st_mode) && !S_ISLNK(statbuf.st_mode))
2066                             if (cli_vba_scandir(fullname, ctx, U, has_macros) == CL_VIRUS) {
2067                                 viruses_found++;
2068                                 if (!SCAN_ALLMATCHES) {
2069                                     status = CL_VIRUS;
2070                                     free(fullname);
2071                                     break;
2072                                 }
2073                             }
2074                     }
2075                     free(fullname);
2076                 }
2077             }
2078         }
2079     } else {
2080         cli_dbgmsg("VBADir: Can't open directory %s.\n", dirname);
2081         status = CL_EOPEN;
2082         goto done;
2083     }
2084 
2085 done:
2086     if (NULL != dd) {
2087         closedir(dd);
2088     }
2089 
2090 #if HAVE_JSON
2091     if (*has_macros && SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
2092         cli_jsonbool(ctx->wrkproperty, "HasMacros", 1);
2093         json_object *macro_languages = cli_jsonarray(ctx->wrkproperty, "MacroLanguages");
2094         if (macro_languages) {
2095             cli_jsonstr(macro_languages, NULL, "VBA");
2096         } else {
2097             cli_dbgmsg("[cli_scan_vbadir] Failed to add \"VBA\" entry to MacroLanguages JSON array\n");
2098         }
2099     }
2100 #endif
2101     if (SCAN_HEURISTIC_MACROS && *has_macros) {
2102         ret = cli_append_virus(ctx, "Heuristics.OLE2.ContainsMacros.VBA");
2103         if (ret == CL_VIRUS)
2104             viruses_found++;
2105     }
2106 
2107     if (viruses_found > 0) {
2108         status = CL_VIRUS;
2109     }
2110     return status;
2111 }
2112 
2113 static cl_error_t cli_xlm_scandir(const char *dirname, cli_ctx *ctx, struct uniq *U)
2114 {
2115     cl_error_t ret             = CL_CLEAN;
2116     char *hash                 = NULL;
2117     uint32_t hashcnt           = 0;
2118     unsigned int viruses_found = 0;
2119     char STR_WORKBOOK[]        = "workbook";
2120     char STR_BOOK[]            = "book";
2121 
2122     cli_dbgmsg("XLMDir: %s\n", dirname);
2123 
2124     if (CL_SUCCESS != (ret = uniq_get(U, STR_WORKBOOK, sizeof(STR_WORKBOOK) - 1, &hash, &hashcnt))) {
2125         if (CL_SUCCESS != (ret = uniq_get(U, STR_BOOK, sizeof(STR_BOOK) - 1, &hash, &hashcnt))) {
2126             cli_dbgmsg("XLMDir: uniq_get('%s') failed with ret code (%d)!\n", STR_BOOK, ret);
2127             return ret;
2128         }
2129     }
2130 
2131     for (; hashcnt > 0; hashcnt--) {
2132         if ((ret = cli_xlm_extract_macros(dirname, ctx, U, hash, hashcnt)) != CL_SUCCESS) {
2133             switch (ret) {
2134                 case CL_VIRUS:
2135                 case CL_EMEM:
2136                     return ret;
2137                 default:
2138                     cli_dbgmsg("XLMDir: An error occured when parsing XLM BIFF temp file, skipping to next file.\n");
2139             }
2140         }
2141     }
2142 
2143     if (SCAN_HEURISTIC_MACROS) {
2144         ret = cli_append_virus(ctx, "Heuristics.OLE2.ContainsMacros.XLM");
2145         if (ret == CL_VIRUS)
2146             viruses_found++;
2147     }
2148     if (SCAN_ALLMATCHES && viruses_found)
2149         return CL_VIRUS;
2150     return ret;
2151 }
2152 
2153 static cl_error_t cli_scanhtml(cli_ctx *ctx)
2154 {
2155     char *tempname, fullname[1024];
2156     cl_error_t ret = CL_CLEAN;
2157     int fd;
2158     fmap_t *map                = ctx->fmap;
2159     unsigned int viruses_found = 0;
2160     uint64_t curr_len          = map->len;
2161 
2162     cli_dbgmsg("in cli_scanhtml()\n");
2163 
2164     /* CL_ENGINE_MAX_HTMLNORMALIZE */
2165     if (curr_len > ctx->engine->maxhtmlnormalize) {
2166         cli_dbgmsg("cli_scanhtml: exiting (file larger than MaxHTMLNormalize)\n");
2167         return CL_CLEAN;
2168     }
2169 
2170     if (!(tempname = cli_gentemp_with_prefix(ctx->sub_tmpdir, "html-tmp")))
2171         return CL_EMEM;
2172 
2173     if (mkdir(tempname, 0700)) {
2174         cli_errmsg("cli_scanhtml: Can't create temporary directory %s\n", tempname);
2175         free(tempname);
2176         return CL_ETMPDIR;
2177     }
2178 
2179     cli_dbgmsg("cli_scanhtml: using tempdir %s\n", tempname);
2180 
2181     html_normalise_map(map, tempname, NULL, ctx->dconf);
2182     snprintf(fullname, 1024, "%s" PATHSEP "nocomment.html", tempname);
2183     fd = open(fullname, O_RDONLY | O_BINARY);
2184     if (fd >= 0) {
2185         ctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push().
2186         if ((ret = cli_scan_desc(fd, ctx, CL_TYPE_HTML, 0, NULL, AC_SCAN_VIR, NULL, NULL)) == CL_VIRUS)
2187             viruses_found++;
2188         close(fd);
2189     }
2190 
2191     if (ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALLMATCHES)) {
2192         /* CL_ENGINE_MAX_HTMLNOTAGS */
2193         curr_len = map->len;
2194         if (curr_len > ctx->engine->maxhtmlnotags) {
2195             /* we're not interested in scanning large files in notags form */
2196             /* TODO: don't even create notags if file is over limit */
2197             cli_dbgmsg("cli_scanhtml: skipping notags (normalized size over MaxHTMLNoTags)\n");
2198         } else {
2199             snprintf(fullname, 1024, "%s" PATHSEP "notags.html", tempname);
2200             fd = open(fullname, O_RDONLY | O_BINARY);
2201             if (fd >= 0) {
2202                 ctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push().
2203                 if ((ret = cli_scan_desc(fd, ctx, CL_TYPE_HTML, 0, NULL, AC_SCAN_VIR, NULL, NULL)) == CL_VIRUS)
2204                     viruses_found++;
2205                 close(fd);
2206             }
2207         }
2208     }
2209 
2210     if (ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALLMATCHES)) {
2211         snprintf(fullname, 1024, "%s" PATHSEP "javascript", tempname);
2212         fd = open(fullname, O_RDONLY | O_BINARY);
2213         if (fd >= 0) {
2214             ctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push().
2215             if ((ret = cli_scan_desc(fd, ctx, CL_TYPE_HTML, 0, NULL, AC_SCAN_VIR, NULL, NULL)) == CL_VIRUS)
2216                 viruses_found++;
2217             if (ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALLMATCHES)) {
2218                 ctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push().
2219                 if ((ret = cli_scan_desc(fd, ctx, CL_TYPE_TEXT_ASCII, 0, NULL, AC_SCAN_VIR, NULL, NULL)) == CL_VIRUS)
2220                     viruses_found++;
2221             }
2222             close(fd);
2223         }
2224     }
2225 
2226     if (ret == CL_CLEAN || (ret == CL_VIRUS && SCAN_ALLMATCHES)) {
2227         ctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push() or cleared when cli_magic_scan_dir() is done.
2228         snprintf(fullname, 1024, "%s" PATHSEP "rfc2397", tempname);
2229         ret = cli_magic_scan_dir(fullname, ctx);
2230         if (CL_EOPEN == ret) {
2231             /* If the directory doesn't exist, that's fine */
2232             ret = CL_CLEAN;
2233         }
2234     }
2235 
2236     if (!ctx->engine->keeptmp)
2237         cli_rmdirs(tempname);
2238 
2239     free(tempname);
2240     if (SCAN_ALLMATCHES && viruses_found)
2241         return CL_VIRUS;
2242     return ret;
2243 }
2244 
/**
 * @brief Normalize the current layer as text and scan the normalized form.
 *
 * Two strategies are used, depending on the matcher in engine->root[7] (troot):
 *  - If troot has relative offsets or linked bytecodes, the whole map is
 *    normalized into a temp file, which is mapped, pushed as a child layer and
 *    scanned with cli_scan_fmap().
 *  - Otherwise the map is normalized chunk by chunk into an in-memory buffer
 *    that is scanned with cli_scan_buff() every time it is flushed. When
 *    keeptmp is set the flushed data is also written to a temp file.
 *
 * Afterwards the logical expressions for troot and engine->root[0] (groot) are
 * evaluated with cli_exp_eval(), unless a detection already ended the scan.
 *
 * Maps larger than maxscriptnormalize are skipped and reported as CL_CLEAN.
 *
 * @param ctx The scan context; ctx->fmap is the data to normalize.
 * @return CL_ENULLARG if ctx or the engine root table is NULL, CL_VIRUS if
 *         anything alerted, otherwise the status of the last operation.
 */
static cl_error_t cli_scanscript(cli_ctx *ctx)
{
    const unsigned char *buff;
    unsigned char *normalized = NULL;
    struct text_norm_state state;
    char *tmpname = NULL;
    int ofd       = -1;
    cl_error_t ret;
    struct cli_matcher *troot;
    uint32_t maxpatlen, offset = 0;
    struct cli_matcher *groot;
    struct cli_ac_data gmdata, tmdata;
    /* Track which matcher data needs cli_ac_freedata() at done. */
    int gmdata_initialized = 0;
    int tmdata_initialized = 0;
    struct cli_ac_data *mdata[2];
    cl_fmap_t *new_map = NULL;
    fmap_t *map;
    size_t at                  = 0;
    unsigned int viruses_found = 0;
    uint64_t curr_len;
    struct cli_target_info info;

    if (!ctx || !ctx->engine->root)
        return CL_ENULLARG;

    map       = ctx->fmap;
    curr_len  = map->len;
    groot     = ctx->engine->root[0];
    troot     = ctx->engine->root[7];
    maxpatlen = troot ? troot->maxpatlen : 0;

    // Initialize info so it's safe to pass to destroy later
    cli_targetinfo_init(&info);

    cli_dbgmsg("in cli_scanscript()\n");

    /* CL_ENGINE_MAX_SCRIPTNORMALIZE */
    if (curr_len > ctx->engine->maxscriptnormalize) {
        cli_dbgmsg("cli_scanscript: exiting (file larger than MaxScriptSize)\n");
        ret = CL_CLEAN;
        goto done;
    }

    /* The buffer has maxpatlen extra bytes; the in-memory path uses them to
     * carry the tail of one flushed chunk over into the next one. */
    if (!(normalized = cli_malloc(SCANBUFF + maxpatlen))) {
        /* NOTE(review): the message reports SCANBUFF only, not SCANBUFF + maxpatlen. */
        cli_dbgmsg("cli_scanscript: Unable to malloc %u bytes\n", SCANBUFF);
        ret = CL_EMEM;
        goto done;
    }
    text_normalize_init(&state, normalized, SCANBUFF + maxpatlen);

    if ((ret = cli_ac_initdata(&tmdata, troot ? troot->ac_partsigs : 0, troot ? troot->ac_lsigs : 0, troot ? troot->ac_reloff_num : 0, CLI_DEFAULT_AC_TRACKLEN))) {
        goto done;
    }
    tmdata_initialized = 1;

    /* groot is dereferenced without a NULL check, unlike troot. */
    if ((ret = cli_ac_initdata(&gmdata, groot->ac_partsigs, groot->ac_lsigs, groot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))) {
        goto done;
    }
    gmdata_initialized = 1;

    /* dump to disk only if explicitly asked to
     * or if necessary to check relative offsets,
     * otherwise we can process just in-memory */
    if (ctx->engine->keeptmp || (troot && (troot->ac_reloff_num > 0 || troot->linked_bcs))) {
        if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tmpname, &ofd))) {
            cli_dbgmsg("cli_scanscript: Can't generate temporary file/descriptor\n");
            goto done;
        }
        if (ctx->engine->keeptmp)
            cli_dbgmsg("cli_scanscript: saving normalized file to %s\n", tmpname);
    }

    mdata[0] = &tmdata;
    mdata[1] = &gmdata;

    /* If there's a relative offset in troot or triggered bytecodes, normalize to file.*/
    if (troot && (troot->ac_reloff_num > 0 || troot->linked_bcs)) {
        size_t map_off = 0;
        while (map_off < map->len) {
            size_t written;
            /* A return of 0 from text_normalize_map() ends the loop. */
            if (!(written = text_normalize_map(&state, map, map_off)))
                break;
            map_off += written;

            /* Only a return of -1 is treated as failure; short writes are not detected. */
            if (write(ofd, state.out, state.out_pos) == -1) {
                cli_errmsg("cli_scanscript: can't write to file %s\n", tmpname);
                ret = CL_EWRITE;
                goto done;
            }
            text_normalize_reset(&state);
        }

        /* Temporarily store the normalized file map in the context. */
        new_map = fmap(ofd, 0, 0, NULL);
        if (new_map == NULL) {
            /* NOTE(review): ret is not updated here, so it still holds the success
             * value from cli_gentempfd() and the mapping failure is not reported
             * to the caller — confirm this is intended. */
            cli_dbgmsg("cli_scanscript: could not map file %s\n", tmpname);
            goto done;
        }

        ctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push().

        ret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_TEXT_ASCII, true); /* Perform cli_scan_fmap with child fmap */
        if (CL_SUCCESS != ret) {
            cli_dbgmsg("Failed to scan fmap.\n");
            goto done;
        }

        /* scan map */
        ret = cli_scan_fmap(ctx, CL_TYPE_TEXT_ASCII, 0, NULL, AC_SCAN_VIR, NULL, NULL);
        if (ret == CL_VIRUS) {
            viruses_found++;
        }

        (void)cli_recursion_stack_pop(ctx); /* Restore the parent fmap */
    } else {
        /* Since the above is moderately costly all in all,
         * do the old stuff if there's no relative offsets. */

        if (troot) {
            cli_targetinfo(&info, 7, ctx);
            ret = cli_ac_caloff(troot, &tmdata, &info);
            if (ret)
                goto done;
        }

        while (1) {
            /* Read at most one page per iteration; len becomes 0 at the end of the map. */
            size_t len = MIN(map->pgsz, map->len - at);
            buff       = fmap_need_off_once(map, at, len);
            at += len;
            if (!buff || !len || state.out_pos + len > state.out_len) {
                /* flush if error/EOF, or too little buffer space left */
                if ((ofd != -1) && (write(ofd, state.out, state.out_pos) == -1)) {
                    cli_errmsg("cli_scanscript: can't write to file %s\n", tmpname);
                    close(ofd);
                    ofd = -1;
                    /* we can continue to scan in memory */
                }
                /* when we flush the buffer also scan */
                if (cli_scan_buff(state.out, state.out_pos, offset, ctx, CL_TYPE_TEXT_ASCII, mdata) == CL_VIRUS) {
                    if (SCAN_ALLMATCHES)
                        viruses_found++;
                    else {
                        ret = CL_VIRUS;
                        break;
                    }
                }
                if (ctx->scanned)
                    *ctx->scanned += state.out_pos / CL_COUNT_PRECISION;
                offset += state.out_pos;
                /* carry over maxpatlen from previous buffer */
                if (state.out_pos > maxpatlen)
                    memmove(state.out, state.out + state.out_pos - maxpatlen, maxpatlen);
                text_normalize_reset(&state);
                /* NOTE(review): out_pos is set to maxpatlen even when fewer bytes
                 * were moved above — confirm that is intended. */
                state.out_pos = maxpatlen;
            }
            if (!len)
                break;
            /* A failed read or short normalization is only logged; the loop moves on. */
            if (!buff || text_normalize_buffer(&state, buff, len) != len) {
                cli_dbgmsg("cli_scanscript: short read during normalizing\n");
            }
        }
    }

    /* Evaluate logical expressions unless a detection already ended the scan. */
    if (ret != CL_VIRUS || SCAN_ALLMATCHES) {
        if ((ret = cli_exp_eval(ctx, troot, &tmdata, NULL, NULL)) == CL_VIRUS)
            viruses_found++;
        if (ret != CL_VIRUS || SCAN_ALLMATCHES)
            if ((ret = cli_exp_eval(ctx, groot, &gmdata, NULL, NULL)) == CL_VIRUS)
                viruses_found++;
    }

done:
    if (NULL != new_map) {
        funmap(new_map);
    }

    cli_targetinfo_destroy(&info);

    if (NULL != normalized) {
        free(normalized);
    }

    if (tmdata_initialized) {
        cli_ac_freedata(&tmdata);
    }

    if (gmdata_initialized) {
        cli_ac_freedata(&gmdata);
    }

    if (ofd != -1)
        close(ofd);
    if (tmpname != NULL) {
        /* The normalized temp file is only left on disk when keeptmp is set. */
        if (!ctx->engine->keeptmp)
            cli_unlink(tmpname);
        free(tmpname);
    }

    /* Any counted detection takes precedence over ret. */
    if (viruses_found)
        return CL_VIRUS;

    return ret;
}
2448 
2449 static cl_error_t cli_scanhtml_utf16(cli_ctx *ctx)
2450 {
2451     cl_error_t status = CL_ERROR;
2452     char *tempname    = NULL;
2453     char *decoded     = NULL;
2454     const char *buff;
2455     int fd = -1;
2456     int bytes;
2457     size_t at       = 0;
2458     fmap_t *new_map = NULL;
2459 
2460     cli_dbgmsg("in cli_scanhtml_utf16()\n");
2461 
2462     if (!(tempname = cli_gentemp_with_prefix(ctx->sub_tmpdir, "html-utf16-tmp"))) {
2463         status = CL_EMEM;
2464         goto done;
2465     }
2466 
2467     if ((fd = open(tempname, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) < 0) {
2468         cli_errmsg("cli_scanhtml_utf16: Can't create file %s\n", tempname);
2469         status = CL_EOPEN;
2470         goto done;
2471     }
2472 
2473     cli_dbgmsg("cli_scanhtml_utf16: using tempfile %s\n", tempname);
2474 
2475     while (at < ctx->fmap->len) {
2476         bytes = MIN(ctx->fmap->len - at, ctx->fmap->pgsz * 16);
2477         if (!(buff = fmap_need_off_once(ctx->fmap, at, bytes))) {
2478             status = CL_EREAD;
2479             goto done;
2480         }
2481         at += bytes;
2482         decoded = cli_utf16toascii(buff, bytes);
2483         if (decoded) {
2484             if (write(fd, decoded, bytes / 2) == -1) {
2485                 cli_errmsg("cli_scanhtml_utf16: Can't write to file %s\n", tempname);
2486                 status = CL_EWRITE;
2487                 goto done;
2488             }
2489             free(decoded);
2490             decoded = NULL;
2491         }
2492     }
2493 
2494     new_map = fmap(fd, 0, 0, NULL);
2495     if (NULL == new_map) {
2496         cli_errmsg("cli_scanhtml_utf16: failed to create fmap for ascii HTML file decoded from utf16: %s\n.", tempname);
2497         status = CL_EMEM;
2498         goto done;
2499     }
2500 
2501     ctx->next_layer_is_normalized = true; // s/normalized/transcoded, practically the same thing.
2502 
2503     status = cli_recursion_stack_push(ctx, new_map, CL_TYPE_HTML, true); /* Perform exp_eval with child fmap */
2504     if (CL_SUCCESS != status) {
2505         cli_dbgmsg("Failed to scan fmap.\n");
2506         goto done;
2507     }
2508 
2509     status = cli_scanhtml(ctx);
2510 
2511     (void)cli_recursion_stack_pop(ctx); /* Restore the parent fmap */
2512 
2513     status = CL_SUCCESS;
2514 
2515 done:
2516     if (NULL != new_map) {
2517         funmap(new_map);
2518     }
2519     if (-1 != fd) {
2520         close(fd);
2521     }
2522 
2523     if (NULL != decoded) {
2524         free(decoded);
2525     }
2526 
2527     if (NULL != tempname) {
2528         if (!ctx->engine->keeptmp) {
2529             (void)cli_unlink(tempname);
2530         } else {
2531             cli_dbgmsg("cli_scanhtml_utf16: Decoded HTML data saved in %s\n", tempname);
2532         }
2533 
2534         free(tempname);
2535     }
2536 
2537     return status;
2538 }
2539 
2540 static cl_error_t cli_scanole2(cli_ctx *ctx)
2541 {
2542     char *dir          = NULL;
2543     cl_error_t ret     = CL_CLEAN;
2544     struct uniq *files = NULL;
2545     int has_vba = 0, has_xlm = 0, has_macros = 0, viruses_found = 0;
2546 
2547     cli_dbgmsg("in cli_scanole2()\n");
2548 
2549     /* generate the temporary directory */
2550     if (NULL == (dir = cli_gentemp_with_prefix(ctx->sub_tmpdir, "ole2-tmp"))) {
2551         ret = CL_EMEM;
2552         goto done;
2553     }
2554 
2555     if (mkdir(dir, 0700)) {
2556         cli_dbgmsg("OLE2: Can't create temporary directory %s\n", dir);
2557         free(dir);
2558         dir = NULL;
2559         ret = CL_ETMPDIR;
2560         goto done;
2561     }
2562 
2563     ret = cli_ole2_extract(dir, ctx, &files, &has_vba, &has_xlm);
2564     if (ret != CL_CLEAN && ret != CL_VIRUS) {
2565         cli_dbgmsg("OLE2: %s\n", cl_strerror(ret));
2566         goto done;
2567     }
2568     if (CL_VIRUS == ret) {
2569         viruses_found++;
2570         if (!SCAN_ALLMATCHES) {
2571             goto done;
2572         }
2573     }
2574 
2575     if (has_vba && files) {
2576         ret = cli_vba_scandir(dir, ctx, files, &has_macros);
2577         if (CL_VIRUS == ret) {
2578             viruses_found++;
2579             if (!SCAN_ALLMATCHES) {
2580                 goto done;
2581             }
2582         }
2583 
2584         ret = cli_vba_scandir_new(dir, ctx, files, &has_macros);
2585         if (CL_VIRUS == ret) {
2586             viruses_found++;
2587             if (!SCAN_ALLMATCHES) {
2588                 goto done;
2589             }
2590         }
2591     }
2592 
2593     if (CL_VIRUS == ret) {
2594         viruses_found++;
2595         if (!SCAN_ALLMATCHES) {
2596             goto done;
2597         }
2598     }
2599 
2600     if (has_xlm && files) {
2601         ret = cli_xlm_scandir(dir, ctx, files);
2602         if (CL_VIRUS == ret) {
2603             if (!SCAN_ALLMATCHES) {
2604                 goto done;
2605             }
2606         }
2607     }
2608 
2609     if ((has_xlm || has_vba) && files) {
2610         if (CL_VIRUS == cli_magic_scan_dir(dir, ctx)) {
2611             if (!SCAN_ALLMATCHES) {
2612                 goto done;
2613             }
2614         }
2615     }
2616 
2617 done:
2618     if (files) {
2619         uniq_free(files);
2620     }
2621 
2622     if (NULL != dir) {
2623         if (!ctx->engine->keeptmp)
2624             cli_rmdirs(dir);
2625         free(dir);
2626     }
2627 
2628     if (viruses_found > 0) {
2629         ret = CL_VIRUS;
2630     }
2631 
2632     return ret;
2633 }
2634 
2635 static cl_error_t cli_scantar(cli_ctx *ctx, unsigned int posix)
2636 {
2637     char *dir;
2638     cl_error_t ret = CL_CLEAN;
2639 
2640     cli_dbgmsg("in cli_scantar()\n");
2641 
2642     /* generate temporary directory */
2643     if (!(dir = cli_gentemp_with_prefix(ctx->sub_tmpdir, "tar-tmp")))
2644         return CL_EMEM;
2645 
2646     if (mkdir(dir, 0700)) {
2647         cli_errmsg("Tar: Can't create temporary directory %s\n", dir);
2648         free(dir);
2649         return CL_ETMPDIR;
2650     }
2651 
2652     ret = cli_untar(dir, posix, ctx);
2653 
2654     if (!ctx->engine->keeptmp)
2655         cli_rmdirs(dir);
2656 
2657     free(dir);
2658     return ret;
2659 }
2660 
2661 static cl_error_t cli_scanscrenc(cli_ctx *ctx)
2662 {
2663     char *tempname;
2664     cl_error_t ret = CL_CLEAN;
2665 
2666     cli_dbgmsg("in cli_scanscrenc()\n");
2667 
2668     if (!(tempname = cli_gentemp_with_prefix(ctx->sub_tmpdir, "screnc-tmp")))
2669         return CL_EMEM;
2670 
2671     if (mkdir(tempname, 0700)) {
2672         cli_dbgmsg("CHM: Can't create temporary directory %s\n", tempname);
2673         free(tempname);
2674         return CL_ETMPDIR;
2675     }
2676 
2677     if (html_screnc_decode(ctx->fmap, tempname))
2678         ret = cli_magic_scan_dir(tempname, ctx);
2679 
2680     if (!ctx->engine->keeptmp)
2681         cli_rmdirs(tempname);
2682 
2683     free(tempname);
2684     return ret;
2685 }
2686 
2687 static cl_error_t cli_scanriff(cli_ctx *ctx)
2688 {
2689     cl_error_t ret = CL_CLEAN;
2690 
2691     if (cli_check_riff_exploit(ctx) == 2)
2692         ret = cli_append_virus(ctx, "Heuristics.Exploit.W32.MS05-002");
2693 
2694     return ret;
2695 }
2696 
2697 static cl_error_t cli_scancryptff(cli_ctx *ctx)
2698 {
2699     cl_error_t ret = CL_CLEAN, ndesc;
2700     unsigned int i;
2701     const unsigned char *src;
2702     unsigned char *dest = NULL;
2703     char *tempfile;
2704     size_t pos;
2705     size_t bread;
2706 
2707     /* Skip the CryptFF file header */
2708     pos = 0x10;
2709 
2710     if ((dest = (unsigned char *)cli_malloc(FILEBUFF)) == NULL) {
2711         cli_dbgmsg("CryptFF: Can't allocate memory\n");
2712         return CL_EMEM;
2713     }
2714 
2715     if (!(tempfile = cli_gentemp_with_prefix(ctx->sub_tmpdir, "cryptff"))) {
2716         free(dest);
2717         return CL_EMEM;
2718     }
2719 
2720     if ((ndesc = open(tempfile, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) < 0) {
2721         cli_errmsg("CryptFF: Can't create file %s\n", tempfile);
2722         free(dest);
2723         free(tempfile);
2724         return CL_ECREAT;
2725     }
2726 
2727     for (; (src = fmap_need_off_once_len(ctx->fmap, pos, FILEBUFF, &bread)) && bread; pos += bread) {
2728         for (i = 0; i < bread; i++)
2729             dest[i] = src[i] ^ (unsigned char)0xff;
2730         if (cli_writen(ndesc, dest, bread) == (size_t)-1) {
2731             cli_dbgmsg("CryptFF: Can't write to descriptor %d\n", ndesc);
2732             free(dest);
2733             close(ndesc);
2734             free(tempfile);
2735             return CL_EWRITE;
2736         }
2737     }
2738 
2739     free(dest);
2740 
2741     cli_dbgmsg("CryptFF: Scanning decrypted data\n");
2742 
2743     if ((ret = cli_magic_scan_desc(ndesc, tempfile, ctx, NULL)) == CL_VIRUS)
2744         cli_dbgmsg("CryptFF: Infected with %s\n", cli_get_last_virus(ctx));
2745 
2746     close(ndesc);
2747 
2748     if (ctx->engine->keeptmp)
2749         cli_dbgmsg("CryptFF: Decompressed data saved in %s\n", tempfile);
2750     else if (cli_unlink(tempfile))
2751         ret = CL_EUNLINK;
2752 
2753     free(tempfile);
2754     return ret;
2755 }
2756 
2757 static cl_error_t cli_scanpdf(cli_ctx *ctx, off_t offset)
2758 {
2759     cl_error_t ret;
2760     char *dir = cli_gentemp_with_prefix(ctx->sub_tmpdir, "pdf-tmp");
2761 
2762     if (!dir)
2763         return CL_EMEM;
2764 
2765     if (mkdir(dir, 0700)) {
2766         cli_dbgmsg("Can't create temporary directory for PDF file %s\n", dir);
2767         free(dir);
2768         return CL_ETMPDIR;
2769     }
2770 
2771     ret = cli_pdf(dir, ctx, offset);
2772 
2773     if (!ctx->engine->keeptmp)
2774         cli_rmdirs(dir);
2775 
2776     free(dir);
2777     return ret;
2778 }
2779 
2780 static cl_error_t cli_scantnef(cli_ctx *ctx)
2781 {
2782     cl_error_t ret;
2783     char *dir = cli_gentemp_with_prefix(ctx->sub_tmpdir, "tnef-tmp");
2784 
2785     if (!dir)
2786         return CL_EMEM;
2787 
2788     if (mkdir(dir, 0700)) {
2789         cli_dbgmsg("Can't create temporary directory for tnef file %s\n", dir);
2790         free(dir);
2791         return CL_ETMPDIR;
2792     }
2793 
2794     ret = cli_tnef(dir, ctx);
2795 
2796     if (ret == CL_CLEAN)
2797         ret = cli_magic_scan_dir(dir, ctx);
2798 
2799     if (!ctx->engine->keeptmp)
2800         cli_rmdirs(dir);
2801 
2802     free(dir);
2803     return ret;
2804 }
2805 
2806 static cl_error_t cli_scanuuencoded(cli_ctx *ctx)
2807 {
2808     cl_error_t ret;
2809     char *dir = cli_gentemp_with_prefix(ctx->sub_tmpdir, "uuencoded-tmp");
2810 
2811     if (!dir)
2812         return CL_EMEM;
2813 
2814     if (mkdir(dir, 0700)) {
2815         cli_dbgmsg("Can't create temporary directory for uuencoded file %s\n", dir);
2816         free(dir);
2817         return CL_ETMPDIR;
2818     }
2819 
2820     ret = cli_uuencode(dir, ctx->fmap);
2821 
2822     if (ret == CL_CLEAN)
2823         ret = cli_magic_scan_dir(dir, ctx);
2824 
2825     if (!ctx->engine->keeptmp)
2826         cli_rmdirs(dir);
2827 
2828     free(dir);
2829     return ret;
2830 }
2831 
2832 static cl_error_t cli_scanmail(cli_ctx *ctx)
2833 {
2834     char *dir;
2835     cl_error_t ret;
2836     unsigned int viruses_found = 0;
2837 
2838     cli_dbgmsg("Starting cli_scanmail()\n");
2839 
2840     /* generate the temporary directory */
2841     if (!(dir = cli_gentemp_with_prefix(ctx->sub_tmpdir, "mail-tmp")))
2842         return CL_EMEM;
2843 
2844     if (mkdir(dir, 0700)) {
2845         cli_dbgmsg("Mail: Can't create temporary directory %s\n", dir);
2846         free(dir);
2847         return CL_ETMPDIR;
2848     }
2849 
2850     /*
2851      * Extract the attachments into the temporary directory
2852      */
2853     if ((ret = cli_mbox(dir, ctx))) {
2854         if (ret == CL_VIRUS && SCAN_ALLMATCHES)
2855             viruses_found++;
2856         else {
2857             if (!ctx->engine->keeptmp)
2858                 cli_rmdirs(dir);
2859             free(dir);
2860             return ret;
2861         }
2862     }
2863 
2864     ret = cli_magic_scan_dir(dir, ctx);
2865 
2866     if (!ctx->engine->keeptmp)
2867         cli_rmdirs(dir);
2868 
2869     free(dir);
2870     if (viruses_found)
2871         return CL_VIRUS;
2872     return ret;
2873 }
2874 
2875 static cl_error_t cli_scan_structured(cli_ctx *ctx)
2876 {
2877     char buf[8192];
2878     size_t result          = 0;
2879     unsigned int cc_count  = 0;
2880     unsigned int ssn_count = 0;
2881     int done               = 0;
2882     fmap_t *map;
2883     size_t pos = 0;
2884     int (*ccfunc)(const unsigned char *buffer, size_t length, int cc_only);
2885     int (*ssnfunc)(const unsigned char *buffer, size_t length);
2886     unsigned int viruses_found = 0;
2887 
2888     if (ctx == NULL)
2889         return CL_ENULLARG;
2890 
2891     map = ctx->fmap;
2892 
2893     if (ctx->engine->min_cc_count == 1)
2894         ccfunc = dlp_has_cc;
2895     else
2896         ccfunc = dlp_get_cc_count;
2897 
2898     switch (SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL | SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED) {
2899         case (CL_SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL | CL_SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED):
2900             if (ctx->engine->min_ssn_count == 1)
2901                 ssnfunc = dlp_has_ssn;
2902             else
2903                 ssnfunc = dlp_get_ssn_count;
2904             break;
2905 
2906         case CL_SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL:
2907             if (ctx->engine->min_ssn_count == 1)
2908                 ssnfunc = dlp_has_normal_ssn;
2909             else
2910                 ssnfunc = dlp_get_normal_ssn_count;
2911             break;
2912 
2913         case CL_SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED:
2914             if (ctx->engine->min_ssn_count == 1)
2915                 ssnfunc = dlp_has_stripped_ssn;
2916             else
2917                 ssnfunc = dlp_get_stripped_ssn_count;
2918             break;
2919 
2920         default:
2921             ssnfunc = NULL;
2922     }
2923 
2924     while (!done && ((result = fmap_readn(map, buf, pos, 8191)) > 0) && (result != (size_t)-1)) {
2925         pos += result;
2926         if ((cc_count += ccfunc((const unsigned char *)buf, result,
2927                                 (ctx->options->heuristic & CL_SCAN_HEURISTIC_STRUCTURED_CC) ? 1 : 0)) >= ctx->engine->min_cc_count) {
2928             done = 1;
2929         }
2930 
2931         if (ssnfunc && ((ssn_count += ssnfunc((const unsigned char *)buf, result)) >= ctx->engine->min_ssn_count)) {
2932             done = 1;
2933         }
2934     }
2935 
2936     if (cc_count != 0 && cc_count >= ctx->engine->min_cc_count) {
2937         cli_dbgmsg("cli_scan_structured: %u credit card numbers detected\n", cc_count);
2938         if (CL_VIRUS == cli_append_virus(ctx, "Heuristics.Structured.CreditCardNumber")) {
2939             if (SCAN_ALLMATCHES) {
2940                 viruses_found++;
2941             } else {
2942                 return CL_VIRUS;
2943             }
2944         }
2945     }
2946 
2947     if (ssn_count != 0 && ssn_count >= ctx->engine->min_ssn_count) {
2948         cli_dbgmsg("cli_scan_structured: %u social security numbers detected\n", ssn_count);
2949         if (CL_VIRUS == cli_append_virus(ctx, "Heuristics.Structured.SSN")) {
2950             if (SCAN_ALLMATCHES) {
2951                 viruses_found++;
2952             } else {
2953                 return CL_VIRUS;
2954             }
2955         }
2956     }
2957 
2958     if (viruses_found)
2959         return CL_VIRUS;
2960     return CL_CLEAN;
2961 }
2962 
2963 static cl_error_t cli_scanembpe(cli_ctx *ctx, off_t offset)
2964 {
2965     cl_error_t ret = CL_CLEAN;
2966     int fd;
2967     size_t bytes;
2968     size_t size = 0;
2969     size_t todo;
2970     const char *buff;
2971     char *tmpname;
2972     fmap_t *map = ctx->fmap;
2973     unsigned int corrupted_input;
2974 
2975     tmpname = cli_gentemp_with_prefix(ctx->sub_tmpdir, "embedded-pe");
2976     if (!tmpname)
2977         return CL_EMEM;
2978 
2979     if ((fd = open(tmpname, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) < 0) {
2980         cli_errmsg("cli_scanembpe: Can't create file %s\n", tmpname);
2981         free(tmpname);
2982         return CL_ECREAT;
2983     }
2984 
2985     todo = map->len - offset;
2986     while (1) {
2987         bytes = MIN(todo, map->pgsz);
2988         if (!bytes)
2989             break;
2990 
2991         if (!(buff = fmap_need_off_once(map, offset + size, bytes))) {
2992             close(fd);
2993             if (!ctx->engine->keeptmp) {
2994                 if (cli_unlink(tmpname)) {
2995                     free(tmpname);
2996                     return CL_EUNLINK;
2997                 }
2998             }
2999             free(tmpname);
3000             return CL_EREAD;
3001         }
3002         size += bytes;
3003         todo -= bytes;
3004 
3005         if (cli_checklimits("cli_scanembpe", ctx, size, 0, 0) != CL_CLEAN)
3006             break;
3007 
3008         if (cli_writen(fd, buff, bytes) != bytes) {
3009             cli_dbgmsg("cli_scanembpe: Can't write to temporary file\n");
3010             close(fd);
3011             if (!ctx->engine->keeptmp) {
3012                 if (cli_unlink(tmpname)) {
3013                     free(tmpname);
3014                     return CL_EUNLINK;
3015                 }
3016             }
3017             free(tmpname);
3018             return CL_EWRITE;
3019         }
3020     }
3021 
3022     corrupted_input      = ctx->corrupted_input;
3023     ctx->corrupted_input = 1;
3024     ret                  = cli_magic_scan_desc(fd, tmpname, ctx, NULL);
3025     ctx->corrupted_input = corrupted_input;
3026     if (ret == CL_VIRUS) {
3027         cli_dbgmsg("cli_scanembpe: Infected with %s\n", cli_get_last_virus(ctx));
3028         close(fd);
3029         if (!ctx->engine->keeptmp) {
3030             if (cli_unlink(tmpname)) {
3031                 free(tmpname);
3032                 return CL_EUNLINK;
3033             }
3034         }
3035         free(tmpname);
3036         return CL_VIRUS;
3037     }
3038 
3039     close(fd);
3040     if (!ctx->engine->keeptmp) {
3041         if (cli_unlink(tmpname)) {
3042             free(tmpname);
3043             return CL_EUNLINK;
3044         }
3045     }
3046     free(tmpname);
3047 
3048     /* intentionally ignore possible errors from cli_magic_scan_desc */
3049     return CL_CLEAN;
3050 }
3051 
3052 #if defined(_WIN32) || defined(C_LINUX) || defined(C_DARWIN)
3053 #define PERF_MEASURE
3054 #endif
3055 
3056 #ifdef PERF_MEASURE
3057 
3058 static struct
3059 {
3060     enum perfev id;
3061     const char *name;
3062     enum ev_type type;
3063 } perf_events[] = {
3064     {PERFT_SCAN, "full scan", ev_time},
3065     {PERFT_PRECB, "prescan cb", ev_time},
3066     {PERFT_POSTCB, "postscan cb", ev_time},
3067     {PERFT_CACHE, "cache", ev_time},
3068     {PERFT_FT, "filetype", ev_time},
3069     {PERFT_CONTAINER, "container", ev_time},
3070     {PERFT_SCRIPT, "script", ev_time},
3071     {PERFT_PE, "pe", ev_time},
3072     {PERFT_RAW, "raw", ev_time},
3073     {PERFT_RAWTYPENO, "raw container", ev_time},
3074     {PERFT_MAP, "map", ev_time},
3075     {PERFT_BYTECODE, "bytecode", ev_time},
3076     {PERFT_KTIME, "kernel", ev_int},
3077     {PERFT_UTIME, "user", ev_int}};
3078 
/**
 * @brief Read the accumulated kernel and user CPU time, in microseconds.
 *
 * On Windows the values come from GetThreadTimes() for the current thread
 * (100 ns units divided by 10). Elsewhere they come from times() and are
 * converted from clock ticks using sysconf(_SC_CLK_TCK); note that POSIX
 * times() reports the times of the calling process.
 *
 * Both outputs are set to 0 if the times cannot be obtained, including when
 * sysconf() fails to report a usable tick rate.
 *
 * @param[out] kt  Kernel (system) CPU time in microseconds.
 * @param[out] ut  User CPU time in microseconds.
 */
static void get_thread_times(uint64_t *kt, uint64_t *ut)
{
#ifdef _WIN32
    FILETIME c, e, k, u;
    ULARGE_INTEGER kl, ul;
    if (!GetThreadTimes(GetCurrentThread(), &c, &e, &k, &u)) {
        *kt = *ut = 0;
        return;
    }
    kl.LowPart  = k.dwLowDateTime;
    kl.HighPart = k.dwHighDateTime;
    ul.LowPart  = u.dwLowDateTime;
    ul.HighPart = u.dwHighDateTime;
    *kt         = kl.QuadPart / 10;
    *ut         = ul.QuadPart / 10;
#else
    struct tms tbuf;
    /* sysconf() returns -1 on failure; never use a non-positive value as a divisor. */
    long ticks_per_sec = sysconf(_SC_CLK_TCK);

    if (ticks_per_sec > 0 && times(&tbuf) != ((clock_t)-1)) {
        *kt = ((uint64_t)1000000) * tbuf.tms_stime / (uint64_t)ticks_per_sec;
        *ut = ((uint64_t)1000000) * tbuf.tms_utime / (uint64_t)ticks_per_sec;
    } else {
        *kt = *ut = 0;
    }
#endif
}
3105 
3106 static inline void perf_init(cli_ctx *ctx)
3107 {
3108     uint64_t kt, ut;
3109     unsigned i;
3110 
3111     if (!SCAN_DEV_COLLECT_PERF_INFO)
3112         return;
3113 
3114     ctx->perf = cli_events_new(PERFT_LAST);
3115     for (i = 0; i < sizeof(perf_events) / sizeof(perf_events[0]); i++) {
3116         if (cli_event_define(ctx->perf, perf_events[i].id, perf_events[i].name,
3117                              perf_events[i].type, multiple_sum) == -1)
3118             continue;
3119     }
3120     cli_event_time_start(ctx->perf, PERFT_SCAN);
3121     get_thread_times(&kt, &ut);
3122     cli_event_int(ctx->perf, PERFT_KTIME, -kt);
3123     cli_event_int(ctx->perf, PERFT_UTIME, -ut);
3124 }
3125 
3126 static inline void perf_done(cli_ctx *ctx)
3127 {
3128     char timestr[512];
3129     char *p;
3130     unsigned i;
3131     uint64_t kt, ut;
3132     char *pend;
3133     cli_events_t *perf = ctx->perf;
3134 
3135     if (!perf)
3136         return;
3137 
3138     p     = timestr;
3139     pend  = timestr + sizeof(timestr) - 1;
3140     *pend = 0;
3141 
3142     cli_event_time_stop(perf, PERFT_SCAN);
3143     get_thread_times(&kt, &ut);
3144     cli_event_int(perf, PERFT_KTIME, kt);
3145     cli_event_int(perf, PERFT_UTIME, ut);
3146 
3147     for (i = 0; i < sizeof(perf_events) / sizeof(perf_events[0]); i++) {
3148         union ev_val val;
3149         unsigned count;
3150 
3151         cli_event_get(perf, perf_events[i].id, &val, &count);
3152         if (p < pend)
3153             p += snprintf(p, pend - p, "%s: %d.%03ums, ", perf_events[i].name,
3154                           (signed)(val.v_int / 1000),
3155                           (unsigned)(val.v_int % 1000));
3156     }
3157     *p = 0;
3158     cli_infomsg(ctx, "performance: %s\n", timestr);
3159 
3160     cli_events_free(perf);
3161     ctx->perf = NULL;
3162 }
3163 
/* Start the timer of perf event `id` by forwarding ctx->perf to cli_event_time_start(). */
static inline void perf_start(cli_ctx *ctx, int id)
{
    cli_event_time_start(ctx->perf, id);
}
3168 
/* Stop the timer of perf event `id` by forwarding ctx->perf to cli_event_time_stop(). */
static inline void perf_stop(cli_ctx *ctx, int id)
{
    cli_event_time_stop(ctx->perf, id);
}
3173 
/*
 * Start a nested timer: forwards ctx->perf, `id` and `nestedid` to
 * cli_event_time_nested_start().
 */
static inline void perf_nested_start(cli_ctx *ctx, int id, int nestedid)
{
    cli_event_time_nested_start(ctx->perf, id, nestedid);
}
3178 
/*
 * Stop a nested timer: forwards ctx->perf, `id` and `nestedid` to
 * cli_event_time_nested_stop().
 */
static inline void perf_nested_stop(cli_ctx *ctx, int id, int nestedid)
{
    cli_event_time_nested_stop(ctx->perf, id, nestedid);
}
3183 
3184 #else
/*
 * No-op replacements for the perf_* helpers, compiled when PERF_MEASURE is not
 * defined. They keep the same signatures so callers need no conditional code;
 * UNUSEDPARAM() is applied to every parameter because none of them is used.
 */

/* No-op: performance collection is compiled out. */
static inline void perf_init(cli_ctx *ctx)
{
    UNUSEDPARAM(ctx);
}
/* No-op: performance collection is compiled out. */
static inline void perf_start(cli_ctx *ctx, int id)
{
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(id);
}
/* No-op: performance collection is compiled out. */
static inline void perf_stop(cli_ctx *ctx, int id)
{
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(id);
}
/* No-op: performance collection is compiled out. */
static inline void perf_nested_start(cli_ctx *ctx, int id, int nestedid)
{
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(id);
    UNUSEDPARAM(nestedid);
}
/* No-op: performance collection is compiled out. */
static inline void perf_nested_stop(cli_ctx *ctx, int id, int nestedid)
{
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(id);
    UNUSEDPARAM(nestedid);
}
/* No-op: performance collection is compiled out. */
static inline void perf_done(cli_ctx *ctx)
{
    UNUSEDPARAM(ctx);
}
3215 #endif
3216 
3217 /**
3218  * @brief Perform raw scan of current fmap.
3219  *
3220  * @param ctx       Current scan context.
3221  * @param type      File type
3222  * @param typercg   Enable type recognition (file typing scan results).
3223  *                  If 0, will be a regular ac-mode scan.
3224  * @param dettype   [out] If typercg enabled and scan detects HTML or MAIL types,
3225  *                  will output HTML or MAIL types after performing HTML/MAIL scans
3226  * @param refhash   Hash of current fmap
3227  * @return cl_error_t
3228  */
3229 static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_file_t *dettype, unsigned char *refhash)
3230 {
3231     cl_error_t ret = CL_CLEAN, nret = CL_CLEAN;
3232     struct cli_matched_type *ftoffset = NULL, *fpt;
3233     struct cli_exe_info peinfo;
3234     unsigned int acmode = AC_SCAN_VIR, break_loop = 0;
3235 
3236 #if HAVE_JSON
3237     struct json_object *parent_property = NULL;
3238 #else
3239     void *parent_property = NULL;
3240 #endif
3241 
3242     if ((typercg) &&
3243         // We should also omit bzips, but DMG's may be detected in bzips. (type != CL_TYPE_BZ) &&        /* Omit BZ files because they can contain portions of original files like zip file entries that cause invalid extractions and lots of warnings. Decompress first, then scan! */
3244         (type != CL_TYPE_GZ) &&        /* Omit GZ files because they can contain portions of original files like zip file entries that cause invalid extractions and lots of warnings. Decompress first, then scan! */
3245         (type != CL_TYPE_CPIO_OLD) &&  /* Omit CPIO_OLD files because it's an image format that we can extract and scan manually. */
3246         (type != CL_TYPE_ZIP) &&       /* Omit ZIP files because it'll detect each zip file entry as SFXZIP, which is a waste. We'll extract it and then scan. */
3247         (type != CL_TYPE_ZIPSFX) &&    /* Omit SFX archive types from being checked for embedded content. They should only be parsed for contained files. Those contained files could be EXE's with more SFX, but that's the nature of containers. */
3248         (type != CL_TYPE_ARJSFX) &&    /* " */
3249         (type != CL_TYPE_RARSFX) &&    /* " */
3250         (type != CL_TYPE_EGGSFX) &&    /* " */
3251         (type != CL_TYPE_CABSFX) &&    /* " */
3252         (type != CL_TYPE_7ZSFX) &&     /* " */
3253         (type != CL_TYPE_OLD_TAR) &&   /* Omit OLD TAR files because it's a raw archive format that we can extract and scan manually. */
3254         (type != CL_TYPE_POSIX_TAR)) { /* Omit POSIX TAR files because it's a raw archive format that we can extract and scan manually. */
3255         /*
         * Enable file type recognition scan mode if requested, except for some problematic types (above).
3257          */
3258         acmode |= AC_SCAN_FT;
3259     }
3260 
3261     perf_start(ctx, PERFT_RAW);
3262     ret = cli_scan_fmap(ctx, type == CL_TYPE_TEXT_ASCII ? CL_TYPE_ANY : type, 0, &ftoffset, acmode, NULL, refhash);
3263     perf_stop(ctx, PERFT_RAW);
3264 
3265     // I think this (CL_TYPENO business) causes embedded file extraction to stop when a
3266     // signature has matched in cli_scan_fmap, which wouldn't be what
3267     // we want if allmatch is specified.
3268     //
3269     // TODO: find a way to return type matches separately from malware matches
3270     if (ret >= CL_TYPENO) {
3271         perf_nested_start(ctx, PERFT_RAWTYPENO, PERFT_SCAN);
3272         fpt = ftoffset;
3273 
3274         while (fpt) {
3275             if (fpt->offset > 0) {
3276                 bool type_has_been_handled = true;
3277 
3278 #if HAVE_JSON
3279                 if (SCAN_COLLECT_METADATA && ctx->wrkproperty) {
3280                     json_object *arrobj;
3281 
3282                     parent_property = ctx->wrkproperty;
3283                     if (!json_object_object_get_ex(parent_property, "EmbeddedObjects", &arrobj)) {
3284                         arrobj = json_object_new_array();
3285                         if (NULL == arrobj) {
3286                             cli_errmsg("scanraw: no memory for json properties object\n");
3287                             nret = CL_EMEM;
3288                             break;
3289                         }
3290                         json_object_object_add(parent_property, "EmbeddedObjects", arrobj);
3291                     }
3292                     ctx->wrkproperty = json_object_new_object();
3293                     if (NULL == ctx->wrkproperty) {
3294                         cli_errmsg("scanraw: no memory for json properties object\n");
3295                         nret = CL_EMEM;
3296                         break;
3297                     }
3298                     json_object_array_add(arrobj, ctx->wrkproperty);
3299 
3300                     ret = cli_jsonstr(ctx->wrkproperty, "FileType", cli_ftname(fpt->type));
3301                     if (ret != CL_SUCCESS) {
3302                         cli_errmsg("scanraw: failed to add string to json object\n");
3303                         nret = CL_EMEM;
3304                         break;
3305                     }
3306 
3307                     ret = cli_jsonint64(ctx->wrkproperty, "Offset", (int64_t)fpt->offset);
3308                     if (ret != CL_SUCCESS) {
3309                         cli_errmsg("scanraw: failed to add int to json object\n");
3310                         nret = CL_EMEM;
3311                         break;
3312                     }
3313                 }
3314 #endif
3315                 /*
3316                  * First, use "embedded type recognition" to identify a file's actual type.
3317                  * (a.k.a. not embedded files, but file type detection corrections)
3318                  *
3319                  * Do this at all fmap layers. Though we should only reassign the types
3320                  * if the current type makes sense for the reassignment.
3321                  */
3322                 switch (fpt->type) {
3323                     case CL_TYPE_MHTML:
3324                         if (SCAN_PARSE_MAIL && (DCONF_MAIL & MAIL_CONF_MBOX)) {
3325                             if ((ctx->recursion_stack[ctx->recursion_level].type >= CL_TYPE_TEXT_ASCII) &&
3326                                 (ctx->recursion_stack[ctx->recursion_level].type <= CL_TYPE_BINARY_DATA)) {
3327                                 // HTML files may contain special characters and could be
3328                                 // misidentified as BINARY_DATA by cli_compare_ftm_file()
3329 
3330                                 // Reassign type of current layer based on what we discovered
3331                                 cli_recursion_stack_change_type(ctx, fpt->type);
3332 
3333                                 cli_dbgmsg("MHTML signature found at %u\n", (unsigned int)fpt->offset);
3334                                 nret = ret = cli_scanmail(ctx);
3335                             }
3336                         }
3337                         break;
3338 
3339                     case CL_TYPE_XDP:
3340                         if (SCAN_PARSE_PDF && (DCONF_DOC & DOC_CONF_PDF)) {
3341                             if ((ctx->recursion_stack[ctx->recursion_level].type >= CL_TYPE_TEXT_ASCII) &&
3342                                 (ctx->recursion_stack[ctx->recursion_level].type <= CL_TYPE_BINARY_DATA)) {
3343                                 // XML files may contain special characters and could be
3344                                 // misidentified as BINARY_DATA by cli_compare_ftm_file()
3345 
3346                                 // Reassign type of current layer based on what we discovered
3347                                 cli_recursion_stack_change_type(ctx, fpt->type);
3348 
3349                                 cli_dbgmsg("XDP signature found at %u\n", (unsigned int)fpt->offset);
3350                                 nret = ret = cli_scanxdp(ctx);
3351                             }
3352                         }
3353                         break;
3354 
3355                     case CL_TYPE_XML_WORD:
3356                         if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_MSXML)) {
3357                             if ((ctx->recursion_stack[ctx->recursion_level].type >= CL_TYPE_TEXT_ASCII) &&
3358                                 (ctx->recursion_stack[ctx->recursion_level].type <= CL_TYPE_BINARY_DATA)) {
3359                                 // XML files may contain special characters and could be
3360                                 // misidentified as BINARY_DATA by cli_compare_ftm_file()
3361 
3362                                 // Reassign type of current layer based on what we discovered
3363                                 cli_recursion_stack_change_type(ctx, fpt->type);
3364 
3365                                 cli_dbgmsg("XML-WORD signature found at %u\n", (unsigned int)fpt->offset);
3366                                 nret = ret = cli_scanmsxml(ctx);
3367                             }
3368                         }
3369                         break;
3370                     case CL_TYPE_XML_XL:
3371                         if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_MSXML)) {
3372                             if ((ctx->recursion_stack[ctx->recursion_level].type >= CL_TYPE_TEXT_ASCII) &&
3373                                 (ctx->recursion_stack[ctx->recursion_level].type <= CL_TYPE_BINARY_DATA)) {
3374                                 // XML files may contain special characters and could be
3375                                 // misidentified as BINARY_DATA by cli_compare_ftm_file()
3376 
3377                                 // Reassign type of current layer based on what we discovered
3378                                 cli_recursion_stack_change_type(ctx, fpt->type);
3379 
3380                                 cli_dbgmsg("XML-XL signature found at %u\n", (unsigned int)fpt->offset);
3381                                 nret = ret = cli_scanmsxml(ctx);
3382                             }
3383                         }
3384                         break;
3385                     case CL_TYPE_XML_HWP:
3386                         if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_HWP)) {
3387                             if ((ctx->recursion_stack[ctx->recursion_level].type >= CL_TYPE_TEXT_ASCII) &&
3388                                 (ctx->recursion_stack[ctx->recursion_level].type <= CL_TYPE_BINARY_DATA)) {
3389                                 // XML files may contain special characters and could be
3390                                 // misidentified as BINARY_DATA by cli_compare_ftm_file()
3391 
3392                                 // Reassign type of current layer based on what we discovered
3393                                 cli_recursion_stack_change_type(ctx, fpt->type);
3394 
3395                                 cli_dbgmsg("XML-HWP signature found at %u\n", (unsigned int)fpt->offset);
3396                                 nret = ret = cli_scanhwpml(ctx);
3397                             }
3398                         }
3399                         break;
3400 
3401                     case CL_TYPE_DMG:
3402                         if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_DMG)) {
3403                             // TODO: determine all types that DMG may start with
3404                             // if ((ctx->recursion_stack[ctx->recursion_level].type == CL_TYPE_BZIP2) || ...))
3405                             {
3406                                 // Reassign type of current layer based on what we discovered
3407                                 cli_recursion_stack_change_type(ctx, fpt->type);
3408 
3409                                 cli_dbgmsg("DMG signature found at %u\n", (unsigned int)fpt->offset);
3410                                 nret = cli_scandmg(ctx);
3411                             }
3412                         }
3413                         break;
3414 
3415                     case CL_TYPE_ISO9660:
3416                         if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ISO9660)) {
3417                             // TODO: determine all types that ISO9660 may start with
3418                             // if ((ctx->recursion_stack[ctx->recursion_level].type == CL_TYPE_ANY) || ...))
3419                             {
3420                                 // Reassign type of current layer based on what we discovered
3421                                 cli_recursion_stack_change_type(ctx, fpt->type);
3422 
3423                                 cli_dbgmsg("DMG signature found at %u\n", (unsigned int)fpt->offset);
3424                                 nret = cli_scaniso(ctx, fpt->offset);
3425                             }
3426                         }
3427                         break;
3428 
3429                     case CL_TYPE_MBR:
3430                         if (SCAN_PARSE_ARCHIVE) {
3431                             // TODO: determine all types that GPT or MBR may start with
3432                             // if ((ctx->recursion_stack[ctx->recursion_level].type == CL_TYPE_???) ||  ...))
3433                             {
3434                                 // First check if actually a GPT, not MBR.
3435                                 int iret = cli_mbr_check2(ctx, 0);
3436 
3437                                 if ((iret == CL_TYPE_GPT) && (DCONF_ARCH & ARCH_CONF_GPT)) {
3438                                     // Reassign type of current layer based on what we discovered
3439                                     cli_recursion_stack_change_type(ctx, CL_TYPE_GPT);
3440 
3441                                     cli_dbgmsg("Recognized GUID Partition Table file\n");
3442                                     cli_dbgmsg("GPT signature found at %u\n", (unsigned int)fpt->offset);
3443                                     nret = cli_scangpt(ctx, 0);
3444                                 } else if ((iret == CL_CLEAN) && (DCONF_ARCH & ARCH_CONF_MBR)) {
3445                                     // Reassign type of current layer based on what we discovered
3446                                     cli_recursion_stack_change_type(ctx, CL_TYPE_MBR);
3447 
3448                                     cli_dbgmsg("MBR signature found at %u\n", (unsigned int)fpt->offset);
3449                                     nret = cli_scanmbr(ctx, 0);
3450                                 }
3451                             }
3452                         }
3453                         break;
3454 
3455                     default:
3456                         type_has_been_handled = false;
3457                 }
3458 
3459                 /*
3460                  * Next, check for actual embedded files.
3461                  */
3462                 if ((ctx->recursion_stack[ctx->recursion_level].recursion_level_buffer_fmap == 0) &&
3463                     (false == type_has_been_handled)) {
3464 
3465                     fmap_t *new_map = NULL;
3466 
3467                     /*
3468                      * Only do this though if we're at the top fmap layer of a buffer.
3469                      *
3470                      * This restriction will prevent detecting the same embedded content
3471                      * more than once when recursing with embedded file type recognition
3472                      * deeper within the same buffer.
3473                      */
3474                     cli_dbgmsg("%s signature found at %u\n", cli_ftname(fpt->type), (unsigned int)fpt->offset);
3475 
3476                     type_has_been_handled = true;
3477 
3478                     switch (fpt->type) {
3479                         case CL_TYPE_RARSFX:
3480                             if (type != CL_TYPE_RAR && have_rar && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_RAR)) {
3481 
3482                                 /// TODO: This is extremely expensive because it has to hash the fpt->offset -> len!
3483                                 /// We need to find a way to not hash every time!!!!
3484                                 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3485                                 if (NULL == new_map) {
3486                                     ret = nret = CL_EMEM;
3487                                     cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3488                                     break;
3489                                 }
3490 
3491                                 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_RAR, false); /* Perform scan with child fmap */
3492                                 if (CL_SUCCESS != nret) {
3493                                     ret = nret;
3494                                     cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3495                                     break;
3496                                 }
3497 
3498                                 nret = cli_scanrar(ctx);
3499 
3500                                 (void)cli_recursion_stack_pop(ctx);
3501                             }
3502                             break;
3503 
3504                         case CL_TYPE_EGGSFX:
3505                             if (type != CL_TYPE_EGG && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_EGG)) {
3506 
3507                                 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3508                                 if (NULL == new_map) {
3509                                     ret = nret = CL_EMEM;
3510                                     cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3511                                     break;
3512                                 }
3513 
3514                                 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_EGG, false); /* Perform scan with child fmap */
3515                                 if (CL_SUCCESS != nret) {
3516                                     ret = nret;
3517                                     cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3518                                     break;
3519                                 }
3520 
3521                                 nret = cli_scanegg(ctx);
3522 
3523                                 (void)cli_recursion_stack_pop(ctx);
3524                             }
3525                             break;
3526 
3527                         case CL_TYPE_ZIPSFX:
3528                             if (type != CL_TYPE_ZIP && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ZIP)) {
3529 
3530                                 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3531                                 if (NULL == new_map) {
3532                                     ret = nret = CL_EMEM;
3533                                     cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3534                                     break;
3535                                 }
3536 
3537                                 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_ZIP, false); /* Perform scan with child fmap */
3538                                 if (CL_SUCCESS != nret) {
3539                                     ret = nret;
3540                                     cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3541                                     break;
3542                                 }
3543 
3544                                 nret = cli_unzip_single(ctx, 0);
3545 
3546                                 (void)cli_recursion_stack_pop(ctx);
3547                             }
3548                             break;
3549 
3550                         case CL_TYPE_CABSFX:
3551                             if (type != CL_TYPE_MSCAB && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CAB)) {
3552 
3553                                 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3554                                 if (NULL == new_map) {
3555                                     ret = nret = CL_EMEM;
3556                                     cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3557                                     break;
3558                                 }
3559 
3560                                 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_MSCAB, false); /* Perform scan with child fmap */
3561                                 if (CL_SUCCESS != nret) {
3562                                     ret = nret;
3563                                     cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3564                                     break;
3565                                 }
3566 
3567                                 nret = cli_scanmscab(ctx, 0);
3568 
3569                                 (void)cli_recursion_stack_pop(ctx);
3570                             }
3571                             break;
3572 
3573                         case CL_TYPE_ARJSFX:
3574                             if (type != CL_TYPE_ARJ && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ARJ)) {
3575 
3576                                 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3577                                 if (NULL == new_map) {
3578                                     ret = nret = CL_EMEM;
3579                                     cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3580                                     break;
3581                                 }
3582 
3583                                 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_ARJ, false); /* Perform scan with child fmap */
3584                                 if (CL_SUCCESS != nret) {
3585                                     ret = nret;
3586                                     cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3587                                     break;
3588                                 }
3589 
3590                                 nret = cli_scanarj(ctx);
3591 
3592                                 (void)cli_recursion_stack_pop(ctx);
3593                             }
3594                             break;
3595 
3596                         case CL_TYPE_7ZSFX:
3597                             if (type != CL_TYPE_7Z && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_7Z)) {
3598 
3599                                 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3600                                 if (NULL == new_map) {
3601                                     ret = nret = CL_EMEM;
3602                                     cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3603                                     break;
3604                                 }
3605 
3606                                 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_7Z, false); /* Perform scan with child fmap */
3607                                 if (CL_SUCCESS != nret) {
3608                                     ret = nret;
3609                                     cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3610                                     break;
3611                                 }
3612 
3613                                 nret = cli_7unz(ctx, 0);
3614 
3615                                 (void)cli_recursion_stack_pop(ctx);
3616                             }
3617                             break;
3618 
3619                         case CL_TYPE_NULSFT:
3620                             if (SCAN_PARSE_ARCHIVE && type == CL_TYPE_MSEXE && (DCONF_ARCH & ARCH_CONF_NSIS) && fpt->offset > 4) {
3621                                 // Note: CL_TYPE_NULSFT is special, because the file actually starts 4 bytes before the start of the signature match
3622                                 new_map = fmap_duplicate(ctx->fmap, fpt->offset - 4, ctx->fmap->len - (fpt->offset - 4), NULL);
3623                                 if (NULL == new_map) {
3624                                     ret = nret = CL_EMEM;
3625                                     cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3626                                     break;
3627                                 }
3628 
3629                                 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_NULSFT, false); /* Perform scan with child fmap */
3630                                 if (CL_SUCCESS != nret) {
3631                                     ret = nret;
3632                                     cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3633                                     break;
3634                                 }
3635 
3636                                 nret = cli_scannulsft(ctx, 0);
3637 
3638                                 (void)cli_recursion_stack_pop(ctx);
3639                             }
3640                             break;
3641 
3642                         case CL_TYPE_AUTOIT:
3643                             if (SCAN_PARSE_ARCHIVE && type == CL_TYPE_MSEXE && (DCONF_ARCH & ARCH_CONF_AUTOIT)) {
3644 
3645                                 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3646                                 if (NULL == new_map) {
3647                                     ret = nret = CL_EMEM;
3648                                     cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3649                                     break;
3650                                 }
3651 
3652                                 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_AUTOIT, false); /* Perform scan with child fmap */
3653                                 if (CL_SUCCESS != nret) {
3654                                     ret = nret;
3655                                     cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3656                                     break;
3657                                 }
3658 
3659                                 nret = cli_scanautoit(ctx, 23);
3660 
3661                                 (void)cli_recursion_stack_pop(ctx);
3662                             }
3663                             break;
3664 
3665                         case CL_TYPE_ISHIELD_MSI:
3666                             if (SCAN_PARSE_ARCHIVE && type == CL_TYPE_MSEXE && (DCONF_ARCH & ARCH_CONF_ISHIELD)) {
3667 
3668                                 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3669                                 if (NULL == new_map) {
3670                                     ret = nret = CL_EMEM;
3671                                     cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3672                                     break;
3673                                 }
3674 
3675                                 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_ISHIELD_MSI, false); /* Perform scan with child fmap */
3676                                 if (CL_SUCCESS != nret) {
3677                                     ret = nret;
3678                                     cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3679                                     break;
3680                                 }
3681 
3682                                 nret = cli_scanishield_msi(ctx, 14);
3683 
3684                                 (void)cli_recursion_stack_pop(ctx);
3685                             }
3686                             break;
3687 
3688                         case CL_TYPE_PDF:
3689                             if (type != CL_TYPE_PDF && SCAN_PARSE_PDF && (DCONF_DOC & DOC_CONF_PDF)) {
3690 
3691                                 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3692                                 if (NULL == new_map) {
3693                                     ret = nret = CL_EMEM;
3694                                     cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3695                                     break;
3696                                 }
3697 
3698                                 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_PDF, false); /* Perform scan with child fmap */
3699                                 if (CL_SUCCESS != nret) {
3700                                     ret = nret;
3701                                     cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3702                                     break;
3703                                 }
3704 
3705                                 nret = cli_scanpdf(ctx, 0);
3706 
3707                                 (void)cli_recursion_stack_pop(ctx);
3708                             }
3709                             break;
3710 
3711                         case CL_TYPE_MSEXE:
3712                             if (SCAN_PARSE_PE && (type == CL_TYPE_MSEXE || type == CL_TYPE_ZIP || type == CL_TYPE_MSOLE2) && ctx->dconf->pe) {
3713 
3714                                 if ((uint64_t)(ctx->fmap->len - fpt->offset) > ctx->engine->maxembeddedpe) {
3715                                     cli_dbgmsg("scanraw: MaxEmbeddedPE exceeded\n");
3716                                     break;
3717                                 }
3718 
3719                                 new_map = fmap_duplicate(ctx->fmap, fpt->offset, ctx->fmap->len - fpt->offset, NULL);
3720                                 if (NULL == new_map) {
3721                                     ret = nret = CL_EMEM;
3722                                     cli_dbgmsg("scanraw: Failed to duplicate fmap to scan embedded file.\n");
3723                                     break;
3724                                 }
3725 
3726                                 nret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_MSEXE, false); /* Perform scan with child fmap */
3727                                 if (CL_SUCCESS != nret) {
3728                                     ret = nret;
3729                                     cli_dbgmsg("scanraw: Failed to add map to recursion stack to scan embedded file.\n");
3730                                     break;
3731                                 }
3732                                 // IMPORTANT: Must not break or return before cli_recursion_stack_pop!
3733 
3734                                 cli_exe_info_init(&peinfo, 0);
3735 
3736                                 // TODO We could probably substitute in a quicker
3737                                 // method of determining whether a PE file exists
3738                                 // at this offset.
3739                                 if (cli_peheader(ctx->fmap, &peinfo, CLI_PEHEADER_OPT_NONE, NULL) != 0) {
3740                                     cli_dbgmsg("Header check for MSEXE detection failed, probably not actually an embedded PE file.\n");
3741 
3742                                     /* Despite failing, peinfo memory may have been allocated and must be freed. */
3743                                     cli_exe_info_destroy(&peinfo);
3744 
3745                                 } else {
3746                                     cli_dbgmsg("*** Detected embedded PE file at %u ***\n", (unsigned int)fpt->offset);
3747 
3748                                     /* Immediately free up peinfo allocated memory, prior to any recursion */
3749                                     cli_exe_info_destroy(&peinfo);
3750 
3751                                     nret       = cli_scanembpe(ctx, 0);
3752                                     break_loop = 1; /* we can stop here and other
3753                                                      * embedded executables will
3754                                                      * be found recursively
3755                                                      * through the above call
3756                                                      */
3757 
3758                                     // TODO This method of embedded PE extraction
3759                                     // is kinda gross in that:
3760                                     //   - if you have an executable that contains
3761                                     //     20 other exes, the bytes associated with
3762                                     //     the last exe will have been included in
3763                                     //     hash computations and things 20 times
3764                                     //     (as overlay data to the previously
3765                                     //     extracted exes).
3766                                     //   - if you have a signed embedded exe, it
3767                                     //     will fail to validate after extraction
3768                                     //     bc it has overlay data, which is a
3769                                     //     violation of the Authenticode spec.
3770                                     //   - this method of extraction is subject to
3771                                     //     the recursion limit, which is fairly
3772                                     //     low by default (I think 16)
3773                                     //
3774                                     // It'd be awesome if we could compute the PE
3775                                     // size from the PE header and just extract
3776                                     // that.
3777                                 }
3778 
3779                                 (void)cli_recursion_stack_pop(ctx);
3780                             }
3781                             break;
3782 
3783                         default:
3784                             type_has_been_handled = false;
3785                             cli_dbgmsg("scanraw: Type %u not handled in fpt loop\n", fpt->type);
3786                     }
3787 
3788                     if (NULL != new_map) {
3789                         free_duplicate_fmap(new_map);
3790                     }
3791                 }
3792             }
3793 
3794             if ((nret == CL_VIRUS && !SCAN_ALLMATCHES) ||
3795                 (nret == CL_EMEM) ||
3796                 (ctx->abort_scan) ||
3797                 (break_loop)) {
3798                 break;
3799             }
3800 
3801             fpt = fpt->next;
3802 
3803 #if HAVE_JSON
3804             if (NULL != parent_property) {
3805                 ctx->wrkproperty = (struct json_object *)(parent_property);
3806                 parent_property  = NULL;
3807             }
3808 #endif
3809         }
3810 
3811         if (nret != CL_VIRUS) {
3812             /*
3813              * Now run the other file type parsers that may rely on file type
3814              * recognition to determine the actual file type.
3815              */
3816             switch (ret) {
3817                 case CL_TYPE_HTML:
3818                     /* bb#11196 - autoit script file misclassified as HTML */
3819                     if (cli_recursion_stack_get_type(ctx, -2) == CL_TYPE_AUTOIT) {
3820                         ret = CL_TYPE_TEXT_ASCII;
3821                     } else if (SCAN_PARSE_HTML &&
3822                                (type == CL_TYPE_TEXT_ASCII ||
3823                                 type == CL_TYPE_GIF) && /* Scan GIFs for embedded HTML/Javascript */
3824                                (DCONF_DOC & DOC_CONF_HTML)) {
3825                         *dettype = CL_TYPE_HTML;
3826                         cli_recursion_stack_change_type(ctx, CL_TYPE_HTML);
3827                         nret = cli_scanhtml(ctx);
3828                     }
3829                     break;
3830 
3831                 case CL_TYPE_MAIL:
3832                     if (SCAN_PARSE_MAIL && type == CL_TYPE_TEXT_ASCII && (DCONF_MAIL & MAIL_CONF_MBOX)) {
3833                         *dettype = CL_TYPE_MAIL;
3834                         cli_recursion_stack_change_type(ctx, CL_TYPE_MAIL);
3835                         nret = cli_scanmail(ctx);
3836                     }
3837                     break;
3838 
3839                 default:
3840                     break;
3841             }
3842         }
3843 
3844         perf_nested_stop(ctx, PERFT_RAWTYPENO, PERFT_SCAN);
3845         ret = nret;
3846     }
3847 
3848 #if HAVE_JSON
3849     if (NULL != parent_property) {
3850         ctx->wrkproperty = (struct json_object *)(parent_property);
3851     }
3852 #endif
3853 
3854     while (ftoffset) {
3855         fpt      = ftoffset;
3856         ftoffset = ftoffset->next;
3857         free(fpt);
3858     }
3859 
3860     if (ret == CL_VIRUS)
3861         cli_dbgmsg("%s found\n", cli_get_last_virus(ctx));
3862 
3863     return ret;
3864 }
3865 
3866 void emax_reached(cli_ctx *ctx)
3867 {
3868     int32_t stack_index;
3869 
3870     if (NULL == ctx || NULL == ctx->recursion_stack) {
3871         return;
3872     }
3873 
3874     stack_index = (int32_t)ctx->recursion_level;
3875 
3876     while (stack_index >= 0) {
3877         fmap_t *map = ctx->recursion_stack[stack_index].fmap;
3878 
3879         if (NULL != map) {
3880             map->dont_cache_flag = 1;
3881         }
3882 
3883         stack_index -= 1;
3884     }
3885 
3886     cli_dbgmsg("emax_reached: marked parents as non cacheable\n");
3887 }
3888 
/*
 * Helpers for embedding the current source line number in a string literal.
 *
 * LINESTR stringifies its argument exactly as written.
 * LINESTR2 adds one level of indirection so that the argument is macro-expanded
 * first (e.g. __LINE__ becomes the line number) before LINESTR stringifies it.
 * __AT__ expands to the string literal " at line <N>", where <N> is the line on
 * which __AT__ is used.
 */
#define LINESTR(token) #token
#define LINESTR2(expanded) LINESTR(expanded)
#define __AT__ " at line " LINESTR2(__LINE__)
3892 
3893 static cl_error_t dispatch_prescan_callback(clcb_pre_scan cb, cli_ctx *ctx, const char *filetype)
3894 {
3895     cl_error_t status = CL_CLEAN;
3896 
3897     if (cb) {
3898         perf_start(ctx, PERFT_PRECB);
3899 
3900         status = cb(fmap_fd(ctx->fmap), filetype, ctx->cb_ctx);
3901         switch (status) {
3902             case CL_BREAK:
3903                 cli_dbgmsg("dispatch_prescan_callback: file whitelisted by callback\n");
3904                 perf_stop(ctx, PERFT_PRECB);
3905                 status = CL_BREAK;
3906                 break;
3907             case CL_VIRUS:
3908                 cli_dbgmsg("dispatch_prescan_callback: file blacklisted by callback\n");
3909                 cli_append_virus(ctx, "Detected.By.Callback");
3910                 perf_stop(ctx, PERFT_PRECB);
3911                 status = CL_VIRUS;
3912                 break;
3913             case CL_CLEAN:
3914                 break;
3915             default:
3916                 status = CL_CLEAN;
3917                 cli_warnmsg("dispatch_prescan_callback: ignoring bad return code from callback\n");
3918         }
3919 
3920         perf_stop(ctx, PERFT_PRECB);
3921     }
3922 
3923     return status;
3924 }
3925 
3926 cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type)
3927 {
3928     cl_error_t ret = CL_CLEAN;
3929     cl_error_t res;
3930     cl_error_t cb_retcode;
3931     cli_file_t dettype = 0;
3932     uint8_t typercg    = 1;
3933     size_t hashed_size;
3934     unsigned char *hash             = NULL;
3935     bitset_t *old_hook_lsig_matches = NULL;
3936     const char *filetype;
3937     int cache_clean = 0;
3938 #if HAVE_JSON
3939     struct json_object *parent_property = NULL;
3940 #else
3941     void *parent_property = NULL;
3942 #endif
3943 
3944     char *old_temp_path = NULL;
3945     char *new_temp_path = NULL;
3946 
3947     if (!ctx->engine) {
3948         cli_errmsg("CRITICAL: engine == NULL\n");
3949         ret = CL_ENULLARG;
3950         goto early_ret;
3951     }
3952 
3953     if (!(ctx->engine->dboptions & CL_DB_COMPILED)) {
3954         cli_errmsg("CRITICAL: engine not compiled\n");
3955         ret = CL_EMALFDB;
3956         goto early_ret;
3957     }
3958 
3959     if (ctx->fmap->len <= 5) {
3960         cli_dbgmsg("cli_magic_scandesc: File is too too small (%zu bytes), ignoring.\n", ctx->fmap->len);
3961         ret = CL_CLEAN;
3962         goto early_ret;
3963     }
3964 
3965     if (cli_updatelimits(ctx, ctx->fmap->len) != CL_CLEAN) {
3966         emax_reached(ctx);
3967         ret = CL_CLEAN;
3968         cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
3969         goto early_ret;
3970     }
3971 
3972     if (ctx->engine->keeptmp) {
3973         char *fmap_basename = NULL;
3974         /*
3975          * Keep-temp enabled, so create a sub-directory to provide extraction directory recursion.
3976          */
3977         if ((NULL != ctx->fmap->name) &&
3978             (CL_SUCCESS == cli_basename(ctx->fmap->name, strlen(ctx->fmap->name), &fmap_basename))) {
3979             /*
3980              * The fmap has a name, lets include it in the new sub-directory.
3981              */
3982             new_temp_path = cli_gentemp_with_prefix(ctx->sub_tmpdir, fmap_basename);
3983             free(fmap_basename);
3984             if (NULL == new_temp_path) {
3985                 cli_errmsg("cli_magic_scan: Failed to generate temp directory name.\n");
3986                 ret = CL_EMEM;
3987                 goto early_ret;
3988             }
3989         } else {
3990             /*
3991              * The fmap has no name or we failed to get the basename.
3992              */
3993             new_temp_path = cli_gentemp(ctx->sub_tmpdir);
3994             if (NULL == new_temp_path) {
3995                 cli_errmsg("cli_magic_scan: Failed to generate temp directory name.\n");
3996                 ret = CL_EMEM;
3997                 goto early_ret;
3998             }
3999         }
4000 
4001         old_temp_path   = ctx->sub_tmpdir;
4002         ctx->sub_tmpdir = new_temp_path;
4003 
4004         if (mkdir(ctx->sub_tmpdir, 0700)) {
4005             cli_errmsg("cli_magic_scan: Can't create tmp sub-directory for scan: %s.\n", ctx->sub_tmpdir);
4006             ret = CL_EACCES;
4007             goto early_ret;
4008         }
4009     }
4010 
4011     if (type == CL_TYPE_PART_ANY) {
4012         typercg = 0;
4013     }
4014 
4015     /*
4016      * Perform file typing from the start of the file.
4017      */
4018     perf_start(ctx, PERFT_FT);
4019     if ((type == CL_TYPE_ANY) || type == CL_TYPE_PART_ANY) {
4020         type = cli_determine_fmap_type(ctx->fmap, ctx->engine, type);
4021     }
4022     perf_stop(ctx, PERFT_FT);
4023     if (type == CL_TYPE_ERROR) {
4024         cli_dbgmsg("cli_magic_scan: cli_determine_fmap_type returned CL_TYPE_ERROR\n");
4025         ret = CL_EREAD;
4026         cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4027         goto early_ret;
4028     }
4029     filetype = cli_ftname(type);
4030 
4031     /* set current layer to the type we found */
4032     cli_recursion_stack_change_type(ctx, type);
4033 
4034 #if HAVE_JSON
4035     if (SCAN_COLLECT_METADATA) {
4036         /*
4037          * Create JSON object to record metadata during the scan.
4038          */
4039         if (NULL == ctx->properties) {
4040             ctx->properties = json_object_new_object();
4041             if (NULL == ctx->properties) {
4042                 cli_errmsg("cli_magic_scan: no memory for json properties object\n");
4043                 ret = CL_EMEM;
4044                 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4045                 goto early_ret;
4046             }
4047             ctx->wrkproperty = ctx->properties;
4048 
4049             ret = cli_jsonstr(ctx->properties, "Magic", "CLAMJSONv0");
4050             if (ret != CL_SUCCESS) {
4051                 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4052                 goto early_ret;
4053             }
4054             ret = cli_jsonstr(ctx->properties, "RootFileType", filetype);
4055             if (ret != CL_SUCCESS) {
4056                 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4057                 goto early_ret;
4058             }
4059 
4060         } else {
4061             json_object *arrobj;
4062 
4063             parent_property = ctx->wrkproperty;
4064             if (!json_object_object_get_ex(parent_property, "ContainedObjects", &arrobj)) {
4065                 arrobj = json_object_new_array();
4066                 if (NULL == arrobj) {
4067                     cli_errmsg("cli_magic_scan: no memory for json properties object\n");
4068                     ret = CL_EMEM;
4069                     cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4070                     goto early_ret;
4071                 }
4072                 json_object_object_add(parent_property, "ContainedObjects", arrobj);
4073             }
4074             ctx->wrkproperty = json_object_new_object();
4075             if (NULL == ctx->wrkproperty) {
4076                 cli_errmsg("cli_magic_scan: no memory for json properties object\n");
4077                 ret = CL_EMEM;
4078                 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4079                 goto early_ret;
4080             }
4081             json_object_array_add(arrobj, ctx->wrkproperty);
4082         }
4083 
4084         if (ctx->fmap->name) {
4085             ret = cli_jsonstr(ctx->wrkproperty, "FileName", ctx->fmap->name);
4086             if (ret != CL_SUCCESS) {
4087                 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4088                 goto early_ret;
4089             }
4090         }
4091         if (ctx->sub_filepath) {
4092             ret = cli_jsonstr(ctx->wrkproperty, "FilePath", ctx->sub_filepath);
4093             if (ret != CL_SUCCESS) {
4094                 cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4095                 goto early_ret;
4096             }
4097         }
4098         ret = cli_jsonstr(ctx->wrkproperty, "FileType", filetype);
4099         if (ret != CL_SUCCESS) {
4100             cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4101             goto early_ret;
4102         }
4103         ret = cli_jsonint(ctx->wrkproperty, "FileSize", ctx->fmap->len);
4104         if (ret != CL_SUCCESS) {
4105             cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4106             goto early_ret;
4107         }
4108     }
4109 #endif
4110 
4111     ret = dispatch_prescan_callback(ctx->engine->cb_pre_cache, ctx, filetype);
4112     if (CL_CLEAN != ret) {
4113         if (ret == CL_VIRUS) {
4114             ret = cli_check_fp(ctx, NULL);
4115         } else {
4116             ret = CL_CLEAN;
4117         }
4118         goto done;
4119     }
4120 
4121     /*
4122      * Get the maphash
4123      */
4124     if (CL_SUCCESS != fmap_get_MD5(ctx->fmap, &hash)) {
4125         cli_dbgmsg("cli_magic_scan: Failed to get a hash for the current fmap.\n");
4126         goto done;
4127     }
4128     hashed_size = ctx->fmap->len;
4129 
4130     /*
4131      * Check if we've already scanned this file before.
4132      */
4133     perf_start(ctx, PERFT_CACHE);
4134 
4135     if (!(SCAN_COLLECT_METADATA))
4136         res = cache_check(hash, ctx);
4137     else
4138         res = CL_VIRUS;
4139 
4140 #if HAVE_JSON
4141     if (SCAN_COLLECT_METADATA /* ctx.options->general & CL_SCAN_GENERAL_COLLECT_METADATA && ctx->wrkproperty != NULL */) {
4142         char hashstr[33];
4143         snprintf(hashstr, 33, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
4144                  hash[0], hash[1], hash[2], hash[3], hash[4], hash[5], hash[6], hash[7],
4145                  hash[8], hash[9], hash[10], hash[11], hash[12], hash[13], hash[14], hash[15]);
4146 
4147         ret = cli_jsonstr(ctx->wrkproperty, "FileMD5", hashstr);
4148         if (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE)
4149             memset(hash, 0, 16);
4150         if (ret != CL_SUCCESS) {
4151             cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4152             goto early_ret;
4153         }
4154     }
4155 #endif
4156 
4157     perf_stop(ctx, PERFT_CACHE);
4158 
4159     if (res != CL_VIRUS) {
4160         cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", ret, __AT__);
4161         goto early_ret;
4162     }
4163 
4164     old_hook_lsig_matches  = ctx->hook_lsig_matches;
4165     ctx->hook_lsig_matches = NULL;
4166 
4167     if (!((ctx->options->general & ~CL_SCAN_GENERAL_ALLMATCHES) || (ctx->options->parse) || (ctx->options->heuristic) || (ctx->options->mail) || (ctx->options->dev))) {
4168         /*
4169          * Scanning in raw mode (stdin, etc.)
4170          */
4171         ret = dispatch_prescan_callback(ctx->engine->cb_pre_scan, ctx, filetype);
4172         if (CL_CLEAN != ret) {
4173             if (ret == CL_VIRUS) {
4174                 ret = cli_check_fp(ctx, NULL);
4175             } else if (ret == CL_BREAK) {
4176                 ret = CL_CLEAN;
4177             }
4178             goto done;
4179         }
4180 
4181         if (CL_VIRUS == (ret = cli_scan_fmap(ctx, CL_TYPE_ANY, 0, NULL, AC_SCAN_VIR, NULL, hash)))
4182             cli_dbgmsg("cli_magic_scan: %s found in descriptor %d\n", cli_get_last_virus(ctx), fmap_fd(ctx->fmap));
4183 
4184         goto done;
4185     }
4186 
4187     ret = dispatch_prescan_callback(ctx->engine->cb_pre_scan, ctx, filetype);
4188     if (CL_CLEAN != ret) {
4189         if (ret == CL_VIRUS) {
4190             ret = cli_check_fp(ctx, NULL);
4191         } else if (ret == CL_BREAK) {
4192             ret = CL_CLEAN;
4193         }
4194         goto done;
4195     }
4196 
4197 #ifdef HAVE__INTERNAL__SHA_COLLECT
4198     if (!ctx->sha_collect && type == CL_TYPE_MSEXE)
4199         ctx->sha_collect = 1;
4200 #endif
4201 
4202     // We already saved the hook_lsig_matches (above)
4203     // The ctx one is NULL at present.
4204     ctx->hook_lsig_matches = cli_bitset_init();
4205     if (!ctx->hook_lsig_matches) {
4206         ret = CL_EMEM;
4207         goto done;
4208     }
4209 
4210     if (type != CL_TYPE_IGNORED && ctx->engine->sdb) {
4211         /*
4212          * If self protection mechanism enabled, do the scanraw() scan first
4213          * before extracting with a file type parser.
4214          */
4215         ret = scanraw(ctx, type, 0, &dettype, (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) ? NULL : hash);
4216         if (ret == CL_EMEM || ret == CL_VIRUS) {
4217             ret = cli_check_fp(ctx, NULL);
4218             goto done;
4219         }
4220     }
4221 
4222     /*
4223      * Run the file type parsers that we normally use before the raw scan.
4224      */
4225     perf_nested_start(ctx, PERFT_CONTAINER, PERFT_SCAN);
4226     switch (type) {
4227         case CL_TYPE_IGNORED:
4228             break;
4229 
4230         case CL_TYPE_HWP3:
4231             if (SCAN_PARSE_HWP3 && (DCONF_DOC & DOC_CONF_HWP))
4232                 ret = cli_scanhwp3(ctx);
4233             break;
4234 
4235         case CL_TYPE_HWPOLE2:
4236             if (SCAN_PARSE_OLE2 && (DCONF_ARCH & ARCH_CONF_OLE2))
4237                 ret = cli_scanhwpole2(ctx);
4238             break;
4239 
4240         case CL_TYPE_XML_WORD:
4241             if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_MSXML))
4242                 ret = cli_scanmsxml(ctx);
4243             break;
4244 
4245         case CL_TYPE_XML_XL:
4246             if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_MSXML))
4247                 ret = cli_scanmsxml(ctx);
4248             break;
4249 
4250         case CL_TYPE_XML_HWP:
4251             if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_HWP))
4252                 ret = cli_scanhwpml(ctx);
4253             break;
4254 
4255         case CL_TYPE_XDP:
4256             if (SCAN_PARSE_PDF && (DCONF_DOC & DOC_CONF_PDF))
4257                 ret = cli_scanxdp(ctx);
4258             break;
4259 
4260         case CL_TYPE_RAR:
4261             if (have_rar && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_RAR))
4262                 ret = cli_scanrar(ctx);
4263             break;
4264 
4265         case CL_TYPE_EGG:
4266             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_EGG))
4267                 ret = cli_scanegg(ctx);
4268             break;
4269 
4270         case CL_TYPE_OOXML_WORD:
4271         case CL_TYPE_OOXML_PPT:
4272         case CL_TYPE_OOXML_XL:
4273         case CL_TYPE_OOXML_HWP:
4274 #if HAVE_JSON
4275             if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_OOXML)) {
4276                 if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
4277                     ret = cli_process_ooxml(ctx, type);
4278 
4279                     if (ret == CL_EMEM || ret == CL_ENULLARG) {
4280                         /* critical error */
4281                         break;
4282                     } else if (ret != CL_SUCCESS) {
4283                         /*
4284                          * non-critical return => allow for the CL_TYPE_ZIP scan to occur
4285                          * cli_process_ooxml other possible returns:
4286                          *   CL_ETIMEOUT, CL_EMAXSIZE, CL_EMAXFILES, CL_EPARSE,
4287                          *   CL_EFORMAT, CL_BREAK, CL_ESTAT
4288                          */
4289                         ret = CL_SUCCESS;
4290                     }
4291                 }
4292             }
4293 #endif
4294             /* fall-through */
4295         case CL_TYPE_ZIP:
4296             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ZIP))
4297                 ret = cli_unzip(ctx);
4298             break;
4299 
4300         case CL_TYPE_GZ:
4301             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_GZ))
4302                 ret = cli_scangzip(ctx);
4303             break;
4304 
4305         case CL_TYPE_BZ:
4306             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_BZ))
4307                 ret = cli_scanbzip(ctx);
4308             break;
4309 
4310         case CL_TYPE_XZ:
4311             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_XZ))
4312                 ret = cli_scanxz(ctx);
4313             break;
4314 
4315         case CL_TYPE_GPT:
4316             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_GPT))
4317                 ret = cli_scangpt(ctx, 0);
4318             break;
4319 
4320         case CL_TYPE_APM:
4321             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_APM))
4322                 ret = cli_scanapm(ctx);
4323             break;
4324 
4325         case CL_TYPE_ARJ:
4326             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ARJ))
4327                 ret = cli_scanarj(ctx);
4328             break;
4329 
4330         case CL_TYPE_NULSFT:
4331             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_NSIS))
4332                 ret = cli_scannulsft(ctx, 0);
4333             break;
4334 
4335         case CL_TYPE_AUTOIT:
4336             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_AUTOIT))
4337                 ret = cli_scanautoit(ctx, 23);
4338             break;
4339 
4340         case CL_TYPE_MSSZDD:
4341             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_SZDD))
4342                 ret = cli_scanszdd(ctx);
4343             break;
4344 
4345         case CL_TYPE_MSCAB:
4346             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CAB))
4347                 ret = cli_scanmscab(ctx, 0);
4348             break;
4349 
4350         case CL_TYPE_HTML:
4351             if (SCAN_PARSE_HTML && (DCONF_DOC & DOC_CONF_HTML))
4352                 ret = cli_scanhtml(ctx);
4353             break;
4354 
4355         case CL_TYPE_HTML_UTF16:
4356             if (SCAN_PARSE_HTML && (DCONF_DOC & DOC_CONF_HTML))
4357                 ret = cli_scanhtml_utf16(ctx);
4358             break;
4359 
4360         case CL_TYPE_SCRIPT:
4361             if ((DCONF_DOC & DOC_CONF_SCRIPT) && dettype != CL_TYPE_HTML)
4362                 ret = cli_scanscript(ctx);
4363             break;
4364 
4365         case CL_TYPE_SWF:
4366             if (SCAN_PARSE_SWF && (DCONF_DOC & DOC_CONF_SWF))
4367                 ret = cli_scanswf(ctx);
4368             break;
4369 
4370         case CL_TYPE_RTF:
4371             if (SCAN_PARSE_ARCHIVE && (DCONF_DOC & DOC_CONF_RTF))
4372                 ret = cli_scanrtf(ctx);
4373             break;
4374 
4375         case CL_TYPE_MAIL:
4376             if (SCAN_PARSE_MAIL && (DCONF_MAIL & MAIL_CONF_MBOX))
4377                 ret = cli_scanmail(ctx);
4378             break;
4379 
4380         case CL_TYPE_MHTML:
4381             if (SCAN_PARSE_MAIL && (DCONF_MAIL & MAIL_CONF_MBOX))
4382                 ret = cli_scanmail(ctx);
4383             break;
4384 
4385         case CL_TYPE_TNEF:
4386             if (SCAN_PARSE_MAIL && (DCONF_MAIL & MAIL_CONF_TNEF))
4387                 ret = cli_scantnef(ctx);
4388             break;
4389 
4390         case CL_TYPE_UUENCODED:
4391             if (DCONF_OTHER & OTHER_CONF_UUENC)
4392                 ret = cli_scanuuencoded(ctx);
4393             break;
4394 
4395         case CL_TYPE_MSCHM:
4396             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CHM))
4397                 ret = cli_scanmschm(ctx);
4398             break;
4399 
4400         case CL_TYPE_MSOLE2:
4401             if (SCAN_PARSE_OLE2 && (DCONF_ARCH & ARCH_CONF_OLE2))
4402                 ret = cli_scanole2(ctx);
4403             break;
4404 
4405         case CL_TYPE_7Z:
4406             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_7Z))
4407                 ret = cli_7unz(ctx, 0);
4408             break;
4409 
4410         case CL_TYPE_POSIX_TAR:
4411             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_TAR))
4412                 ret = cli_scantar(ctx, 1);
4413             break;
4414 
4415         case CL_TYPE_OLD_TAR:
4416             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_TAR))
4417                 ret = cli_scantar(ctx, 0);
4418             break;
4419 
4420         case CL_TYPE_CPIO_OLD:
4421             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CPIO))
4422                 ret = cli_scancpio_old(ctx);
4423             break;
4424 
4425         case CL_TYPE_CPIO_ODC:
4426             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CPIO))
4427                 ret = cli_scancpio_odc(ctx);
4428             break;
4429 
4430         case CL_TYPE_CPIO_NEWC:
4431             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CPIO))
4432                 ret = cli_scancpio_newc(ctx, 0);
4433             break;
4434 
4435         case CL_TYPE_CPIO_CRC:
4436             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CPIO))
4437                 ret = cli_scancpio_newc(ctx, 1);
4438             break;
4439 
4440         case CL_TYPE_BINHEX:
4441             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_BINHEX))
4442                 ret = cli_binhex(ctx);
4443             break;
4444 
4445         case CL_TYPE_SCRENC:
4446             if (DCONF_OTHER & OTHER_CONF_SCRENC)
4447                 ret = cli_scanscrenc(ctx);
4448             break;
4449 
4450         case CL_TYPE_RIFF:
4451             if (SCAN_HEURISTICS && (DCONF_OTHER & OTHER_CONF_RIFF))
4452                 ret = cli_scanriff(ctx);
4453             break;
4454 
4455         case CL_TYPE_GRAPHICS:
4456             /*
4457              * This case is for unhandled graphics types such as BMP, JPEG 2000, etc.
4458              *
4459              * Note: JPEG 2000 is a very different format from JPEG, JPEG/JFIF, JPEG/Exif, JPEG/SPIFF (1994, 1997)
4460              * JPEG 2000 is not handled by cli_scanjpeg or cli_parsejpeg.
4461              */
4462             break;
4463 
4464         case CL_TYPE_GIF:
4465             if (SCAN_HEURISTICS && SCAN_HEURISTIC_BROKEN_MEDIA && (DCONF_OTHER & OTHER_CONF_GIF))
4466                 ret = cli_parsegif(ctx);
4467             break;
4468 
4469         case CL_TYPE_PNG:
4470             if (SCAN_HEURISTICS && (DCONF_OTHER & OTHER_CONF_PNG))
4471                 ret = cli_parsepng(ctx); /* PNG parser detects a couple CVE's as well as Broken.Media */
4472             break;
4473 
4474         case CL_TYPE_JPEG:
4475             if (SCAN_HEURISTICS && (DCONF_OTHER & OTHER_CONF_JPEG))
4476                 ret = cli_parsejpeg(ctx); /* JPG parser detects MS04-028 exploits as well as Broken.Media */
4477             break;
4478 
4479         case CL_TYPE_TIFF:
4480             if (SCAN_HEURISTICS && SCAN_HEURISTIC_BROKEN_MEDIA && (DCONF_OTHER & OTHER_CONF_TIFF) && ret != CL_VIRUS)
4481                 ret = cli_parsetiff(ctx);
4482             break;
4483 
4484         case CL_TYPE_PDF: /* FIXMELIMITS: pdf should be an archive! */
4485             if (SCAN_PARSE_PDF && (DCONF_DOC & DOC_CONF_PDF))
4486                 ret = cli_scanpdf(ctx, 0);
4487             break;
4488 
4489         case CL_TYPE_CRYPTFF:
4490             if (DCONF_OTHER & OTHER_CONF_CRYPTFF)
4491                 ret = cli_scancryptff(ctx);
4492             break;
4493 
4494         case CL_TYPE_ELF:
4495             if (SCAN_PARSE_ELF && ctx->dconf->elf)
4496                 ret = cli_scanelf(ctx);
4497             break;
4498 
4499         case CL_TYPE_MACHO:
4500             if (ctx->dconf->macho)
4501                 ret = cli_scanmacho(ctx, NULL);
4502             break;
4503 
4504         case CL_TYPE_MACHO_UNIBIN:
4505             if (ctx->dconf->macho)
4506                 ret = cli_scanmacho_unibin(ctx);
4507             break;
4508 
4509         case CL_TYPE_SIS:
4510             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_SIS))
4511                 ret = cli_scansis(ctx);
4512             break;
4513 
4514         case CL_TYPE_XAR:
4515             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_XAR))
4516                 ret = cli_scanxar(ctx);
4517             break;
4518 
4519         case CL_TYPE_PART_HFSPLUS:
4520             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_HFSPLUS))
4521                 ret = cli_scanhfsplus(ctx);
4522             break;
4523 
4524         case CL_TYPE_BINARY_DATA:
4525         case CL_TYPE_TEXT_UTF16BE:
4526             if (SCAN_HEURISTICS && (DCONF_OTHER & OTHER_CONF_MYDOOMLOG))
4527                 ret = cli_check_mydoom_log(ctx);
4528             break;
4529 
4530         case CL_TYPE_TEXT_ASCII:
4531             if (SCAN_HEURISTIC_STRUCTURED && (DCONF_OTHER & OTHER_CONF_DLP))
4532                 /* TODO: consider calling this from cli_scanscript() for
4533                  * a normalised text
4534                  */
4535 
4536                 ret = cli_scan_structured(ctx);
4537             break;
4538 
4539         default:
4540             break;
4541     }
4542     perf_nested_stop(ctx, PERFT_CONTAINER, PERFT_SCAN);
4543 
4544     /*
4545      * Perform the raw scan, which may include file type recognition signatures.
4546      */
4547     if ((ret == CL_VIRUS && !SCAN_ALLMATCHES) ||
4548         (ctx->abort_scan)) {
4549         goto done;
4550     }
4551 
4552     /* Disable type recognition for the raw scan for zip files larger than maxziptypercg */
4553     if (type == CL_TYPE_ZIP && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ZIP)) {
4554         /* CL_ENGINE_MAX_ZIPTYPERCG */
4555         uint64_t curr_len = ctx->fmap->len;
4556         if (curr_len > ctx->engine->maxziptypercg) {
4557             cli_dbgmsg("cli_magic_scan_desc: Not checking for embedded PEs (zip file > MaxZipTypeRcg)\n");
4558             typercg = 0;
4559         }
4560     }
4561 
4562     /* CL_TYPE_HTML: raw HTML files are not scanned, unless safety measure activated via DCONF */
4563     if (type != CL_TYPE_IGNORED && (type != CL_TYPE_HTML || !(SCAN_PARSE_HTML) || !(DCONF_DOC & DOC_CONF_HTML_SKIPRAW)) && !ctx->engine->sdb) {
4564         res = scanraw(ctx, type, typercg, &dettype, (ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) ? NULL : hash);
4565         if (res != CL_CLEAN) {
4566             switch (res) {
4567                 /* List of scan halts, runtime errors only! */
4568                 case CL_EUNLINK:
4569                 case CL_ESTAT:
4570                 case CL_ESEEK:
4571                 case CL_EWRITE:
4572                 case CL_EDUP:
4573                 case CL_ETMPFILE:
4574                 case CL_ETMPDIR:
4575                 case CL_EMEM:
4576                     cli_dbgmsg("Descriptor[%d]: scanraw error %s\n", fmap_fd(ctx->fmap), cl_strerror(res));
4577                     ret = res;
4578                     goto done;
4579                 /* CL_VIRUS = malware found, check FP and report.
4580                  * Likewise, if the file was determined to be trusted, then we
4581                  * can also finish with the scan. (Ex: EXE with a valid
4582                  * Authenticode sig.) */
4583                 case CL_VERIFIED:
                    // For now just convert CL_VERIFIED to CL_CLEAN, since
                    // CL_VERIFIED isn't used elsewhere
4586                     res = CL_CLEAN;
4587                     // Fall through
4588                 case CL_VIRUS:
4589                     ret = res;
4590                     if (SCAN_ALLMATCHES)
4591                         break;
4592                     goto done;
4593                 /* All other "MAX" conditions should still fully scan the current file */
4594                 case CL_ETIMEOUT:
4595                 case CL_EMAXREC:
4596                 case CL_EMAXSIZE:
4597                 case CL_EMAXFILES:
4598                     ret = res;
4599                     cli_dbgmsg("Descriptor[%d]: Continuing after scanraw reached %s\n",
4600                                fmap_fd(ctx->fmap), cl_strerror(res));
4601                     break;
4602                 /* Other errors must not block further scans below
4603                  * This specifically includes CL_EFORMAT & CL_EREAD & CL_EUNPACK
4604                  * Malformed/truncated files could report as any of these three.
4605                  */
4606                 default:
4607                     ret = res;
4608                     cli_dbgmsg("Descriptor[%d]: Continuing after scanraw error %s\n",
4609                                fmap_fd(ctx->fmap), cl_strerror(res));
4610             }
4611         }
4612     }
4613 
4614     /* Make sure we bail out if required. */
4615     if (ctx->abort_scan) {
4616         goto done;
4617     }
4618 
4619     /*
4620      * Now run the rest of the file type parsers.
4621      */
4622     switch (type) {
4623         /* bytecode hooks triggered by a lsig must be a hook
4624          * called from one of the functions here */
4625         case CL_TYPE_TEXT_ASCII:
4626         case CL_TYPE_TEXT_UTF16BE:
4627         case CL_TYPE_TEXT_UTF16LE:
4628         case CL_TYPE_TEXT_UTF8:
4629             perf_nested_start(ctx, PERFT_SCRIPT, PERFT_SCAN);
4630             if ((DCONF_DOC & DOC_CONF_SCRIPT) && dettype != CL_TYPE_HTML && (ret != CL_VIRUS || SCAN_ALLMATCHES) && SCAN_PARSE_HTML)
4631                 ret = cli_scanscript(ctx);
4632             if (SCAN_PARSE_MAIL && (DCONF_MAIL & MAIL_CONF_MBOX) && ret != CL_VIRUS && (cli_recursion_stack_get_type(ctx, -1) == CL_TYPE_MAIL || dettype == CL_TYPE_MAIL)) {
4633                 ret = cli_scan_fmap(ctx, CL_TYPE_MAIL, 0, NULL, AC_SCAN_VIR, NULL, NULL);
4634             }
4635             perf_nested_stop(ctx, PERFT_SCRIPT, PERFT_SCAN);
4636             break;
4637         /* Due to performance reasons all executables were first scanned
4638          * in raw mode. Now we will try to unpack them
4639          */
4640         case CL_TYPE_MSEXE:
4641             perf_nested_start(ctx, PERFT_PE, PERFT_SCAN);
4642             if (SCAN_PARSE_PE && ctx->dconf->pe) {
4643                 unsigned int corrupted_input = ctx->corrupted_input;
4644                 ret                          = cli_scanpe(ctx);
4645                 ctx->corrupted_input         = corrupted_input;
4646             }
4647             perf_nested_stop(ctx, PERFT_PE, PERFT_SCAN);
4648             break;
4649         case CL_TYPE_ELF:
4650             perf_nested_start(ctx, PERFT_ELF, PERFT_SCAN);
4651             ret = cli_unpackelf(ctx);
4652             perf_nested_stop(ctx, PERFT_ELF, PERFT_SCAN);
4653             break;
4654         case CL_TYPE_MACHO:
4655         case CL_TYPE_MACHO_UNIBIN:
4656             perf_nested_start(ctx, PERFT_MACHO, PERFT_SCAN);
4657             ret = cli_unpackmacho(ctx);
4658             perf_nested_stop(ctx, PERFT_MACHO, PERFT_SCAN);
4659             break;
4660         case CL_TYPE_BINARY_DATA:
4661             ret = cli_scan_fmap(ctx, CL_TYPE_OTHER, 0, NULL, AC_SCAN_VIR, NULL, NULL);
4662             break;
4663         default:
4664             break;
4665     }
4666 
4667 done:
4668     switch (ret) {
4669         /*
4670          * Limits exceeded
4671          */
4672         // Exceeding these maximums means we have to stop scanning:
4673         case CL_ETIMEOUT:
4674         case CL_EMAXFILES:
4675             ctx->abort_scan = true;
4676             cli_dbgmsg("Descriptor[%d]: %s\n", fmap_fd(ctx->fmap), cl_strerror(ret));
4677             ret = CL_CLEAN;
4678             break;
4679         // Exceeding these maximums means we had to skip an embedded file:
4680         case CL_EMAXREC:
4681         case CL_EMAXSIZE:
4682             cli_dbgmsg("Descriptor[%d]: %s\n", fmap_fd(ctx->fmap), cl_strerror(ret));
4683             ret = CL_CLEAN;
4684             break;
4685 
4686         /*
4687          * Malformed file cases
4688          */
4689         case CL_EFORMAT:
4690         case CL_EREAD:
4691         case CL_EUNPACK:
4692             cli_dbgmsg("Descriptor[%d]: %s\n", fmap_fd(ctx->fmap), cl_strerror(ret));
4693             ret = CL_CLEAN;
4694             break;
4695 
4696         case CL_CLEAN:
4697             cache_clean = 1;
4698             break;
4699 
4700         default:
4701             break;
4702     }
4703 
4704     if (old_hook_lsig_matches) {
4705         /* We need to restore the old hook_lsig_matches */
4706         cli_bitset_free(ctx->hook_lsig_matches); // safe to call, even if NULL
4707         ctx->hook_lsig_matches = old_hook_lsig_matches;
4708     }
4709 
4710 #if HAVE_JSON
4711     ctx->wrkproperty = (struct json_object *)(parent_property);
4712 #endif
4713 
4714     if (ret == CL_CLEAN && ctx->found_possibly_unwanted) {
4715         cb_retcode = CL_VIRUS;
4716     } else {
4717         if (ret == CL_CLEAN && ctx->num_viruses != 0)
4718             cb_retcode = CL_VIRUS;
4719         else
4720             cb_retcode = ret;
4721     }
4722 
4723     cli_dbgmsg("cli_magic_scan_desc: returning %d %s\n", ret, __AT__);
4724     if (ctx->engine->cb_post_scan) {
4725         const char *virusname = NULL;
4726         perf_start(ctx, PERFT_POSTCB);
4727         if (cb_retcode == CL_VIRUS)
4728             virusname = cli_get_last_virus(ctx);
4729         switch (ctx->engine->cb_post_scan(fmap_fd(ctx->fmap), cb_retcode, virusname, ctx->cb_ctx)) {
4730             case CL_BREAK:
4731                 cli_dbgmsg("cli_magic_scan_desc: file whitelisted by post_scan callback\n");
4732                 perf_stop(ctx, PERFT_POSTCB);
4733                 ret = CL_CLEAN;
4734                 break;
4735             case CL_VIRUS:
4736                 cli_dbgmsg("cli_magic_scan_desc: file blacklisted by post_scan callback\n");
4737                 cli_append_virus(ctx, "Detected.By.Callback");
4738                 perf_stop(ctx, PERFT_POSTCB);
4739                 if (ret != CL_VIRUS) {
4740                     ret = cli_check_fp(ctx, NULL);
4741                 }
4742                 break;
4743             case CL_CLEAN:
4744                 break;
4745             default:
4746                 cli_warnmsg("cli_magic_scan_desc: ignoring bad return code from post_scan callback\n");
4747         }
4748         perf_stop(ctx, PERFT_POSTCB);
4749     }
4750 
4751     if (cb_retcode == CL_CLEAN && cache_clean && !ctx->fmap->dont_cache_flag && !SCAN_COLLECT_METADATA) {
4752         perf_start(ctx, PERFT_CACHE);
4753         cache_add(hash, hashed_size, ctx);
4754         perf_stop(ctx, PERFT_CACHE);
4755     }
4756 
4757     if (ret == CL_VIRUS && SCAN_ALLMATCHES) {
4758         ret = CL_CLEAN;
4759     }
4760 
4761 early_ret:
4762 
4763     if ((ctx->engine->keeptmp) && (NULL != old_temp_path)) {
4764         /* Use rmdir to remove empty tmp subdirectories. If rmdir fails, it wasn't empty. */
4765         (void)rmdir(ctx->sub_tmpdir);
4766 
4767         free((void *)ctx->sub_tmpdir);
4768         ctx->sub_tmpdir = old_temp_path;
4769     }
4770 
4771 #if HAVE_JSON
4772     if (NULL != parent_property) {
4773         ctx->wrkproperty = (struct json_object *)(parent_property);
4774     }
4775 #endif
4776 
4777     return ret;
4778 }
4779 
4780 cl_error_t cli_magic_scan_desc_type(int desc, const char *filepath, cli_ctx *ctx, cli_file_t type, const char *name)
4781 {
4782     STATBUF sb;
4783     cl_error_t status = CL_CLEAN;
4784     fmap_t *new_map   = NULL;
4785 
4786     if (!ctx) {
4787         return CL_EARG;
4788     }
4789 
4790     const char *parent_filepath = ctx->sub_filepath;
4791     ctx->sub_filepath           = filepath;
4792 
4793 #ifdef HAVE__INTERNAL__SHA_COLLECT
4794     if (ctx->sha_collect > 0)
4795         ctx->sha_collect = 0;
4796 #endif
4797 
4798     cli_dbgmsg("in cli_magic_scan_desc_type (recursion_level: %u/%u)\n", ctx->recursion_level, ctx->engine->max_recursion_level);
4799 
4800     if (FSTAT(desc, &sb) == -1) {
4801         cli_errmsg("cli_magic_scan: Can't fstat descriptor %d\n", desc);
4802 
4803         status = CL_ESTAT;
4804         cli_dbgmsg("cli_magic_scan_desc_type: returning %d %s (no post, no cache)\n", status, __AT__);
4805         goto done;
4806     }
4807     if (sb.st_size <= 5) {
4808         cli_dbgmsg("Small data (%u bytes)\n", (unsigned int)sb.st_size);
4809 
4810         status = CL_CLEAN;
4811         cli_dbgmsg("cli_magic_scan_desc_type: returning %d %s (no post, no cache)\n", status, __AT__);
4812         goto done;
4813     }
4814 
4815     perf_start(ctx, PERFT_MAP);
4816     new_map = fmap(desc, 0, sb.st_size, name);
4817     perf_stop(ctx, PERFT_MAP);
4818     if (NULL == new_map) {
4819         cli_errmsg("CRITICAL: fmap() failed\n");
4820         status = CL_EMEM;
4821         cli_dbgmsg("cli_magic_scan_desc_type: returning %d %s (no post, no cache)\n", status, __AT__);
4822         goto done;
4823     }
4824 
4825     status = cli_recursion_stack_push(ctx, new_map, type, true); /* Perform scan with child fmap */
4826     if (CL_SUCCESS != status) {
4827         cli_dbgmsg("Failed to scan fmap.\n");
4828         goto done;
4829     }
4830 
4831     status = cli_magic_scan(ctx, type);
4832 
4833     (void)cli_recursion_stack_pop(ctx); /* Restore the parent fmap */
4834 
4835 done:
4836     if (NULL != new_map) {
4837         funmap(new_map);
4838     }
4839 
4840     ctx->sub_filepath = parent_filepath;
4841 
4842     return status;
4843 }
4844 
4845 cl_error_t cli_magic_scan_desc(int desc, const char *filepath, cli_ctx *ctx, const char *name)
4846 {
4847     return cli_magic_scan_desc_type(desc, filepath, ctx, CL_TYPE_ANY, name);
4848 }
4849 
4850 cl_error_t cl_scandesc(int desc, const char *filename, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, struct cl_scan_options *scanoptions)
4851 {
4852     return cl_scandesc_callback(desc, filename, virname, scanned, engine, scanoptions, NULL);
4853 }
4854 
4855 /**
4856  * @brief   Scan an offset/length into a file map.
4857  *
4858  * Magic-scan some portion of an existing fmap.
4859  *
4860  * @param map       File map.
4861  * @param offset    Offset into file map.
4862  * @param length    Length from offset.
4863  * @param ctx       Scanning context structure.
4864  * @param type      CL_TYPE of data to be scanned.
4865  * @param name      (optional) Original name of the file (to set fmap name metadata)
4866  * @return int      CL_SUCCESS, or an error code.
4867  */
4868 static cl_error_t magic_scan_nested_fmap_type(cl_fmap_t *map, size_t offset, size_t length, cli_ctx *ctx, cli_file_t type, const char *name)
4869 {
4870     cl_error_t status = CL_CLEAN;
4871     fmap_t *new_map   = NULL;
4872 
4873     cli_dbgmsg("magic_scan_nested_fmap_type: [0, +%zu), [%zu, +%zu)\n",
4874                map->len, offset, length);
4875 
4876     if (offset >= map->len) {
4877         cli_dbgmsg("magic_scan_nested_fmap_type: Invalid offset: %zu\n", offset);
4878         goto done;
4879     }
4880 
4881     if (!length)
4882         length = map->len - offset;
4883 
4884     if (length > map->len - offset) {
4885         cli_dbgmsg("magic_scan_nested_fmap_type: Data truncated: %zu -> %zu\n",
4886                    length, map->len - offset);
4887         length = map->len - offset;
4888     }
4889 
4890     if (length <= 5) {
4891         cli_dbgmsg("magic_scan_nested_fmap_type: Small data (%zu bytes)\n", length);
4892         goto done;
4893     }
4894 
4895     new_map = fmap_duplicate(map, offset, length, name);
4896     if (NULL == new_map) {
4897         cli_dbgmsg("magic_scan_nested_fmap_type: Failed to duplicate fmap for scan of fmap subsection\n");
4898         goto done;
4899     }
4900 
4901     status = cli_recursion_stack_push(ctx, new_map, type, false); /* Perform scan with child fmap */
4902     if (CL_SUCCESS != status) {
4903         cli_dbgmsg("magic_scan_nested_fmap_type: Failed to add map to recursion stack for magic scan.\n");
4904         goto done;
4905     }
4906 
4907     status = cli_magic_scan(ctx, type);
4908 
4909     (void)cli_recursion_stack_pop(ctx); /* Restore the parent fmap */
4910 
4911 done:
4912     if (NULL != new_map) {
4913         free_duplicate_fmap(new_map); /* This fmap is just a duplicate. */
4914     }
4915 
4916     return status;
4917 }
4918 
4919 /* For map scans that may be forced to disk */
4920 cl_error_t cli_magic_scan_nested_fmap_type(cl_fmap_t *map, size_t offset, size_t length, cli_ctx *ctx, cli_file_t type, const char *name)
4921 {
4922     cl_error_t ret = CL_CLEAN;
4923 
4924     cli_dbgmsg("cli_magic_scan_nested_fmap_type: [%zu, +%zu)\n", offset, length);
4925     if (offset >= map->len) {
4926         cli_dbgmsg("Invalid offset: %zu\n", offset);
4927         return CL_CLEAN;
4928     }
4929 
4930     if (ctx->engine->engine_options & ENGINE_OPTIONS_FORCE_TO_DISK) {
4931         /*
4932          * Force to disk!
4933          *
4934          * Write the offset + length section of the fmap to disk, and scan it.
4935          */
4936         const uint8_t *mapdata = NULL;
4937         char *tempfile         = NULL;
4938         int fd                 = -1;
4939         size_t nread           = 0;
4940 
4941         /* Then check length */
4942         if (!length) {
4943             /* Caller didn't specify len, use rest of the map */
4944             length = map->len - offset;
4945         }
4946         if (length > map->len - offset) {
4947             cli_dbgmsg("cli_magic_scan_nested_fmap_type: Data truncated: %zu -> %zu\n", length, map->len - offset);
4948             length = map->len - offset;
4949         }
4950         if (length <= 5) {
4951             cli_dbgmsg("cli_magic_scan_nested_fmap_type: Small data (%u bytes)\n", (unsigned int)length);
4952             return CL_CLEAN;
4953         }
4954         if (!CLI_ISCONTAINED_0_TO(map->len, offset, length)) {
4955             cli_dbgmsg("cli_magic_scan_nested_fmap_type: map error occurred [%zu, %zu] not within [0, %zu]\n", offset, length, map->len);
4956             return CL_CLEAN;
4957         }
4958 
4959         /* Length checked, now get map */
4960         mapdata = fmap_need_off_once_len(map, offset, length, &nread);
4961         if (!mapdata || (nread != length)) {
4962             cli_errmsg("cli_magic_scan_nested_fmap_type: could not map sub-file\n");
4963             return CL_EMAP;
4964         }
4965 
4966         ret = cli_gentempfd(ctx->sub_tmpdir, &tempfile, &fd);
4967         if (ret != CL_SUCCESS) {
4968             return ret;
4969         }
4970 
4971         cli_dbgmsg("cli_magic_scan_nested_fmap_type: writing nested map content to temp file %s\n", tempfile);
4972         if (cli_writen(fd, mapdata, length) == (size_t)-1) {
4973             cli_errmsg("cli_magic_scan_nested_fmap_type: cli_writen error writing subdoc temporary file.\n");
4974             ret = CL_EWRITE;
4975         }
4976 
4977         /* scan the temp file */
4978         ret = cli_magic_scan_desc_type(fd, tempfile, ctx, type, name);
4979 
4980         /* remove the temp file, if needed */
4981         if (fd >= 0) {
4982             close(fd);
4983         }
4984         if (!ctx->engine->keeptmp) {
4985             if (cli_unlink(tempfile)) {
4986                 cli_errmsg("cli_magic_scan_nested_fmap_type: error unlinking tempfile %s\n", tempfile);
4987                 ret = CL_EUNLINK;
4988             }
4989         }
4990         free(tempfile);
4991     } else {
4992         /*
4993          * Not forced to disk.
4994          *
4995          * Just use nested map by scanning given fmap at offset + length.
4996          */
4997         ret = magic_scan_nested_fmap_type(map, offset, length, ctx, type, name);
4998     }
4999     return ret;
5000 }
5001 
5002 cl_error_t cli_magic_scan_buff(const void *buffer, size_t length, cli_ctx *ctx, const char *name)
5003 {
5004     cl_error_t ret;
5005     fmap_t *map = NULL;
5006 
5007     map = fmap_open_memory(buffer, length, name);
5008     if (!map) {
5009         return CL_EMAP;
5010     }
5011 
5012     ret = cli_magic_scan_nested_fmap_type(map, 0, length, ctx, CL_TYPE_ANY, name);
5013 
5014     funmap(map);
5015 
5016     return ret;
5017 }
5018 
5019 /**
5020  * @brief   The main function to initiate a scan of an fmap.
5021  *
5022  * @param map               File map.
5023  * @param filepath          (optional, recommended) filepath of the open file descriptor or file map.
5024  * @param[out] virname      Will be set to a statically allocated (i.e. needs not be freed) signature name if the scan matches against a signature.
5025  * @param[out] scanned      The number of bytes scanned.
5026  * @param engine            The scanning engine.
5027  * @param scanoptions       Scanning options.
5028  * @param[in,out] context   An opaque context structure allowing the caller to record details about the sample being scanned.
5029  * @return int              CL_CLEAN, CL_VIRUS, or an error code if an error occured during the scan.
5030  */
5031 static cl_error_t scan_common(cl_fmap_t *map, const char *filepath, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, struct cl_scan_options *scanoptions, void *context)
5032 {
5033     cl_error_t status;
5034     cli_ctx ctx = {0};
5035 
5036     char *target_basename = NULL;
5037     char *new_temp_prefix = NULL;
5038     size_t new_temp_prefix_len;
5039     char *new_temp_path = NULL;
5040 
5041     time_t current_time;
5042     struct tm tm_struct;
5043 
5044     if (NULL == map) {
5045         return CL_ENULLARG;
5046     }
5047 
5048     ctx.engine  = engine;
5049     ctx.virname = virname;
5050     ctx.scanned = scanned;
5051     ctx.options = malloc(sizeof(struct cl_scan_options));
5052     memcpy(ctx.options, scanoptions, sizeof(struct cl_scan_options));
5053     ctx.found_possibly_unwanted = 0;
5054 
5055     ctx.dconf  = (struct cli_dconf *)engine->dconf;
5056     ctx.cb_ctx = context;
5057 
5058     if (!(ctx.hook_lsig_matches = cli_bitset_init())) {
5059         status = CL_EMEM;
5060         goto done;
5061     }
5062 
5063     ctx.recursion_stack_size = ctx.engine->max_recursion_level;
5064     ctx.recursion_stack      = cli_calloc(sizeof(recursion_level_t), ctx.recursion_stack_size);
5065     if (!ctx.recursion_stack) {
5066         status = CL_EMEM;
5067         goto done;
5068     }
5069 
5070     // ctx was memset, so recursion_level starts at 0.
5071     ctx.recursion_stack[ctx.recursion_level].fmap = map;
5072     ctx.recursion_stack[ctx.recursion_level].type = CL_TYPE_ANY; // ANY for the top level, because we don't yet know the type.
5073     ctx.recursion_stack[ctx.recursion_level].size = map->len;
5074 
5075     ctx.fmap = ctx.recursion_stack[ctx.recursion_level].fmap;
5076 
5077     perf_init(&ctx);
5078 
5079     if (ctx.engine->maxscantime != 0) {
5080         if (gettimeofday(&ctx.time_limit, NULL) == 0) {
5081             uint32_t secs  = ctx.engine->maxscantime / 1000;
5082             uint32_t usecs = (ctx.engine->maxscantime % 1000) * 1000;
5083             ctx.time_limit.tv_sec += secs;
5084             ctx.time_limit.tv_usec += usecs;
5085             if (ctx.time_limit.tv_usec >= 1000000) {
5086                 ctx.time_limit.tv_usec -= 1000000;
5087                 ctx.time_limit.tv_sec++;
5088             }
5089         } else {
5090             char buf[64];
5091             cli_dbgmsg("scan_common: gettimeofday error: %s\n", cli_strerror(errno, buf, 64));
5092         }
5093     }
5094 
5095     if (filepath != NULL) {
5096         ctx.target_filepath = strdup(filepath);
5097     }
5098 
5099     /*
5100      * Create a tmp sub-directory for the temp files generated by this scan.
5101      *
5102      * If keeptmp (LeaveTemporaryFiles / --leave-temps) is enabled, we'll include the
5103      *   basename in the tmp directory.
5104      * If keeptmp is not enabled, we'll just call it "scantemp".
5105      */
5106     current_time = time(NULL);
5107 
5108 #ifdef _WIN32
5109     if (0 != localtime_s(&tm_struct, &current_time)) {
5110 #else
5111     if (!localtime_r(&current_time, &tm_struct)) {
5112 #endif
5113         cli_errmsg("scan_common: Failed to get local time.\n");
5114         status = CL_ESTAT;
5115         goto done;
5116     }
5117 
5118     if ((ctx.engine->keeptmp) &&
5119         (NULL != ctx.target_filepath) &&
5120         (CL_SUCCESS == cli_basename(ctx.target_filepath, strlen(ctx.target_filepath), &target_basename))) {
5121         /* Include the basename in the temp directory */
5122         new_temp_prefix_len = strlen("YYYYMMDD_HHMMSS-") + strlen(target_basename);
5123         new_temp_prefix     = cli_calloc(1, new_temp_prefix_len + 1);
5124         if (!new_temp_prefix) {
5125             cli_errmsg("scan_common: Failed to allocate memory for temp directory name.\n");
5126             status = CL_EMEM;
5127             goto done;
5128         }
5129         strftime(new_temp_prefix, new_temp_prefix_len, "%Y%m%d_%H%M%S-", &tm_struct);
5130         strcpy(new_temp_prefix + strlen("YYYYMMDD_HHMMSS-"), target_basename);
5131     } else {
5132         /* Just use date */
5133         new_temp_prefix_len = strlen("YYYYMMDD_HHMMSS-scantemp");
5134         new_temp_prefix     = cli_calloc(1, new_temp_prefix_len + 1);
5135         if (!new_temp_prefix) {
5136             cli_errmsg("scan_common: Failed to allocate memory for temp directory name.\n");
5137             status = CL_EMEM;
5138             goto done;
5139         }
5140         strftime(new_temp_prefix, new_temp_prefix_len, "%Y%m%d_%H%M%S-scantemp", &tm_struct);
5141     }
5142 
5143     /* Place the new temp sub-directory within the configured temp directory */
5144     new_temp_path = cli_gentemp_with_prefix(ctx.engine->tmpdir, new_temp_prefix);
5145     free(new_temp_prefix);
5146     if (NULL == new_temp_path) {
5147         cli_errmsg("scan_common: Failed to generate temp directory name.\n");
5148         status = CL_EMEM;
5149         goto done;
5150     }
5151 
5152     ctx.sub_tmpdir = new_temp_path;
5153 
5154     if (mkdir(ctx.sub_tmpdir, 0700)) {
5155         cli_errmsg("Can't create temporary directory for scan: %s.\n", ctx.sub_tmpdir);
5156         status = CL_EACCES;
5157         goto done;
5158     }
5159 
5160     cli_logg_setup(&ctx);
5161 
5162     /* We have a limit of around 2GB (INT_MAX - 2). Enforce it here. */
5163     /* TODO: Large file support is large-ly untested. Remove this restriction
5164      * and test with a large set of large files of various types. libclamav's
5165      * integer type safety has come a long way since 2014, so it's possible
5166      * we could lift this restriction, but at least one of the parsers is
5167      * bound to behave badly with large files. */
5168     if (map->len > INT_MAX - 2) {
5169         if (scanoptions->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX) {
5170             status = cli_append_virus(&ctx, "Heuristics.Limits.Exceeded.MaxFileSize");
5171         } else {
5172             status = CL_CLEAN;
5173         }
5174         goto done;
5175     }
5176 
5177     status = cli_magic_scan(&ctx, CL_TYPE_ANY);
5178 
5179     if (status == CL_CLEAN && ctx.found_possibly_unwanted) {
5180         cli_virus_found_cb(&ctx);
5181     }
5182 
5183 #if HAVE_JSON
5184     if (ctx.options->general & CL_SCAN_GENERAL_COLLECT_METADATA && (ctx.properties != NULL)) {
5185         json_object *jobj;
5186         const char *jstring;
5187 
5188         /* set value of unique root object tag */
5189         if (json_object_object_get_ex(ctx.properties, "FileType", &jobj)) {
5190             enum json_type type;
5191             const char *jstr;
5192 
5193             type = json_object_get_type(jobj);
5194             if (type == json_type_string) {
5195                 jstr = json_object_get_string(jobj);
5196                 cli_jsonstr(ctx.properties, "RootFileType", jstr);
5197             }
5198         }
5199 
5200         /* serialize json properties to string */
5201 #ifdef JSON_C_TO_STRING_NOSLASHESCAPE
5202         jstring = json_object_to_json_string_ext(ctx.properties, JSON_C_TO_STRING_PRETTY | JSON_C_TO_STRING_NOSLASHESCAPE);
5203 #else
5204         jstring = json_object_to_json_string_ext(ctx.properties, JSON_C_TO_STRING_PRETTY);
5205 #endif
5206         if (NULL == jstring) {
5207             cli_errmsg("scan_common: no memory for json serialization.\n");
5208             status = CL_EMEM;
5209         } else {
5210             int ret                   = CL_SUCCESS;
5211             struct cli_matcher *iroot = ctx.engine->root[13];
5212             cli_dbgmsg("%s\n", jstring);
5213 
5214             if ((status != CL_VIRUS) || (ctx.options->general & CL_SCAN_GENERAL_ALLMATCHES)) {
5215                 /* run bytecode preclass hook; generate fmap if needed for running hook */
5216                 struct cli_bc_ctx *bc_ctx = cli_bytecode_context_alloc();
5217                 if (!bc_ctx) {
5218                     cli_errmsg("scan_common: can't allocate memory for bc_ctx\n");
5219                     status = CL_EMEM;
5220                 } else {
5221                     cli_bytecode_context_setctx(bc_ctx, &ctx);
5222                     status = cli_bytecode_runhook(&ctx, ctx.engine, bc_ctx, BC_PRECLASS, map);
5223                     cli_bytecode_context_destroy(bc_ctx);
5224                 }
5225 
5226                 /* backwards compatibility: scan the json string unless a virus was detected */
5227                 if (status != CL_VIRUS && (iroot->ac_lsigs || iroot->ac_patterns
5228 #ifdef HAVE_PCRE
5229                                            || iroot->pcre_metas
5230 #endif // HAVE_PCRE
5231                                            )) {
5232                     cli_dbgmsg("scan_common: running deprecated preclass bytecodes for target type 13\n");
5233                     ctx.options->general &= ~CL_SCAN_GENERAL_COLLECT_METADATA;
5234                     status = cli_magic_scan_buff(jstring, strlen(jstring), &ctx, NULL);
5235                 }
5236             }
5237 
5238             /* Invoke file props callback */
5239             if (ctx.engine->cb_file_props != NULL) {
5240                 ret = ctx.engine->cb_file_props(jstring, status, ctx.cb_ctx);
5241                 if (ret != CL_SUCCESS)
5242                     status = ret;
5243             }
5244 
5245             /* keeptmp file processing for file properties json string */
5246             if (ctx.engine->keeptmp) {
5247                 int fd        = -1;
5248                 char *tmpname = NULL;
5249 
5250                 if ((ret = cli_newfilepathfd(ctx.sub_tmpdir, "metadata.json", &tmpname, &fd)) != CL_SUCCESS) {
5251                     cli_dbgmsg("scan_common: Can't create json properties file, ret = %i.\n", ret);
5252                 } else {
5253                     if (cli_writen(fd, jstring, strlen(jstring)) == (size_t)-1)
5254                         cli_dbgmsg("scan_common: cli_writen error writing json properties file.\n");
5255                     else
5256                         cli_dbgmsg("json written to: %s\n", tmpname);
5257                 }
5258                 if (fd != -1)
5259                     close(fd);
5260                 if (NULL != tmpname)
5261                     free(tmpname);
5262             }
5263         }
5264         cli_json_delobj(ctx.properties); /* frees all json memory */
5265     }
5266 #endif // HAVE_JSON
5267 
5268     if (status == CL_CLEAN) {
5269         if ((ctx.found_possibly_unwanted) ||
5270             ((ctx.num_viruses != 0) &&
5271              ((ctx.options->general & CL_SCAN_GENERAL_ALLMATCHES) ||
5272               (ctx.options->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX)))) {
5273             status = CL_VIRUS;
5274         }
5275     }
5276 
5277     cli_logg_unsetup();
5278 
5279 done:
5280     if (NULL != ctx.sub_tmpdir) {
5281         if (!ctx.engine->keeptmp) {
5282             (void)cli_rmdirs(ctx.sub_tmpdir);
5283         }
5284         free(ctx.sub_tmpdir);
5285     }
5286 
5287     if (NULL != target_basename) {
5288         free(target_basename);
5289     }
5290 
5291     if (NULL != ctx.target_filepath) {
5292         free(ctx.target_filepath);
5293     }
5294 
5295     if (NULL != ctx.perf) {
5296         perf_done(&ctx);
5297     }
5298 
5299     if (NULL != ctx.hook_lsig_matches) {
5300         cli_bitset_free(ctx.hook_lsig_matches);
5301     }
5302 
5303     if (NULL != ctx.recursion_stack) {
5304         free(ctx.recursion_stack);
5305     }
5306 
5307     if (NULL != ctx.options) {
5308         free(ctx.options);
5309     }
5310 
5311     return status;
5312 }
5313 
5314 cl_error_t cl_scandesc_callback(int desc, const char *filename, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, struct cl_scan_options *scanoptions, void *context)
5315 {
5316     cl_error_t status = CL_SUCCESS;
5317     cl_fmap_t *map    = NULL;
5318     STATBUF sb;
5319     char *filename_base = NULL;
5320 
5321     if (FSTAT(desc, &sb) == -1) {
5322         cli_errmsg("cl_scandesc_callback: Can't fstat descriptor %d\n", desc);
5323         status = CL_ESTAT;
5324         goto done;
5325     }
5326     if (sb.st_size <= 5) {
5327         cli_dbgmsg("cl_scandesc_callback: File too small (" STDu64 " bytes), ignoring\n", (uint64_t)sb.st_size);
5328         status = CL_CLEAN;
5329         goto done;
5330     }
5331     if ((engine->maxfilesize > 0) && ((uint64_t)sb.st_size > engine->maxfilesize)) {
5332         cli_dbgmsg("cl_scandesc_callback: File too large (" STDu64 " bytes), ignoring\n", (uint64_t)sb.st_size);
5333         if (scanoptions->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX) {
5334             if (engine->cb_virus_found)
5335                 engine->cb_virus_found(desc, "Heuristics.Limits.Exceeded.MaxFileSize", context);
5336             status = CL_VIRUS;
5337         } else {
5338             status = CL_CLEAN;
5339         }
5340         goto done;
5341     }
5342 
5343     if (NULL != filename) {
5344         (void)cli_basename(filename, strlen(filename), &filename_base);
5345     }
5346 
5347     if (NULL == (map = fmap(desc, 0, sb.st_size, filename_base))) {
5348         cli_errmsg("CRITICAL: fmap() failed\n");
5349         status = CL_EMEM;
5350         goto done;
5351     }
5352 
5353     status = scan_common(map, filename, virname, scanned, engine, scanoptions, context);
5354 
5355 done:
5356     if (NULL != map) {
5357         funmap(map);
5358     }
5359     if (NULL != filename_base) {
5360         free(filename_base);
5361     }
5362 
5363     return status;
5364 }
5365 
5366 cl_error_t cl_scanmap_callback(cl_fmap_t *map, const char *filename, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, struct cl_scan_options *scanoptions, void *context)
5367 {
5368     if ((engine->maxfilesize > 0) && (map->len > engine->maxfilesize)) {
5369         cli_dbgmsg("cl_scandesc_callback: File too large (%zu bytes), ignoring\n", map->len);
5370         if (scanoptions->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX) {
5371             if (engine->cb_virus_found)
5372                 engine->cb_virus_found(fmap_fd(map), "Heuristics.Limits.Exceeded.MaxFileSize", context);
5373             return CL_VIRUS;
5374         }
5375         return CL_CLEAN;
5376     }
5377 
5378     return scan_common(map, filename, virname, scanned, engine, scanoptions, context);
5379 }
5380 
5381 cl_error_t cli_found_possibly_unwanted(cli_ctx *ctx)
5382 {
5383     if (cli_get_last_virus(ctx)) {
5384         cli_dbgmsg("found Possibly Unwanted: %s\n", cli_get_last_virus(ctx));
5385         if (SCAN_HEURISTIC_PRECEDENCE) {
5386             /* we found a heuristic match, don't scan further,
5387              * but consider it a virus. */
5388             cli_dbgmsg("cli_found_possibly_unwanted: CL_VIRUS\n");
5389             return CL_VIRUS;
5390         }
5391         /* heuristic scan isn't taking precedence, keep scanning.
5392          * If this is part of an archive, and
5393          * we find a real malware we report that instead of the
5394          * heuristic match */
5395         ctx->found_possibly_unwanted = 1;
5396     } else {
5397         cli_warnmsg("cli_found_possibly_unwanted called, but virname is not set\n");
5398     }
5399     emax_reached(ctx);
5400     return CL_CLEAN;
5401 }
5402 
5403 cl_error_t cli_magic_scan_file(const char *filename, cli_ctx *ctx, const char *original_name)
5404 {
5405     int fd         = -1;
5406     cl_error_t ret = CL_EOPEN;
5407 
5408     /* internal version of cl_scanfile with arec/mrec preserved */
5409     fd = safe_open(filename, O_RDONLY | O_BINARY);
5410     if (fd < 0) {
5411         goto done;
5412     }
5413 
5414     ret = cli_magic_scan_desc(fd, filename, ctx, original_name);
5415 
5416 done:
5417     if (fd >= 0) {
5418         close(fd);
5419     }
5420     return ret;
5421 }
5422 
5423 cl_error_t cl_scanfile(const char *filename, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, struct cl_scan_options *scanoptions)
5424 {
5425     return cl_scanfile_callback(filename, virname, scanned, engine, scanoptions, NULL);
5426 }
5427 
5428 cl_error_t cl_scanfile_callback(const char *filename, const char **virname, unsigned long int *scanned, const struct cl_engine *engine, struct cl_scan_options *scanoptions, void *context)
5429 {
5430     int fd;
5431     cl_error_t ret;
5432     const char *fname = cli_to_utf8_maybe_alloc(filename);
5433 
5434     if (!fname)
5435         return CL_EARG;
5436 
5437     if ((fd = safe_open(fname, O_RDONLY | O_BINARY)) == -1)
5438         return CL_EOPEN;
5439 
5440     if (fname != filename)
5441         free((char *)fname);
5442 
5443     ret = cl_scandesc_callback(fd, filename, virname, scanned, engine, scanoptions, context);
5444     close(fd);
5445 
5446     return ret;
5447 }
5448 
5449 /*
5450 Local Variables:
5451    c-basic-offset: 4
5452 End:
5453 */
5454