1 /*
2  *  Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
3  *  Copyright (C) 2007-2013 Sourcefire, Inc.
4  *
5  *  Authors: Trog
6  *
7  *  Summary: Extract component parts of OLE2 files (e.g. MS Office Documents).
8  *
9  *  Acknowledgements: Some ideas and algorithms were based upon OpenOffice and libgsf.
10  *
11  *  This program is free software; you can redistribute it and/or modify
12  *  it under the terms of the GNU General Public License version 2 as
13  *  published by the Free Software Foundation.
14  *
15  *  This program is distributed in the hope that it will be useful,
16  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  *  GNU General Public License for more details.
19  *
20  *  You should have received a copy of the GNU General Public License
21  *  along with this program; if not, write to the Free Software
22  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
23  *  MA 02110-1301, USA.
24  */
25 
26 #if HAVE_CONFIG_H
27 #include "clamav-config.h"
28 #endif
29 
30 #include <sys/types.h>
31 #include <sys/stat.h>
32 #include <fcntl.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <ctype.h>
36 #include <stdlib.h>
37 #include <errno.h>
38 #include <conv.h>
39 #include <zlib.h>
40 #ifdef HAVE_UNISTD_H
41 #include <unistd.h>
42 #endif
43 #include <stdbool.h>
44 
45 #include "clamav.h"
46 #include "others.h"
47 #include "hwp.h"
48 #include "ole2_extract.h"
49 #include "scanners.h"
50 #include "fmap.h"
51 #include "json_api.h"
52 #if HAVE_JSON
53 #include "msdoc.h"
54 #endif
55 
/*
 * ole2_listmsg(): extra-verbose tracing used around the property-tree work
 * list. It forwards to cli_dbgmsg() when DEBUG_OLE2_LIST is defined;
 * otherwise it expands to a lone ';', so every call site collapses to an
 * empty statement.
 */
#ifdef DEBUG_OLE2_LIST
#define ole2_listmsg(...) cli_dbgmsg(__VA_ARGS__)
#else
#define ole2_listmsg(...) ;
#endif
61 
/*
 * Byte-order helpers for 16- and 32-bit fields read from the file. The
 * argument is cast to the unsigned type of the matching width and then passed
 * to le16_to_host() / le32_to_host(), so signed fields can be given directly.
 */
#define ole2_endian_convert_16(v) le16_to_host((uint16_t)(v))
#define ole2_endian_convert_32(v) le32_to_host((uint32_t)(v))
64 
/*
 * Without HAVE_ATTRIB_PACKED, define __attribute__(x) to nothing so that the
 * __attribute__((packed)) annotations on the structure members below compile
 * away. The #pragma pack directives that follow are the alternative packing
 * mechanism.
 */
#ifndef HAVE_ATTRIB_PACKED
#define __attribute__(x)
#endif
68 
69 #ifdef HAVE_PRAGMA_PACK
70 #pragma pack(1)
71 #endif
72 
73 #ifdef HAVE_PRAGMA_PACK_HPPA
74 #pragma pack 1
75 #endif
76 
/*
 * OLE2 header followed by decoder bookkeeping.
 *
 * The members up to and including bat_array[] are annotated as packed; the
 * members after the "not part of the ole2 header" marker are working state
 * used by the functions in this file and must be excluded when the header is
 * read from the file (see the note below).
 */
typedef struct ole2_header_tag {
    unsigned char magic[8]; /* should be: 0xd0cf11e0a1b11ae1 */
    unsigned char clsid[16];
    uint16_t minor_version __attribute__((packed));
    uint16_t dll_version __attribute__((packed));
    int16_t byte_order __attribute__((packed)); /* -2=intel */

    /* Block sizes are stored as base-2 exponents and used as shift counts. */
    uint16_t log2_big_block_size __attribute__((packed));   /* usually 9 (2^9 = 512) */
    uint32_t log2_small_block_size __attribute__((packed)); /* usually 6 (2^6 = 64) */

    int32_t reserved[2] __attribute__((packed));
    int32_t bat_count __attribute__((packed));  /* upper bound checked against the bat_array index */
    int32_t prop_start __attribute__((packed)); /* first block of the property (directory) chain */

    uint32_t signature __attribute__((packed));
    uint32_t sbat_cutoff __attribute__((packed)); /* cutoff for files held
                                                         * in small blocks
                                                         * (4096) */

    int32_t sbat_start __attribute__((packed)); /* first block of the small-block allocation table */
    int32_t sbat_block_count __attribute__((packed));
    int32_t xbat_start __attribute__((packed)); /* first extended-BAT block */
    int32_t xbat_count __attribute__((packed));
    int32_t bat_array[109] __attribute__((packed)); /* block numbers of the first 109 BAT blocks */

    /* not part of the ole2 header, but stuff we need in order to decode */

    /*
     * must take account of the size of variables below here when reading the
     * header
     */
    int32_t sbat_root_start __attribute__((packed)); /* start block of the root entry; set while walking the tree */
    uint32_t max_block_no;                           /* upper bound accepted for a property index */
    off_t m_length;                                  /* length of the mapped file; bounds every block read */
    bitset_t *bitset;                                /* property indices already visited (loop detection) */
    struct uniq *U;                                  /* store for extracted property names */
    fmap_t *map;                                     /* file map that blocks are read from */
    int has_vba;                                     /* presumably set when VBA content is found - not used in this part of the file */
    int has_xlm;                                     /* presumably set when XLM content is found - not used in this part of the file */
    hwp5_header_t *is_hwp;                           /* presumably non-NULL for HWP5 documents - not used in this part of the file */
} ole2_header_t;
118 
/*
 * One property (directory entry) record. Four of these are read per 512-byte
 * block by ole2_walk_property_tree(). Multi-byte members hold file byte order
 * until the tree walker converts them in place.
 */
typedef struct property_tag {
    char name[64]; /* in unicode */
    uint16_t name_size __attribute__((packed)); /* name length in bytes; values above 64 are rejected by the printers */
    unsigned char type;  /* 1=dir 2=file 5=root */
    unsigned char color; /* black or red */
    uint32_t prev __attribute__((packed));  /* sibling property index, -1 when absent */
    uint32_t next __attribute__((packed));  /* sibling property index, -1 when absent */
    uint32_t child __attribute__((packed)); /* child property index, -1 when absent */

    unsigned char clsid[16];
    uint32_t user_flags __attribute__((packed));

    uint32_t create_lowdate __attribute__((packed));
    uint32_t create_highdate __attribute__((packed));
    uint32_t mod_lowdate __attribute__((packed));
    uint32_t mod_highdate __attribute__((packed));
    uint32_t start_block __attribute__((packed)); /* first block of the entry's data */
    uint32_t size __attribute__((packed));        /* size of the entry's data in bytes */
    unsigned char reserved[4];
} property_t;
139 
/*
 * Singly linked LIFO work list of 32-bit values. The tree walker uses it to
 * queue property indices that still have to be visited.
 */
struct ole2_list_node;

/* One list element: the stored value and the link to the element below it. */
typedef struct ole2_list_node {
    uint32_t Val;
    struct ole2_list_node *Next;
} ole2_list_node_t;

/* List head: Size counts the nodes, Head is the most recently pushed node. */
typedef struct ole2_list {
    uint32_t Size;
    ole2_list_node_t *Head;
} ole2_list_t;
151 
/*
 * Work-list API (defined directly below). init/push/delete return a cl_error
 * style status; pop returns the stored value, or (uint32_t)-1 when the list
 * is empty.
 */
int ole2_list_init(ole2_list_t *list);
int ole2_list_is_empty(ole2_list_t *list);
uint32_t ole2_list_size(ole2_list_t *list);
int ole2_list_push(ole2_list_t *list, uint32_t val);
uint32_t ole2_list_pop(ole2_list_t *list);
int ole2_list_delete(ole2_list_t *list);
158 
/*
 * Put a work list into the empty state: no head node and a size of zero.
 * Any nodes previously linked from the list are NOT freed here.
 *
 * Always returns CL_SUCCESS.
 */
int ole2_list_init(ole2_list_t *list)
{
    list->Size = 0;
    list->Head = NULL;

    return CL_SUCCESS;
}
165 
/*
 * Report whether the work list holds no nodes.
 *
 * Returns 1 when the list has no head node, 0 otherwise.
 */
int ole2_list_is_empty(ole2_list_t *list)
{
    if (list->Head != NULL) {
        return 0;
    }
    return 1;
}
170 
171 uint32_t
ole2_list_size(ole2_list_t * list)172 ole2_list_size(ole2_list_t *list)
173 {
174     return (list->Size);
175 }
176 
/*
 * Push val onto the front of the work list.
 *
 * A new node is allocated with cli_malloc(); it becomes the list head and the
 * size counter is incremented.
 *
 * Returns CL_SUCCESS, or CL_EMEM when the node cannot be allocated (the list
 * is left untouched in that case).
 */
int ole2_list_push(ole2_list_t *list, uint32_t val)
{
    ole2_list_node_t *node = (ole2_list_node_t *)cli_malloc(sizeof(ole2_list_node_t));

    if (node == NULL) {
        cli_dbgmsg("OLE2: could not allocate new node for worklist!\n");
        return CL_EMEM;
    }

    /* Link the new node in front of the current head. */
    node->Next = list->Head;
    node->Val  = val;
    list->Head = node;
    list->Size += 1;

    return CL_SUCCESS;
}
194 
195 uint32_t
ole2_list_pop(ole2_list_t * list)196 ole2_list_pop(ole2_list_t *list)
197 {
198     uint32_t val;
199     ole2_list_node_t *next;
200 
201     if (ole2_list_is_empty(list)) {
202         cli_dbgmsg("OLE2: work list is empty and ole2_list_pop() called!\n");
203         return -1;
204     }
205     val  = list->Head->Val;
206     next = list->Head->Next;
207 
208     free(list->Head);
209     list->Head = next;
210 
211     (list->Size)--;
212     return val;
213 }
214 
/*
 * Release every node of the work list by popping until it is empty.
 * The list structure itself is not freed and can be reused afterwards.
 *
 * Always returns CL_SUCCESS.
 */
int ole2_list_delete(ole2_list_t *list)
{
    for (;;) {
        if (ole2_list_is_empty(list)) {
            break;
        }
        ole2_list_pop(list);
    }

    return CL_SUCCESS;
}
221 
222 #ifdef HAVE_PRAGMA_PACK
223 #pragma pack()
224 #endif
225 
226 #ifdef HAVE_PRAGMA_PACK_HPPA
227 #pragma pack
228 #endif
229 
/* The 8-byte signature expected in ole2_header_t.magic. */
static unsigned char magic_id[] = {0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1};
231 
/*
 * Build a printable, lower-case representation of a property name.
 *
 * name is consumed two bytes at a time; the final two bytes (the trailing
 * NUL unit) are skipped. Each pair is rendered as:
 *   - the lower-cased first byte, when it is printable 7-bit ASCII and the
 *     second byte is zero;
 *   - "_<digit>_", when the first byte is in 0..9 and the second byte is zero;
 *   - "_xxxxx_" otherwise, where each x is 'a' plus a 4-bit field of the pair.
 *
 * name - raw name bytes; at least two bytes must be readable.
 * size - length of name in bytes; must be in 1..128.
 *
 * Returns a cli_malloc()ed NUL-terminated string the caller must free(), or
 * NULL when the name starts with two zero bytes, size is out of range, the
 * allocation fails, or the result would be empty.
 */
char *
cli_ole2_get_property_name2(const char *name, int size)
{
    /*
     * Shifts applied to the 16-bit pair value. The value is only 16 bits
     * wide, so the 16 and 24 shifts always produce 'a'; they are kept so the
     * generated names stay exactly as before.
     */
    static const int nibble_shift[5] = {0, 4, 8, 16, 24};
    char *newname, *out;
    int pos;

    if (name[0] == 0 && name[1] == 0) {
        return NULL;
    }
    if (size <= 0 || size > 128) {
        return NULL;
    }

    /* Worst case is 7 output bytes per input pair, so size * 7 is ample. */
    newname = (char *)cli_malloc(size * 7);
    if (newname == NULL) {
        cli_errmsg("OLE2 [cli_ole2_get_property_name2]: Unable to allocate memory for newname: %u\n", size * 7);
        return NULL;
    }
    out = newname;

    /* size-2 to ignore trailing NULL */
    for (pos = 0; pos < size - 2; pos += 2) {
        const char first  = name[pos];
        const char second = name[pos + 1];

        if (second == 0 && !(first & 0x80) && isprint(first)) {
            *out++ = tolower(first);
            continue;
        }

        /* Everything else is escaped between a pair of underscores. */
        *out++ = '_';
        if (second == 0 && first >= 0 && first < 10) {
            *out++ = first + '0';
        } else {
            const uint16_t x = (((uint16_t)first) << 8) | second;
            size_t k;

            for (k = 0; k < sizeof(nibble_shift) / sizeof(nibble_shift[0]); k++) {
                *out++ = 'a' + ((x >> nibble_shift[k]) & 0xF);
            }
        }
        *out++ = '_';
    }
    *out = '\0';

    if (newname[0] == '\0') {
        free(newname);
        return NULL;
    }
    return newname;
}
275 
/*
 * Decode a property name whose 16-bit units pack two base-64 symbols each.
 *
 * Every unit except the last one is read with cli_readint16(), has 0x3800
 * subtracted and is split into two 6-bit indices into a 64-symbol alphabet.
 * As soon as a unit falls outside the packed range (above 0x1040 after the
 * subtraction) the partial result is dropped and the name is rendered by
 * cli_ole2_get_property_name2() instead.
 *
 * name - raw name bytes.
 * size - length of name in bytes.
 *
 * Returns a heap string the caller must free(), or NULL when size holds no
 * complete unit or memory cannot be allocated.
 */
static char *
get_property_name(char *name, int size)
{
    static const char alphabet[] = "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz._";
    char *src     = name;
    char *decoded;
    int remaining = size >> 1;
    int pos       = 0;

    if (remaining <= 0) {
        return NULL;
    }

    decoded = (char *)cli_malloc(size);
    if (decoded == NULL) {
        cli_errmsg("OLE2 [get_property_name]: Unable to allocate memory for newname %u\n", size);
        return NULL;
    }

    /* The last unit is never decoded, hence the decrement before the loop. */
    for (--remaining; remaining != 0; --remaining) {
        uint16_t u = cli_readint16(src) - 0x3800;
        uint16_t lo, hi;

        src += 2;
        if (u > 0x1040) {
            free(decoded);
            return cli_ole2_get_property_name2(name, size);
        }

        lo = u % 64;
        u >>= 6;
        hi = u % 64;

        decoded[pos++] = alphabet[lo];
        /* In the final decoded unit a high part of 64 means "no second symbol". */
        if (!(remaining == 1 && u == 64)) {
            decoded[pos++] = alphabet[hi];
        }
    }
    decoded[pos] = '\0';

    return decoded;
}
310 
311 static void
print_ole2_property(property_t * property)312 print_ole2_property(property_t *property)
313 {
314     char spam[128], *buf;
315 
316     if (property->name_size > 64) {
317         cli_dbgmsg("[err name len: %d]\n", property->name_size);
318         return;
319     }
320     buf = get_property_name(property->name, property->name_size);
321     snprintf(spam, sizeof(spam), "OLE2: %s ", buf ? buf : "<noname>");
322     spam[sizeof(spam) - 1] = '\0';
323     if (buf)
324         free(buf);
325     switch (property->type) {
326         case 2:
327             strncat(spam, " [file] ", sizeof(spam) - 1 - strlen(spam));
328             break;
329         case 1:
330             strncat(spam, " [dir ] ", sizeof(spam) - 1 - strlen(spam));
331             break;
332         case 5:
333             strncat(spam, " [root] ", sizeof(spam) - 1 - strlen(spam));
334             break;
335         default:
336             strncat(spam, " [unkn] ", sizeof(spam) - 1 - strlen(spam));
337     }
338     spam[sizeof(spam) - 1] = '\0';
339     switch (property->color) {
340         case 0:
341             strncat(spam, " r  ", sizeof(spam) - 1 - strlen(spam));
342             break;
343         case 1:
344             strncat(spam, " b  ", sizeof(spam) - 1 - strlen(spam));
345             break;
346         default:
347             strncat(spam, " u  ", sizeof(spam) - 1 - strlen(spam));
348     }
349     spam[sizeof(spam) - 1] = '\0';
350     cli_dbgmsg("%s size:0x%.8x flags:0x%.8x\n", spam, property->size, property->user_flags);
351 }
352 
353 static void
print_ole2_header(ole2_header_t * hdr)354 print_ole2_header(ole2_header_t *hdr)
355 {
356     if (!hdr || !cli_debug_flag) {
357         return;
358     }
359     cli_dbgmsg("\n");
360     cli_dbgmsg("Magic:\t\t\t0x%x%x%x%x%x%x%x%x\n",
361                hdr->magic[0], hdr->magic[1], hdr->magic[2], hdr->magic[3],
362                hdr->magic[4], hdr->magic[5], hdr->magic[6], hdr->magic[7]);
363 
364     cli_dbgmsg("CLSID:\t\t\t{%x%x%x%x-%x%x-%x%x-%x%x-%x%x%x%x%x%x}\n",
365                hdr->clsid[0], hdr->clsid[1], hdr->clsid[2], hdr->clsid[3],
366                hdr->clsid[4], hdr->clsid[5], hdr->clsid[6], hdr->clsid[7],
367                hdr->clsid[8], hdr->clsid[9], hdr->clsid[10], hdr->clsid[11],
368                hdr->clsid[12], hdr->clsid[13], hdr->clsid[14], hdr->clsid[15]);
369 
370     cli_dbgmsg("Minor version:\t\t0x%x\n", hdr->minor_version);
371     cli_dbgmsg("DLL version:\t\t0x%x\n", hdr->dll_version);
372     cli_dbgmsg("Byte Order:\t\t%d\n", hdr->byte_order);
373     cli_dbgmsg("Big Block Size:\t%i\n", hdr->log2_big_block_size);
374     cli_dbgmsg("Small Block Size:\t%i\n", hdr->log2_small_block_size);
375     cli_dbgmsg("BAT count:\t\t%d\n", hdr->bat_count);
376     cli_dbgmsg("Prop start:\t\t%d\n", hdr->prop_start);
377     cli_dbgmsg("SBAT cutoff:\t\t%d\n", hdr->sbat_cutoff);
378     cli_dbgmsg("SBat start:\t\t%d\n", hdr->sbat_start);
379     cli_dbgmsg("SBat block count:\t%d\n", hdr->sbat_block_count);
380     cli_dbgmsg("XBat start:\t\t%d\n", hdr->xbat_start);
381     cli_dbgmsg("XBat block count:\t%d\n", hdr->xbat_count);
382     cli_dbgmsg("\n");
383     return;
384 }
385 
/*
 * Copy one block of the mapped file into buff.
 *
 * hdr     - supplies the block-size exponent, the file length (m_length) and
 *           the fmap to read from.
 * buff    - destination buffer; must have room for size bytes.
 * size    - number of bytes requested.
 * blockno - zero-based block number. Block 0 starts after the header area,
 *           which is MAX(512, big block size) bytes long.
 *
 * Returns TRUE on success. Returns FALSE when blockno is negative, when the
 * computed range starts outside the file, or when the fmap read fails. When
 * only the tail of the range lies past the end of the file, the available
 * bytes are copied and the remainder of buff is left zero-filled.
 */
static int
ole2_read_block(ole2_header_t *hdr, void *buff, unsigned int size, int32_t blockno)
{
    off_t offset, offend;
    const void *pblock;

    if (blockno < 0) {
        return FALSE;
    }
    /* other methods: (blockno+1) * 512 or (blockno * block_size) + 512; */
    /*
     * The comparison is done in 64 bits first so that the int arithmetic in
     * the branch below is only evaluated when the result stays under
     * INT32_MAX.
     */
    if (((uint64_t)blockno << hdr->log2_big_block_size) < (INT32_MAX - MAX(512, (uint64_t)1 << hdr->log2_big_block_size))) {
        /* 512 is header size */
        offset = (blockno << hdr->log2_big_block_size) + MAX(512, 1 << hdr->log2_big_block_size);
        offend = offset + size;
    } else {
        /* Block offset too large: clamp the range so that it ends at INT32_MAX. */
        offset = INT32_MAX - size;
        offend = INT32_MAX;
    }

    if ((offend <= 0) || (offset < 0) || (offset >= hdr->m_length)) {
        return FALSE;
    } else if (offend > hdr->m_length) {
        /* bb#11369 - ole2 files may not be a block multiple in size */
        /* Zero the whole buffer, then read only what the file still has. */
        memset(buff, 0, size);
        size = hdr->m_length - offset;
    }
    if (!(pblock = fmap_need_off_once(hdr->map, offset, size))) {
        return FALSE;
    }
    memcpy(buff, pblock, size);
    return TRUE;
}
418 
419 static int32_t
ole2_get_next_bat_block(ole2_header_t * hdr,int32_t current_block)420 ole2_get_next_bat_block(ole2_header_t *hdr, int32_t current_block)
421 {
422     int32_t bat_array_index;
423     uint32_t bat[128];
424 
425     if (current_block < 0) {
426         return -1;
427     }
428     bat_array_index = current_block / 128;
429     if (bat_array_index > hdr->bat_count) {
430         cli_dbgmsg("bat_array index error\n");
431         return -10;
432     }
433     if (!ole2_read_block(hdr, &bat, 512,
434                          ole2_endian_convert_32(hdr->bat_array[bat_array_index]))) {
435         return -1;
436     }
437     return ole2_endian_convert_32(bat[current_block - (bat_array_index * 128)]);
438 }
439 
440 static int32_t
ole2_get_next_xbat_block(ole2_header_t * hdr,int32_t current_block)441 ole2_get_next_xbat_block(ole2_header_t *hdr, int32_t current_block)
442 {
443     int32_t xbat_index, xbat_block_index, bat_index, bat_blockno;
444     uint32_t xbat[128], bat[128];
445 
446     if (current_block < 0) {
447         return -1;
448     }
449     xbat_index = current_block / 128;
450 
451     /*
452      * NB:	The last entry in each XBAT points to the next XBAT block.
453      * This reduces the number of entries in each block by 1.
454      */
455     xbat_block_index = (xbat_index - 109) / 127;
456     bat_blockno      = (xbat_index - 109) % 127;
457 
458     bat_index = current_block % 128;
459 
460     if (!ole2_read_block(hdr, &xbat, 512, hdr->xbat_start)) {
461         return -1;
462     }
463     /* Follow the chain of XBAT blocks */
464     while (xbat_block_index > 0) {
465         if (!ole2_read_block(hdr, &xbat, 512,
466                              ole2_endian_convert_32(xbat[127]))) {
467             return -1;
468         }
469         xbat_block_index--;
470     }
471 
472     if (!ole2_read_block(hdr, &bat, 512, ole2_endian_convert_32(xbat[bat_blockno]))) {
473         return -1;
474     }
475     return ole2_endian_convert_32(bat[bat_index]);
476 }
477 
478 static int32_t
ole2_get_next_block_number(ole2_header_t * hdr,int32_t current_block)479 ole2_get_next_block_number(ole2_header_t *hdr, int32_t current_block)
480 {
481     if (current_block < 0) {
482         return -1;
483     }
484     if ((current_block / 128) > 108) {
485         return ole2_get_next_xbat_block(hdr, current_block);
486     } else {
487         return ole2_get_next_bat_block(hdr, current_block);
488     }
489 }
490 
491 static int32_t
ole2_get_next_sbat_block(ole2_header_t * hdr,int32_t current_block)492 ole2_get_next_sbat_block(ole2_header_t *hdr, int32_t current_block)
493 {
494     int32_t iter, current_bat_block;
495     uint32_t sbat[128];
496 
497     if (current_block < 0) {
498         return -1;
499     }
500     current_bat_block = hdr->sbat_start;
501     iter              = current_block / 128;
502     while (iter > 0) {
503         current_bat_block = ole2_get_next_block_number(hdr, current_bat_block);
504         iter--;
505     }
506     if (!ole2_read_block(hdr, &sbat, 512, current_bat_block)) {
507         return -1;
508     }
509     return ole2_endian_convert_32(sbat[current_block % 128]);
510 }
511 
512 /* Retrieve the block containing the data for the given sbat index */
513 static int32_t
ole2_get_sbat_data_block(ole2_header_t * hdr,void * buff,int32_t sbat_index)514 ole2_get_sbat_data_block(ole2_header_t *hdr, void *buff, int32_t sbat_index)
515 {
516     int32_t block_count, current_block;
517 
518     if (sbat_index < 0) {
519         return FALSE;
520     }
521     if (hdr->sbat_root_start < 0) {
522         cli_dbgmsg("No root start block\n");
523         return FALSE;
524     }
525     block_count   = sbat_index / (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size));
526     current_block = hdr->sbat_root_start;
527     while (block_count > 0) {
528         current_block = ole2_get_next_block_number(hdr, current_block);
529         block_count--;
530     }
531 
532     /*
533      * current_block now contains the block number of the sbat array
534      * containing the entry for the required small block
535      */
536 
537     return (ole2_read_block(hdr, buff, 1 << hdr->log2_big_block_size, current_block));
538 }
539 
/*
 * Walk the property tree starting at prop_index and run handler on every
 * file entry found.
 *
 * Sibling links (prev/next) are processed iteratively through a local work
 * list; child links are processed by recursing into this function.
 *
 * hdr        - decoder state. hdr->bitset records visited property indices
 *              (loop detection) and hdr->sbat_root_start is set here when the
 *              root entry is processed.
 * dir        - directory that sub-directories are created in; when NULL no
 *              directories are created and properties are not printed.
 * prop_index - index of the property to start from.
 * handler    - callback invoked for each type 2 (file) entry.
 * rec_level  - current depth; incremented when descending below a root or
 *              directory entry, unchanged below a file entry.
 * file_count - running count of file entries passed to handler.
 * ctx        - scan context used for the engine limits.
 * scansize   - remaining size budget; reduced by the size of each file entry
 *              passed to handler.
 *
 * Returns CL_SUCCESS on a completed walk (also when the hard limits of 100
 * levels or 100000 files are hit), CL_VIRUS when a match was recorded in
 * all-match mode, CL_EMAXREC / CL_EMAXFILES when engine limits are exceeded,
 * CL_ETIMEOUT on a JSON timeout, CL_BREAK on a tree loop or mkdir failure,
 * CL_EMEM on allocation failure, or any other non-success code returned by
 * handler or a recursive call.
 *
 * NOTE(review): SCAN_ALLMATCHES and SCAN_COLLECT_METADATA are used without
 * the "ctx &&" guard that the limit checks use - confirm that ctx is never
 * NULL on those paths.
 */
static int
ole2_walk_property_tree(ole2_header_t *hdr, const char *dir, int32_t prop_index,
                        int (*handler)(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx),
                        unsigned int rec_level, unsigned int *file_count, cli_ctx *ctx, unsigned long *scansize)
{
    property_t prop_block[4]; /* one 512-byte block holds four properties */
    int32_t idx, current_block, i, curindex;
    char *dirname;
    ole2_list_t node_list;
    int ret, func_ret;
#if HAVE_JSON
    char *name;
    int toval = 0;
#endif

    ole2_listmsg("ole2_walk_property_tree() called\n");
    func_ret = CL_SUCCESS;
    ole2_list_init(&node_list);

    ole2_listmsg("rec_level: %d\n", rec_level);
    ole2_listmsg("file_count: %d\n", *file_count);

    /* Hard limits; the work list is still empty here, so nothing to free. */
    if ((rec_level > 100) || (*file_count > 100000)) {
        return CL_SUCCESS;
    }

    if (ctx && ctx->engine->max_recursion_level && (rec_level > ctx->engine->max_recursion_level)) {
        // Note: engine->max_recursion_level is re-purposed here out of convenience.
        //       ole2 recursion does not leverage the ctx->recursion_stack stack.
        cli_dbgmsg("OLE2: Recursion limit reached (max: %d)\n", ctx->engine->max_recursion_level);
        cli_append_virus_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxRecursion");
        return CL_EMAXREC;
    }

    /* Push the 'root' node for this level onto the local list. */
    if ((ret = ole2_list_push(&node_list, prop_index)) != CL_SUCCESS) {
        ole2_list_delete(&node_list);
        return ret;
    }

    while (!ole2_list_is_empty(&node_list)) {
        ole2_listmsg("within working loop, worklist size: %d\n", ole2_list_size(&node_list));
#if HAVE_JSON
        if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) {
            ole2_list_delete(&node_list);
            return CL_ETIMEOUT;
        }
#endif

        /* Every lookup restarts from the first block of the property chain. */
        current_block = hdr->prop_start;

        /* Pop off a node to work on. */
        curindex = ole2_list_pop(&node_list);
        ole2_listmsg("current index: %d\n", curindex);
        if ((curindex < 0) || (curindex > (int32_t)hdr->max_block_no)) {
            continue;
        }
        /* Read in the sector referenced by the current index (4 properties per block). */
        idx = curindex / 4;
        for (i = 0; i < idx; i++) {
            current_block = ole2_get_next_block_number(hdr, current_block);
            if (current_block < 0) {
                /*
                 * NOTE(review): this only continues the inner for loop. The
                 * negative block number stays negative and is rejected by
                 * ole2_read_block() below - confirm whether a break out to
                 * the while loop was intended.
                 */
                continue;
            }
        }
        idx = curindex % 4;
        if (!ole2_read_block(hdr, prop_block, 512, current_block)) {
            continue;
        }
        /* type is an unsigned char, so this skips entries with type 0. */
        if (prop_block[idx].type <= 0) {
            continue;
        }
        ole2_listmsg("reading prop block\n");

        /* Convert the multi-byte fields of the selected property in place. */
        prop_block[idx].name_size       = ole2_endian_convert_16(prop_block[idx].name_size);
        prop_block[idx].prev            = ole2_endian_convert_32(prop_block[idx].prev);
        prop_block[idx].next            = ole2_endian_convert_32(prop_block[idx].next);
        prop_block[idx].child           = ole2_endian_convert_32(prop_block[idx].child);
        prop_block[idx].user_flags      = ole2_endian_convert_32(prop_block[idx].user_flags);
        prop_block[idx].create_lowdate  = ole2_endian_convert_32(prop_block[idx].create_lowdate);
        prop_block[idx].create_highdate = ole2_endian_convert_32(prop_block[idx].create_highdate);
        prop_block[idx].mod_lowdate     = ole2_endian_convert_32(prop_block[idx].mod_lowdate);
        prop_block[idx].mod_highdate    = ole2_endian_convert_32(prop_block[idx].mod_highdate);
        prop_block[idx].start_block     = ole2_endian_convert_32(prop_block[idx].start_block);
        prop_block[idx].size            = ole2_endian_convert_32(prop_block[idx].size);

        ole2_listmsg("printing ole2 property\n");
        if (dir)
            print_ole2_property(&prop_block[idx]);

        ole2_listmsg("checking bitset\n");
        /* Check we aren't in a loop */
        if (cli_bitset_test(hdr->bitset, (unsigned long)curindex)) {
            /* Loop in property tree detected */
            cli_dbgmsg("OLE2: Property tree loop detected at index %d\n", curindex);
            ole2_list_delete(&node_list);
            return CL_BREAK;
        }
        ole2_listmsg("setting bitset\n");
        if (!cli_bitset_set(hdr->bitset, (unsigned long)curindex)) {
            continue;
        }
        ole2_listmsg("prev: %d next %d child %d\n", prop_block[idx].prev, prop_block[idx].next, prop_block[idx].child);

        ole2_listmsg("node type: %d\n", prop_block[idx].type);
        /*
         * Every case follows the same outline: handle the entry itself,
         * recurse into its child (if any), then queue prev/next siblings.
         * A link value of -1 means "no such link".
         */
        switch (prop_block[idx].type) {
            case 5: /* Root Entry */
                ole2_listmsg("root node\n");
                if ((curindex != 0) || (rec_level != 0) ||
                    (*file_count != 0)) {
                    /* Can only have RootEntry as the top */
                    cli_dbgmsg("ERROR: illegal Root Entry\n");
                    continue;
                }
                /* The root entry's start block is where small-block data begins. */
                hdr->sbat_root_start = prop_block[idx].start_block;
                if ((int)(prop_block[idx].child) != -1) {
                    ret = ole2_walk_property_tree(hdr, dir, prop_block[idx].child, handler, rec_level + 1, file_count, ctx, scansize);
                    if (ret != CL_SUCCESS) {
                        /* In all-match mode remember the detection and keep walking. */
                        if (SCAN_ALLMATCHES && (ret == CL_VIRUS)) {
                            func_ret = ret;
                        } else {
                            ole2_list_delete(&node_list);
                            return ret;
                        }
                    }
                }
                if ((int)(prop_block[idx].prev) != -1) {
                    if ((ret = ole2_list_push(&node_list, prop_block[idx].prev)) != CL_SUCCESS) {
                        ole2_list_delete(&node_list);
                        return ret;
                    }
                }
                if ((int)(prop_block[idx].next) != -1) {
                    if ((ret = ole2_list_push(&node_list, prop_block[idx].next)) != CL_SUCCESS) {
                        ole2_list_delete(&node_list);
                        return ret;
                    }
                }
                break;
            case 2: /* File */
                ole2_listmsg("file node\n");
                if (ctx && ctx->engine->maxfiles && ((*file_count > ctx->engine->maxfiles) || (ctx->scannedfiles > ctx->engine->maxfiles - *file_count))) {
                    cli_dbgmsg("OLE2: files limit reached (max: %u)\n", ctx->engine->maxfiles);
                    cli_append_virus_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxFiles");
                    ole2_list_delete(&node_list);
                    return CL_EMAXFILES;
                }
                /*
                 * NOTE(review): the entry is accepted when it fits EITHER
                 * maxfilesize OR the remaining scansize, yet its size is
                 * always subtracted from *scansize, which is unsigned and can
                 * therefore wrap - confirm this is acceptable.
                 */
                if (!ctx || !(ctx->engine->maxfilesize) || prop_block[idx].size <= ctx->engine->maxfilesize || prop_block[idx].size <= *scansize) {
                    (*file_count)++;
                    *scansize -= prop_block[idx].size;
                    ole2_listmsg("running file handler\n");
                    ret = handler(hdr, &prop_block[idx], dir, ctx);
                    if (ret != CL_SUCCESS) {
                        if (SCAN_ALLMATCHES && (ret == CL_VIRUS)) {
                            func_ret = ret;
                        } else {
                            ole2_listmsg("file handler returned %d\n", ret);
                            ole2_list_delete(&node_list);
                            return ret;
                        }
                    }
                } else {
                    cli_dbgmsg("OLE2: filesize exceeded\n");
                }
                /* Unlike root and directory entries, rec_level is not increased here. */
                if ((int)(prop_block[idx].child) != -1) {
                    ret = ole2_walk_property_tree(hdr, dir, prop_block[idx].child, handler, rec_level, file_count, ctx, scansize);
                    if (ret != CL_SUCCESS) {
                        if (SCAN_ALLMATCHES && (ret == CL_VIRUS)) {
                            func_ret = ret;
                        } else {
                            ole2_list_delete(&node_list);
                            return ret;
                        }
                    }
                }
                if ((int)(prop_block[idx].prev) != -1) {
                    if ((ret = ole2_list_push(&node_list, prop_block[idx].prev)) != CL_SUCCESS) {
                        ole2_list_delete(&node_list);
                        return ret;
                    }
                }
                if ((int)(prop_block[idx].next) != -1) {
                    if ((ret = ole2_list_push(&node_list, prop_block[idx].next)) != CL_SUCCESS) {
                        ole2_list_delete(&node_list);
                        return ret;
                    }
                }
                break;
            case 1: /* Directory */
                ole2_listmsg("directory node\n");
                if (dir) {
#if HAVE_JSON
                    /* Flag the presence of a digital-signature storage in the metadata. */
                    if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
                        if (!json_object_object_get_ex(ctx->wrkproperty, "DigitalSignatures", NULL)) {
                            name = cli_ole2_get_property_name2(prop_block[idx].name, prop_block[idx].name_size);
                            if (name) {
                                if (!strcmp(name, "_xmlsignatures") || !strcmp(name, "_signatures")) {
                                    cli_jsonbool(ctx->wrkproperty, "HasDigitalSignatures", 1);
                                }
                                free(name);
                            }
                        }
                    }
#endif
                    /*
                     * Room for dir + separator + six digits + NUL.
                     * NOTE(review): an index above 999999 prints more than
                     * six digits and would be truncated by snprintf() -
                     * confirm max_block_no rules this out.
                     */
                    dirname = (char *)cli_malloc(strlen(dir) + 8);
                    if (!dirname) {
                        ole2_listmsg("OLE2: malloc failed for dirname\n");
                        ole2_list_delete(&node_list);
                        return CL_EMEM;
                    }
                    snprintf(dirname, strlen(dir) + 8, "%s" PATHSEP "%.6d", dir, curindex);
                    if (mkdir(dirname, 0700) != 0) {
                        ole2_listmsg("OLE2: mkdir failed for directory %s\n", dirname);
                        free(dirname);
                        ole2_list_delete(&node_list);
                        return CL_BREAK;
                    }
                    cli_dbgmsg("OLE2 dir entry: %s\n", dirname);
                } else
                    dirname = NULL;
                if ((int)(prop_block[idx].child) != -1) {
                    ret = ole2_walk_property_tree(hdr, dirname, prop_block[idx].child, handler, rec_level + 1, file_count, ctx, scansize);
                    if (ret != CL_SUCCESS) {
                        if (SCAN_ALLMATCHES && (ret == CL_VIRUS)) {
                            func_ret = ret;
                        } else {
                            ole2_list_delete(&node_list);
                            if (dirname)
                                free(dirname);
                            return ret;
                        }
                    }
                }
                if (dirname) {
                    free(dirname);
                    dirname = NULL;
                }
                if ((int)(prop_block[idx].prev) != -1) {
                    if ((ret = ole2_list_push(&node_list, prop_block[idx].prev)) != CL_SUCCESS) {
                        ole2_list_delete(&node_list);
                        return ret;
                    }
                }
                if ((int)(prop_block[idx].next) != -1) {
                    if ((ret = ole2_list_push(&node_list, prop_block[idx].next)) != CL_SUCCESS) {
                        ole2_list_delete(&node_list);
                        return ret;
                    }
                }
                break;
            default:
                cli_dbgmsg("ERROR: unknown OLE2 entry type: %d\n", prop_block[idx].type);
                break;
        }
        ole2_listmsg("loop ended: %d %d\n", ole2_list_size(&node_list), ole2_list_is_empty(&node_list));
    }
    ole2_list_delete(&node_list);
    return func_ret;
}
799 
800 /* Write file Handler - write the contents of the entry to a file */
801 static int
handler_writefile(ole2_header_t * hdr,property_t * prop,const char * dir,cli_ctx * ctx)802 handler_writefile(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx)
803 {
804     unsigned char *buff;
805     int32_t current_block, ofd;
806     size_t len, offset;
807     char *name, newname[1024];
808     bitset_t *blk_bitset;
809     char *hash;
810     uint32_t cnt;
811 
812     UNUSEDPARAM(ctx);
813 
814     if (prop->type != 2) {
815         /* Not a file */
816         return CL_SUCCESS;
817     }
818     if (prop->name_size > 64) {
819         cli_dbgmsg("OLE2 [handler_writefile]: property name too long: %d\n", prop->name_size);
820         return CL_SUCCESS;
821     }
822     name = cli_ole2_get_property_name2(prop->name, prop->name_size);
823     if (name) {
824         cli_dbgmsg("Storing %s in uniq\n", name);
825         if (CL_SUCCESS != uniq_add(hdr->U, name, strlen(name), &hash, &cnt)) {
826             free(name);
827             cli_dbgmsg("OLE2 [handler_writefile]: too many property names added to uniq store.\n");
828             return CL_BREAK;
829         }
830     } else {
831         if (CL_SUCCESS != uniq_add(hdr->U, NULL, 0, &hash, &cnt)) {
832             cli_dbgmsg("OLE2 [handler_writefile]: too many property names added to uniq store.\n");
833             return CL_BREAK;
834         }
835     }
836     snprintf(newname, sizeof(newname), "%s" PATHSEP "%s_%u", dir, hash, cnt);
837     newname[sizeof(newname) - 1] = '\0';
838     cli_dbgmsg("OLE2 [handler_writefile]: Dumping '%s' to '%s'\n", name ? name : "<empty>", newname);
839     if (name)
840         free(name);
841 
842     ofd = open(newname, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
843     if (ofd < 0) {
844         cli_errmsg("OLE2 [handler_writefile]: failed to create file: %s\n", newname);
845         return CL_SUCCESS;
846     }
847     current_block = prop->start_block;
848     len           = prop->size;
849 
850     buff = (unsigned char *)cli_malloc(1 << hdr->log2_big_block_size);
851     if (!buff) {
852         cli_errmsg("OLE2 [handler_writefile]: Unable to allocate memory for buff: %u\n", 1 << hdr->log2_big_block_size);
853         close(ofd);
854         return CL_BREAK;
855     }
856     blk_bitset = cli_bitset_init();
857     if (!blk_bitset) {
858         cli_errmsg("OLE2 [handler_writefile]: init bitset failed\n");
859         close(ofd);
860         free(buff);
861         return CL_BREAK;
862     }
863     while ((current_block >= 0) && (len > 0)) {
864         if (current_block > (int32_t)hdr->max_block_no) {
865             cli_dbgmsg("OLE2 [handler_writefile]: Max block number for file size exceeded: %d\n", current_block);
866             close(ofd);
867             free(buff);
868             cli_bitset_free(blk_bitset);
869             return CL_SUCCESS;
870         }
871         /* Check we aren't in a loop */
872         if (cli_bitset_test(blk_bitset, (unsigned long)current_block)) {
873             /* Loop in block list */
874             cli_dbgmsg("OLE2 [handler_writefile]: Block list loop detected\n");
875             close(ofd);
876             free(buff);
877             cli_bitset_free(blk_bitset);
878             return CL_BREAK;
879         }
880         if (!cli_bitset_set(blk_bitset, (unsigned long)current_block)) {
881             close(ofd);
882             free(buff);
883             cli_bitset_free(blk_bitset);
884             return CL_BREAK;
885         }
886         if (prop->size < (int64_t)hdr->sbat_cutoff) {
887             /* Small block file */
888             if (!ole2_get_sbat_data_block(hdr, buff, current_block)) {
889                 cli_dbgmsg("OLE2 [handler_writefile]: ole2_get_sbat_data_block failed\n");
890                 close(ofd);
891                 free(buff);
892                 cli_bitset_free(blk_bitset);
893                 return CL_SUCCESS;
894             }
895             /* buff now contains the block with N small blocks in it */
896             offset = (1 << hdr->log2_small_block_size) * (current_block % (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size)));
897 
898             if (cli_writen(ofd, &buff[offset], MIN(len, 1 << hdr->log2_small_block_size)) != MIN(len, 1 << hdr->log2_small_block_size)) {
899                 close(ofd);
900                 free(buff);
901                 cli_bitset_free(blk_bitset);
902                 return CL_BREAK;
903             }
904             len -= MIN(len, 1 << hdr->log2_small_block_size);
905             current_block = ole2_get_next_sbat_block(hdr, current_block);
906         } else {
907             /* Big block file */
908             if (!ole2_read_block(hdr, buff, 1 << hdr->log2_big_block_size, current_block)) {
909                 close(ofd);
910                 free(buff);
911                 cli_bitset_free(blk_bitset);
912                 return CL_SUCCESS;
913             }
914             if (cli_writen(ofd, buff, MIN(len, (1 << hdr->log2_big_block_size))) != MIN(len, (1 << hdr->log2_big_block_size))) {
915                 close(ofd);
916                 free(buff);
917                 cli_bitset_free(blk_bitset);
918                 return CL_BREAK;
919             }
920             current_block = ole2_get_next_block_number(hdr, current_block);
921             len -= MIN(len, (1 << hdr->log2_big_block_size));
922         }
923     }
924     close(ofd);
925     free(buff);
926     cli_bitset_free(blk_bitset);
927     return CL_SUCCESS;
928 }
929 
/*
 * States of the byte-at-a-time BIFF record parser driven by
 * scan_biff_for_xlm_macros(). The first four states walk the 4-byte record
 * header; the remaining three are entered while a record's data bytes are
 * being consumed.
 */
enum biff_parser_states {
    BIFF_PARSER_INITIAL                   = 0, /* next byte is the low byte of the opcode */
    BIFF_PARSER_EXPECTING_2ND_TAG_BYTE    = 1, /* next byte is the high byte of the opcode */
    BIFF_PARSER_EXPECTING_1ST_LENGTH_BYTE = 2, /* next byte is the low byte of the data length */
    BIFF_PARSER_EXPECTING_2ND_LENGTH_BYTE = 3, /* next byte is the high byte of the data length */
    BIFF_PARSER_NAME_RECORD               = 4, /* inside the data of an opcode 0x18 record */
    BIFF_PARSER_BOUNDSHEET_RECORD         = 5, /* inside the data of an opcode 0x85 record */
    BIFF_PARSER_DATA                      = 6, /* inside the data of any other record */
};
939 
940 struct biff_parser_state {
941     enum biff_parser_states state;
942     uint16_t opcode;
943     uint16_t length;
944     uint16_t data_offset;
945     uint8_t tmp;
946 };
947 
948 /**
949  * Scan through a buffer of BIFF records and find PARSERNAME, BOUNDSHEET records (Which indicate XLM  macros).
950  * BIFF streams follow the format OOLLDDDDDDDDD..., where OO is the opcode (little endian 16 bit value),
951  * LL is the data length (little endian 16 bit value), followed by LL bytes of data. Records are defined in
952  * the MICROSOFT OFFICE EXCEL 97-2007 BINARY FILE FORMAT SPECIFICATION.
953  *
954  * \param state The parser state.
955  * \param buff The buffer.
956  * \param len The buffer's size in bytes.
957  * \param ctx The ClamAV context for emitting JSON about the document.
958  * \returns true if a macro has been found, false otherwise.
959  */
960 static bool
scan_biff_for_xlm_macros(struct biff_parser_state * state,unsigned char * buff,size_t len,cli_ctx * ctx)961 scan_biff_for_xlm_macros(struct biff_parser_state *state, unsigned char *buff, size_t len, cli_ctx *ctx)
962 {
963     size_t i;
964     bool found_macro = false;
965 
966     for (i = 0; i < len; ++i) {
967         switch (state->state) {
968             case BIFF_PARSER_INITIAL:
969                 state->opcode = buff[i];
970                 state->state  = BIFF_PARSER_EXPECTING_2ND_TAG_BYTE;
971                 break;
972             case BIFF_PARSER_EXPECTING_2ND_TAG_BYTE:
973                 state->opcode |= buff[i] << 8;
974                 state->state = BIFF_PARSER_EXPECTING_1ST_LENGTH_BYTE;
975                 break;
976             case BIFF_PARSER_EXPECTING_1ST_LENGTH_BYTE:
977                 state->length = buff[i];
978                 state->state  = BIFF_PARSER_EXPECTING_2ND_LENGTH_BYTE;
979                 break;
980             case BIFF_PARSER_EXPECTING_2ND_LENGTH_BYTE:
981                 state->length |= buff[i] << 8;
982                 state->data_offset = 0;
983                 switch (state->opcode) {
984                     case 0x85:
985                         state->state = BIFF_PARSER_BOUNDSHEET_RECORD;
986                         break;
987                     case 0x18:
988                         state->state = BIFF_PARSER_NAME_RECORD;
989                         break;
990                     default:
991                         state->state = BIFF_PARSER_DATA;
992                         break;
993                 }
994                 if (state->length == 0) {
995                     state->state = BIFF_PARSER_INITIAL;
996                 }
997                 break;
998             default:
999                 switch (state->state) {
1000                     case BIFF_PARSER_NAME_RECORD:
1001 #if HAVE_JSON
1002                         if (state->data_offset == 0) {
1003                             state->tmp = buff[i] & 0x20;
1004                         } else if ((state->data_offset == 14 || state->data_offset == 15) && state->tmp) {
1005                             if (buff[i] == 1 || buff[i] == 2) {
1006                                 if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
1007                                     json_object *indicators = cli_jsonarray(ctx->wrkproperty, "MacroIndicators");
1008                                     if (indicators) {
1009                                         cli_jsonstr(indicators, NULL, "autorun");
1010                                     } else {
1011                                         cli_dbgmsg("[scan_biff_for_xlm_macros] Failed to add \"autorun\" entry to MacroIndicators JSON array\n");
1012                                     }
1013                                 }
1014                             }
1015 
1016                             if (buff[i] != 0) {
1017                                 state->tmp = 0;
1018                             }
1019                         }
1020 #endif
1021                         break;
1022                     case BIFF_PARSER_BOUNDSHEET_RECORD:
1023                         if (state->data_offset == 4) {
1024                             state->tmp = buff[i];
1025                         } else if (state->data_offset == 5 && buff[i] == 1) { //Excel 4.0 macro sheet
1026                             cli_dbgmsg("[scan_biff_for_xlm_macros] Found XLM macro sheet\n");
1027 #if HAVE_JSON
1028                             if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
1029                                 cli_jsonbool(ctx->wrkproperty, "HasMacros", 1);
1030                                 json_object *macro_languages = cli_jsonarray(ctx->wrkproperty, "MacroLanguages");
1031                                 if (macro_languages) {
1032                                     cli_jsonstr(macro_languages, NULL, "XLM");
1033                                 } else {
1034                                     cli_dbgmsg("[scan_biff_for_xlm_macros] Failed to add \"XLM\" entry to MacroLanguages JSON array\n");
1035                                 }
1036                                 if (state->tmp == 1 || state->tmp == 2) {
1037                                     json_object *indicators = cli_jsonarray(ctx->wrkproperty, "MacroIndicators");
1038                                     if (indicators) {
1039                                         cli_jsonstr(indicators, NULL, "hidden");
1040                                     } else {
1041                                         cli_dbgmsg("[scan_biff_for_xlm_macros] Failed to add \"hidden\" entry to MacroIndicators JSON array\n");
1042                                     }
1043                                 }
1044                             }
1045 #endif
1046                             found_macro = true;
1047                         }
1048                         break;
1049                     case BIFF_PARSER_DATA:
1050                         break;
1051                     default:
1052                         //Should never arrive here
1053                         cli_dbgmsg("[scan_biff_for_xlm_macros] Unexpected state value %d\n", (int)state->state);
1054                         break;
1055                 }
1056                 state->data_offset += 1;
1057 
1058                 if (state->data_offset >= state->length) {
1059                     state->state = BIFF_PARSER_INITIAL;
1060                 }
1061         }
1062     }
1063     return found_macro;
1064 }
1065 
1066 /**
1067  * Scan for XLM (Excel 4.0) macro sheets in an OLE2 Workbook stream.
1068  * The stream should be encoded with <= BIFF8.
1069  */
1070 static int
scan_for_xlm_macros(ole2_header_t * hdr,property_t * prop,const char * dir,cli_ctx * ctx)1071 scan_for_xlm_macros(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx)
1072 {
1073     unsigned char *buff = NULL;
1074     int32_t current_block;
1075     size_t len, offset;
1076     bitset_t *blk_bitset = NULL;
1077     struct biff_parser_state state;
1078     bool found_macro = false;
1079 
1080     UNUSEDPARAM(ctx);
1081     UNUSEDPARAM(dir);
1082 
1083     if (prop->type != 2) {
1084         /* Not a file */
1085         goto done;
1086     }
1087 
1088     state.state   = BIFF_PARSER_INITIAL;
1089     state.length  = 0;
1090     current_block = prop->start_block;
1091     len           = prop->size;
1092 
1093     buff = (unsigned char *)cli_malloc(1 << hdr->log2_big_block_size);
1094     if (!buff) {
1095         cli_errmsg("OLE2 [scan_for_xlm_macros]: Unable to allocate memory for buff: %u\n", 1 << hdr->log2_big_block_size);
1096         goto done;
1097     }
1098     blk_bitset = cli_bitset_init();
1099     if (!blk_bitset) {
1100         cli_errmsg("OLE2 [scan_for_xlm_macros]: init bitset failed\n");
1101         goto done;
1102     }
1103     while ((current_block >= 0) && (len > 0)) {
1104         if (current_block > (int32_t)hdr->max_block_no) {
1105             cli_dbgmsg("OLE2 [scan_for_xlm_macros]: Max block number for file size exceeded: %d\n", current_block);
1106             goto done;
1107         }
1108         /* Check we aren't in a loop */
1109         if (cli_bitset_test(blk_bitset, (unsigned long)current_block)) {
1110             /* Loop in block list */
1111             cli_dbgmsg("OLE2 [scan_for_xlm_macros]: Block list loop detected\n");
1112             goto done;
1113         }
1114         if (!cli_bitset_set(blk_bitset, (unsigned long)current_block)) {
1115             goto done;
1116         }
1117         if (prop->size < (int64_t)hdr->sbat_cutoff) {
1118             /* Small block file */
1119             if (!ole2_get_sbat_data_block(hdr, buff, current_block)) {
1120                 cli_dbgmsg("OLE2 [scan_for_xlm_macros]: ole2_get_sbat_data_block failed\n");
1121                 goto done;
1122             }
1123             /* buff now contains the block with N small blocks in it */
1124             offset = (1 << hdr->log2_small_block_size) * (current_block % (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size)));
1125 
1126             found_macro = scan_biff_for_xlm_macros(&state, &buff[offset], MIN(len, 1 << hdr->log2_small_block_size), ctx) || found_macro;
1127             len -= MIN(len, 1 << hdr->log2_small_block_size);
1128             current_block = ole2_get_next_sbat_block(hdr, current_block);
1129         } else {
1130             /* Big block file */
1131             if (!ole2_read_block(hdr, buff, 1 << hdr->log2_big_block_size, current_block)) {
1132                 goto done;
1133             }
1134 
1135             found_macro   = scan_biff_for_xlm_macros(&state, buff, MIN(len, (1 << hdr->log2_big_block_size)), ctx) || found_macro;
1136             current_block = ole2_get_next_block_number(hdr, current_block);
1137             len -= MIN(len, (1 << hdr->log2_big_block_size));
1138         }
1139     }
1140 
1141 done:
1142     if (buff) {
1143         free(buff);
1144     }
1145     if (blk_bitset) {
1146         cli_bitset_free(blk_bitset);
1147     }
1148     return found_macro;
1149 }
1150 
1151 /* enum file Handler - checks for VBA presence */
1152 static int
handler_enum(ole2_header_t * hdr,property_t * prop,const char * dir,cli_ctx * ctx)1153 handler_enum(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx)
1154 {
1155     char *name = NULL;
1156     unsigned char *hwp_check;
1157     int32_t offset;
1158     int ret = CL_SUCCESS;
1159 #if HAVE_JSON
1160     json_object *arrobj, *strmobj;
1161 
1162     name = cli_ole2_get_property_name2(prop->name, prop->name_size);
1163     if (name) {
1164         if (SCAN_COLLECT_METADATA && ctx->wrkproperty != NULL) {
1165             arrobj = cli_jsonarray(ctx->wrkproperty, "Streams");
1166             if (NULL == arrobj) {
1167                 cli_warnmsg("ole2: no memory for streams list or streams is not an array\n");
1168             } else {
1169                 strmobj = json_object_new_string(name);
1170                 json_object_array_add(arrobj, strmobj);
1171             }
1172 
1173             if (!strcmp(name, "powerpoint document")) {
1174                 cli_jsonstr(ctx->wrkproperty, "FileType", "CL_TYPE_MSPPT");
1175             }
1176             if (!strcmp(name, "worddocument")) {
1177                 cli_jsonstr(ctx->wrkproperty, "FileType", "CL_TYPE_MSWORD");
1178             }
1179             if (!strcmp(name, "workbook")) {
1180                 cli_jsonstr(ctx->wrkproperty, "FileType", "CL_TYPE_MSXL");
1181             }
1182         }
1183     }
1184 #else
1185     UNUSEDPARAM(ctx);
1186 #endif
1187     UNUSEDPARAM(dir);
1188 
1189     if (!hdr->has_vba) {
1190         if (!name)
1191             name = cli_ole2_get_property_name2(prop->name, prop->name_size);
1192         if (name) {
1193             if (!strcmp(name, "_vba_project") || !strcmp(name, "powerpoint document") || !strcmp(name, "worddocument") || !strcmp(name, "_1_ole10native"))
1194                 hdr->has_vba = 1;
1195         }
1196     }
1197 
1198     /*
1199      * if we can find a root entry fileheader, it may be a HWP file
1200      * identify the HWP signature "HWP Document File" at offset 0 stream
1201      */
1202     if (!hdr->is_hwp) {
1203         if (!name)
1204             name = cli_ole2_get_property_name2(prop->name, prop->name_size);
1205         if (name) {
1206             if (!strcmp(name, "fileheader")) {
1207                 hwp_check = (unsigned char *)cli_calloc(1, 1 << hdr->log2_big_block_size);
1208                 if (!hwp_check) {
1209                     free(name);
1210                     return CL_EMEM;
1211                 }
1212 
1213                 /* reading safety checks; do-while used for breaks */
1214                 do {
1215                     if (prop->size == 0)
1216                         break;
1217 
1218                     if (prop->start_block > hdr->max_block_no)
1219                         break;
1220 
1221                     /* read the header block (~256 bytes) */
1222                     offset = 0;
1223                     if (prop->size < (int64_t)hdr->sbat_cutoff) {
1224                         if (!ole2_get_sbat_data_block(hdr, hwp_check, prop->start_block)) {
1225                             ret = CL_EREAD;
1226                             break;
1227                         }
1228                         offset = (1 << hdr->log2_small_block_size) *
1229                                  (prop->start_block % (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size)));
1230 
1231                         /* reading safety */
1232                         if (offset + 40 >= 1 << hdr->log2_big_block_size)
1233                             break;
1234                     } else {
1235                         if (!ole2_read_block(hdr, hwp_check, 1 << hdr->log2_big_block_size, prop->start_block)) {
1236                             ret = CL_EREAD;
1237                             break;
1238                         }
1239                     }
1240 
1241                     /* compare against HWP signature; we could add the 15 padding NULLs too */
1242                     if (!memcmp(hwp_check + offset, "HWP Document File", 17)) {
1243                         hwp5_header_t *hwp_new;
1244 #if HAVE_JSON
1245                         cli_jsonstr(ctx->wrkproperty, "FileType", "CL_TYPE_HWP5");
1246 #endif
1247                         hwp_new = cli_calloc(1, sizeof(hwp5_header_t));
1248                         if (!(hwp_new)) {
1249                             ret = CL_EMEM;
1250                             break;
1251                         }
1252 
1253                         memcpy(hwp_new, hwp_check + offset, sizeof(hwp5_header_t));
1254 
1255                         hwp_new->version = ole2_endian_convert_32(hwp_new->version);
1256                         hwp_new->flags   = ole2_endian_convert_32(hwp_new->flags);
1257 
1258                         hdr->is_hwp = hwp_new;
1259                     }
1260                 } while (0);
1261 
1262                 free(hwp_check);
1263             }
1264         }
1265     }
1266 
1267     if (!hdr->has_xlm) {
1268         if (!name) {
1269             name = cli_ole2_get_property_name2(prop->name, prop->name_size);
1270         }
1271 
1272         if (name && (strcmp(name, "workbook") == 0 || strcmp(name, "book") == 0)) {
1273             hdr->has_xlm = scan_for_xlm_macros(hdr, prop, dir, ctx);
1274         }
1275     }
1276 
1277     if (name)
1278         free(name);
1279     return ret;
1280 }
1281 
/*
 * Quick pre-check for a compressed MSO stream: reports whether the two bytes
 * at file offset 4 (just past the 4-byte prefix that scan_mso_stream() skips)
 * are 0x78 0x9C.
 *
 * Returns 1 on a match and 0 otherwise; files shorter than 6 bytes as well as
 * seek/read failures also yield 0. The file position of `fd` is moved by this
 * function and is not restored.
 */
static int
likely_mso_stream(int fd)
{
    unsigned char magic[2];
    const off_t total = lseek(fd, 0, SEEK_END);

    if (total == -1) {
        cli_dbgmsg("likely_mso_stream: call to lseek() failed\n");
        return 0;
    }
    if (total < 6) {
        /* Too small to hold the prefix plus the two bytes inspected below. */
        return 0;
    }

    if (lseek(fd, 4, SEEK_SET) == -1) {
        cli_dbgmsg("likely_mso_stream: call to lseek() failed\n");
        return 0;
    }

    if (cli_readn(fd, magic, 2) != 2) {
        cli_dbgmsg("likely_mso_stream: reading from fd failed\n");
        return 0;
    }

    return (magic[0] == 0x78 && magic[1] == 0x9C) ? 1 : 0;
}
1311 
scan_mso_stream(int fd,cli_ctx * ctx)1312 static cl_error_t scan_mso_stream(int fd, cli_ctx *ctx)
1313 {
1314     int zret, ofd;
1315     cl_error_t ret = CL_SUCCESS;
1316     fmap_t *input;
1317     off_t off_in = 0;
1318     size_t count, outsize = 0;
1319     z_stream zstrm;
1320     char *tmpname;
1321     uint32_t prefix;
1322     unsigned char inbuf[FILEBUFF], outbuf[FILEBUFF];
1323 
1324     /* fmap the input file for easier manipulation */
1325     if (fd < 0) {
1326         cli_dbgmsg("scan_mso_stream: Invalid file descriptor argument\n");
1327         return CL_ENULLARG;
1328     } else {
1329         STATBUF statbuf;
1330 
1331         if (FSTAT(fd, &statbuf) == -1) {
1332             cli_dbgmsg("scan_mso_stream: Can't stat file descriptor\n");
1333             return CL_ESTAT;
1334         }
1335 
1336         input = fmap(fd, 0, statbuf.st_size, NULL);
1337         if (!input) {
1338             cli_dbgmsg("scan_mso_stream: Failed to get fmap for input stream\n");
1339             return CL_EMAP;
1340         }
1341     }
1342 
1343     /* reserve tempfile for output and scanning */
1344     if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tmpname, &ofd)) != CL_SUCCESS) {
1345         cli_errmsg("scan_mso_stream: Can't generate temporary file\n");
1346         funmap(input);
1347         return ret;
1348     }
1349 
1350     /* initialize zlib inflation stream */
1351     memset(&zstrm, 0, sizeof(zstrm));
1352     zstrm.zalloc    = Z_NULL;
1353     zstrm.zfree     = Z_NULL;
1354     zstrm.opaque    = Z_NULL;
1355     zstrm.next_in   = inbuf;
1356     zstrm.next_out  = outbuf;
1357     zstrm.avail_in  = 0;
1358     zstrm.avail_out = FILEBUFF;
1359 
1360     zret = inflateInit(&zstrm);
1361     if (zret != Z_OK) {
1362         cli_dbgmsg("scan_mso_stream: Can't initialize zlib inflation stream\n");
1363         ret = CL_EUNPACK;
1364         goto mso_end;
1365     }
1366 
1367     /* extract 32-bit prefix */
1368     if (fmap_readn(input, &prefix, off_in, sizeof(prefix)) != sizeof(prefix)) {
1369         cli_dbgmsg("scan_mso_stream: Can't extract 4-byte prefix\n");
1370         ret = CL_EREAD;
1371         goto mso_end;
1372     }
1373 
1374     /* RFC1952 says numbers are stored with least significant byte first */
1375     prefix = le32_to_host(prefix);
1376 
1377     off_in += sizeof(uint32_t);
1378     cli_dbgmsg("scan_mso_stream: stream prefix = %08x(%d)\n", prefix, prefix);
1379 
1380     /* inflation loop */
1381     do {
1382         if (zstrm.avail_in == 0) {
1383             size_t bytes_read;
1384 
1385             zstrm.next_in = inbuf;
1386             bytes_read    = fmap_readn(input, inbuf, off_in, FILEBUFF);
1387             if (bytes_read == (size_t)-1) {
1388                 cli_errmsg("scan_mso_stream: Error reading MSO file\n");
1389                 ret = CL_EUNPACK;
1390                 goto mso_end;
1391             }
1392             if (bytes_read == 0)
1393                 break;
1394 
1395             zstrm.avail_in = bytes_read;
1396             off_in += bytes_read;
1397         }
1398         zret  = inflate(&zstrm, Z_SYNC_FLUSH);
1399         count = FILEBUFF - zstrm.avail_out;
1400         if (count) {
1401             if (cli_checklimits("MSO", ctx, outsize + count, 0, 0) != CL_SUCCESS)
1402                 break;
1403             if (cli_writen(ofd, outbuf, count) != count) {
1404                 cli_errmsg("scan_mso_stream: Can't write to file %s\n", tmpname);
1405                 ret = CL_EWRITE;
1406                 goto mso_end;
1407             }
1408             outsize += count;
1409         }
1410         zstrm.next_out  = outbuf;
1411         zstrm.avail_out = FILEBUFF;
1412     } while (zret == Z_OK);
1413 
1414     /* post inflation checks */
1415     if (zret != Z_STREAM_END && zret != Z_OK) {
1416         if (outsize == 0) {
1417             cli_infomsg(ctx, "scan_mso_stream: Error decompressing MSO file. No data decompressed.\n");
1418             ret = CL_EUNPACK;
1419             goto mso_end;
1420         }
1421 
1422         cli_infomsg(ctx, "scan_mso_stream: Error decompressing MSO file. Scanning what was decompressed.\n");
1423     }
1424     cli_dbgmsg("scan_mso_stream: Decompressed %llu bytes to %s\n", (long long unsigned)outsize, tmpname);
1425 
1426     if (outsize != prefix) {
1427         cli_warnmsg("scan_mso_stream: declared prefix != inflated stream size, %llu != %llu\n",
1428                     (long long unsigned)prefix, (long long unsigned)outsize);
1429     } else {
1430         cli_dbgmsg("scan_mso_stream: declared prefix == inflated stream size, %llu == %llu\n",
1431                    (long long unsigned)prefix, (long long unsigned)outsize);
1432     }
1433 
1434     /* scanning inflated stream */
1435     ret = cli_magic_scan_desc(ofd, tmpname, ctx, NULL);
1436 
1437     /* clean-up */
1438 mso_end:
1439     zret = inflateEnd(&zstrm);
1440     if (zret != Z_OK)
1441         ret = CL_EUNPACK;
1442     close(ofd);
1443     if (!ctx->engine->keeptmp)
1444         if (cli_unlink(tmpname))
1445             ret = CL_EUNLINK;
1446     free(tmpname);
1447     funmap(input);
1448     return ret;
1449 }
1450 
1451 static int
handler_otf(ole2_header_t * hdr,property_t * prop,const char * dir,cli_ctx * ctx)1452 handler_otf(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx)
1453 {
1454     char *tempfile, *name = NULL;
1455     unsigned char *buff;
1456     int32_t current_block;
1457     size_t len, offset;
1458     int ofd, is_mso, ret;
1459     bitset_t *blk_bitset;
1460 
1461     UNUSEDPARAM(dir);
1462 
1463     if (prop->type != 2) {
1464         /* Not a file */
1465         return CL_SUCCESS;
1466     }
1467     print_ole2_property(prop);
1468 
1469     if (!(tempfile = cli_gentemp(ctx ? ctx->sub_tmpdir : NULL)))
1470         return CL_EMEM;
1471 
1472     if ((ofd = open(tempfile, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) < 0) {
1473         cli_dbgmsg("OLE2: Can't create file %s\n", tempfile);
1474         free(tempfile);
1475         return CL_ECREAT;
1476     }
1477     current_block = prop->start_block;
1478     len           = prop->size;
1479 
1480     if (cli_debug_flag) {
1481         if (!name)
1482             name = cli_ole2_get_property_name2(prop->name, prop->name_size);
1483         cli_dbgmsg("OLE2 [handler_otf]: Dumping '%s' to '%s'\n", name, tempfile);
1484     }
1485 
1486     buff = (unsigned char *)cli_malloc(1 << hdr->log2_big_block_size);
1487     if (!buff) {
1488         close(ofd);
1489         if (name)
1490             free(name);
1491         cli_unlink(tempfile);
1492         free(tempfile);
1493         return CL_EMEM;
1494     }
1495     blk_bitset = cli_bitset_init();
1496 
1497     if (!blk_bitset) {
1498         cli_errmsg("OLE2: OTF handler init bitset failed\n");
1499         free(buff);
1500         close(ofd);
1501         if (name)
1502             free(name);
1503         if (cli_unlink(tempfile)) {
1504             free(tempfile);
1505             return CL_EUNLINK;
1506         }
1507         free(tempfile);
1508         return CL_BREAK;
1509     }
1510     while ((current_block >= 0) && (len > 0)) {
1511         if (current_block > (int32_t)hdr->max_block_no) {
1512             cli_dbgmsg("OLE2: Max block number for file size exceeded: %d\n", current_block);
1513             break;
1514         }
1515         /* Check we aren't in a loop */
1516         if (cli_bitset_test(blk_bitset, (unsigned long)current_block)) {
1517             /* Loop in block list */
1518             cli_dbgmsg("OLE2: Block list loop detected\n");
1519             break;
1520         }
1521         if (!cli_bitset_set(blk_bitset, (unsigned long)current_block)) {
1522             break;
1523         }
1524         if (prop->size < (int64_t)hdr->sbat_cutoff) {
1525             /* Small block file */
1526             if (!ole2_get_sbat_data_block(hdr, buff, current_block)) {
1527                 cli_dbgmsg("ole2_get_sbat_data_block failed\n");
1528                 break;
1529             }
1530             /* buff now contains the block with N small blocks in it */
1531             offset = (1 << hdr->log2_small_block_size) * (current_block % (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size)));
1532             if (cli_writen(ofd, &buff[offset], MIN(len, 1 << hdr->log2_small_block_size)) != MIN(len, 1 << hdr->log2_small_block_size)) {
1533                 close(ofd);
1534                 if (name)
1535                     free(name);
1536                 free(buff);
1537                 cli_bitset_free(blk_bitset);
1538                 if (cli_unlink(tempfile)) {
1539                     free(tempfile);
1540                     return CL_EUNLINK;
1541                 }
1542                 free(tempfile);
1543                 return CL_BREAK;
1544             }
1545             len -= MIN(len, 1 << hdr->log2_small_block_size);
1546             current_block = ole2_get_next_sbat_block(hdr, current_block);
1547         } else {
1548             /* Big block file */
1549             if (!ole2_read_block(hdr, buff, 1 << hdr->log2_big_block_size, current_block)) {
1550                 break;
1551             }
1552             if (cli_writen(ofd, buff, MIN(len, (1 << hdr->log2_big_block_size))) != MIN(len, (1 << hdr->log2_big_block_size))) {
1553                 close(ofd);
1554                 if (name)
1555                     free(name);
1556                 free(buff);
1557                 cli_bitset_free(blk_bitset);
1558                 if (cli_unlink(tempfile)) {
1559                     free(tempfile);
1560                     return CL_EUNLINK;
1561                 }
1562                 free(tempfile);
1563                 return CL_EWRITE;
1564             }
1565             current_block = ole2_get_next_block_number(hdr, current_block);
1566             len -= MIN(len, (1 << hdr->log2_big_block_size));
1567         }
1568     }
1569 
1570     /* defragmenting of ole2 stream complete */
1571 
1572     is_mso = likely_mso_stream(ofd);
1573     if (lseek(ofd, 0, SEEK_SET) == -1) {
1574         close(ofd);
1575         if (name)
1576             free(name);
1577         if (ctx && !(ctx->engine->keeptmp))
1578             cli_unlink(tempfile);
1579 
1580         free(tempfile);
1581         free(buff);
1582         cli_bitset_free(blk_bitset);
1583         return CL_ESEEK;
1584     }
1585 
1586 #if HAVE_JSON
1587     /* JSON Output Summary Information */
1588     if (SCAN_COLLECT_METADATA && (ctx->properties != NULL)) {
1589         if (!name)
1590             name = cli_ole2_get_property_name2(prop->name, prop->name_size);
1591         if (name) {
1592             if (!strncmp(name, "_5_summaryinformation", 21)) {
1593                 cli_dbgmsg("OLE2: detected a '_5_summaryinformation' stream\n");
1594                 /* JSONOLE2 - what to do if something breaks? */
1595                 if (cli_ole2_summary_json(ctx, ofd, 0) == CL_ETIMEOUT) {
1596                     free(name);
1597                     close(ofd);
1598                     if (ctx && !(ctx->engine->keeptmp))
1599                         cli_unlink(tempfile);
1600 
1601                     free(tempfile);
1602                     free(buff);
1603                     cli_bitset_free(blk_bitset);
1604                     return CL_ETIMEOUT;
1605                 }
1606             }
1607             if (!strncmp(name, "_5_documentsummaryinformation", 29)) {
1608                 cli_dbgmsg("OLE2: detected a '_5_documentsummaryinformation' stream\n");
1609                 /* JSONOLE2 - what to do if something breaks? */
1610                 if (cli_ole2_summary_json(ctx, ofd, 1) == CL_ETIMEOUT) {
1611                     free(name);
1612                     close(ofd);
1613                     if (ctx && !(ctx->engine->keeptmp))
1614                         cli_unlink(tempfile);
1615 
1616                     free(tempfile);
1617                     free(buff);
1618                     cli_bitset_free(blk_bitset);
1619                     return CL_ETIMEOUT;
1620                 }
1621             }
1622         }
1623     }
1624 #endif
1625 
1626     if (hdr->is_hwp) {
1627         if (!name)
1628             name = cli_ole2_get_property_name2(prop->name, prop->name_size);
1629         ret = cli_scanhwp5_stream(ctx, hdr->is_hwp, name, ofd, tempfile);
1630     } else if (is_mso < 0) {
1631         ret = CL_ESEEK;
1632     } else if (is_mso) {
1633         /* MSO Stream Scan */
1634         ret = scan_mso_stream(ofd, ctx);
1635     } else {
1636         /* Normal File Scan */
1637         ret = cli_magic_scan_desc(ofd, tempfile, ctx, NULL);
1638     }
1639     if (name)
1640         free(name);
1641     close(ofd);
1642     free(buff);
1643     cli_bitset_free(blk_bitset);
1644     if (ctx && !ctx->engine->keeptmp) {
1645         if (cli_unlink(tempfile)) {
1646             free(tempfile);
1647             return CL_EUNLINK;
1648         }
1649     }
1650     free(tempfile);
1651     return ret == CL_VIRUS ? CL_VIRUS : CL_SUCCESS;
1652 }
1653 
1654 #if !defined(HAVE_ATTRIB_PACKED) && !defined(HAVE_PRAGMA_PACK) && !defined(HAVE_PRAGMA_PACK_HPPA)
1655 static int
ole2_read_header(int fd,ole2_header_t * hdr)1656 ole2_read_header(int fd, ole2_header_t *hdr)
1657 {
1658     int i;
1659 
1660     if (cli_readn(fd, &hdr->magic, 8) != 8) {
1661         return FALSE;
1662     }
1663     if (cli_readn(fd, &hdr->clsid, 16) != 16) {
1664         return FALSE;
1665     }
1666     if (cli_readn(fd, &hdr->minor_version, 2) != 2) {
1667         return FALSE;
1668     }
1669     if (cli_readn(fd, &hdr->dll_version, 2) != 2) {
1670         return FALSE;
1671     }
1672     if (cli_readn(fd, &hdr->byte_order, 2) != 2) {
1673         return FALSE;
1674     }
1675     if (cli_readn(fd, &hdr->log2_big_block_size, 2) != 2) {
1676         return FALSE;
1677     }
1678     if (cli_readn(fd, &hdr->log2_small_block_size, 4) != 4) {
1679         return FALSE;
1680     }
1681     if (cli_readn(fd, &hdr->reserved, 8) != 8) {
1682         return FALSE;
1683     }
1684     if (cli_readn(fd, &hdr->bat_count, 4) != 4) {
1685         return FALSE;
1686     }
1687     if (cli_readn(fd, &hdr->prop_start, 4) != 4) {
1688         return FALSE;
1689     }
1690     if (cli_readn(fd, &hdr->signature, 4) != 4) {
1691         return FALSE;
1692     }
1693     if (cli_readn(fd, &hdr->sbat_cutoff, 4) != 4) {
1694         return FALSE;
1695     }
1696     if (cli_readn(fd, &hdr->sbat_start, 4) != 4) {
1697         return FALSE;
1698     }
1699     if (cli_readn(fd, &hdr->sbat_block_count, 4) != 4) {
1700         return FALSE;
1701     }
1702     if (cli_readn(fd, &hdr->xbat_start, 4) != 4) {
1703         return FALSE;
1704     }
1705     if (cli_readn(fd, &hdr->xbat_count, 4) != 4) {
1706         return FALSE;
1707     }
1708     for (i = 0; i < 109; i++) {
1709         if (cli_readn(fd, &hdr->bat_array[i], 4) != 4) {
1710             return FALSE;
1711         }
1712     }
1713     return TRUE;
1714 }
1715 #endif
1716 
/*
 * Parse the OLE2 container mapped at ctx->fmap and process the streams in it.
 *
 * The header is copied out of the map, converted from little-endian and
 * sanity-checked. The property tree is then walked twice: a first pass with
 * handler_enum counts the entries and records whether VBA / XLM content is
 * present, and a second pass either writes the streams out (handler_writefile,
 * when VBA or XLM was seen) or scans them on the fly (handler_otf).
 *
 * dirname: forwarded to the handler_writefile pass (presumably the directory
 *          the streams are written into - confirm against the handler).
 * ctx:     scan context; must not be NULL.
 * files:   receives the table created with uniq_init() when VBA or XLM content
 *          was found; must not be NULL in that case. Not written otherwise.
 * has_vba: optional; receives hdr.has_vba when VBA or XLM content was found.
 * has_xlm: optional; receives hdr.has_xlm when VBA or XLM content was found.
 *
 * Returns CL_ENULLARG for a NULL ctx (or a NULL files when a file list has to
 * be handed back), CL_EMAXSIZE when the scan size budget is already used up,
 * CL_EFORMAT for a bad magic or a map larger than INT32_MAX, CL_EMEM when an
 * allocation fails, and CL_CLEAN when the input is too short or has
 * unsupported block sizes. Otherwise the status of the HWP header check or of
 * the relevant tree walk is returned, with CL_BREAK reported as CL_CLEAN.
 */
int cli_ole2_extract(const char *dirname, cli_ctx *ctx, struct uniq **files, int *has_vba, int *has_xlm)
{
    ole2_header_t hdr;
    int ret = CL_CLEAN;
    size_t hdr_size;
    size_t reserved_span;
    unsigned int file_count = 0;
    unsigned long scansize, scansize2;
    const void *phdr;

    cli_dbgmsg("in cli_ole2_extract()\n");
    if (!ctx)
        return CL_ENULLARG;

    /* The abort path inspects these two, so they must be defined from the start. */
    hdr.is_hwp = NULL;
    hdr.bitset = NULL;

    if (ctx->engine->maxscansize) {
        if (ctx->engine->maxscansize > ctx->scansize)
            scansize = ctx->engine->maxscansize - ctx->scansize;
        else
            return CL_EMAXSIZE;
    } else {
        /* No limit configured: use the largest unsigned long value. */
        scansize = -1;
    }

    /* Each pass over the property tree starts from the same budget. */
    scansize2 = scansize;

    /*
     * size of header - size of other values in struct
     *
     * NOTE(review): this subtraction has to account for every member that is
     * not part of the on-disk header. Both hdr.has_vba and hdr.has_xlm are
     * used below but only one sizeof(int) is subtracted here - confirm against
     * the struct definition.
     */
    hdr_size = sizeof(struct ole2_header_tag) - sizeof(int32_t) - sizeof(uint32_t) -
               sizeof(off_t) - sizeof(bitset_t *) -
               sizeof(struct uniq *) - sizeof(fmap_t *) - sizeof(int) - sizeof(hwp5_header_t *);

    if ((size_t)(ctx->fmap->len) < (size_t)(hdr_size)) {
        return CL_CLEAN;
    }
    hdr.map      = ctx->fmap;
    hdr.m_length = hdr.map->len;
    phdr         = fmap_need_off_once(hdr.map, 0, hdr_size);
    if (phdr) {
        memcpy(&hdr, phdr, hdr_size);
    } else {
        cli_dbgmsg("cli_ole2_extract: failed to read header\n");
        goto abort;
    }

    /* The raw header bytes are little-endian; convert to host order. */
    hdr.minor_version         = ole2_endian_convert_16(hdr.minor_version);
    hdr.dll_version           = ole2_endian_convert_16(hdr.dll_version);
    hdr.byte_order            = ole2_endian_convert_16(hdr.byte_order);
    hdr.log2_big_block_size   = ole2_endian_convert_16(hdr.log2_big_block_size);
    hdr.log2_small_block_size = ole2_endian_convert_32(hdr.log2_small_block_size);
    hdr.bat_count             = ole2_endian_convert_32(hdr.bat_count);
    hdr.prop_start            = ole2_endian_convert_32(hdr.prop_start);
    hdr.sbat_cutoff           = ole2_endian_convert_32(hdr.sbat_cutoff);
    hdr.sbat_start            = ole2_endian_convert_32(hdr.sbat_start);
    hdr.sbat_block_count      = ole2_endian_convert_32(hdr.sbat_block_count);
    hdr.xbat_start            = ole2_endian_convert_32(hdr.xbat_start);
    hdr.xbat_count            = ole2_endian_convert_32(hdr.xbat_count);

    hdr.sbat_root_start = -1;

    hdr.bitset = cli_bitset_init();
    if (!hdr.bitset) {
        ret = CL_EMEM;
        goto abort;
    }
    if (memcmp(hdr.magic, magic_id, 8) != 0) {
        cli_dbgmsg("OLE2 magic failed!\n");
        ret = CL_EFORMAT;
        goto abort;
    }
    /* The upper bound of 30 also keeps the 1 << shifts below within int range. */
    if (hdr.log2_big_block_size < 6 || hdr.log2_big_block_size > 30) {
        cli_dbgmsg("CAN'T PARSE: Invalid big block size (2^%u)\n", hdr.log2_big_block_size);
        goto abort;
    }
    if (!hdr.log2_small_block_size || hdr.log2_small_block_size > hdr.log2_big_block_size) {
        cli_dbgmsg("CAN'T PARSE: Invalid small block size (2^%u)\n", hdr.log2_small_block_size);
        goto abort;
    }
    if (hdr.sbat_cutoff != 4096) {
        cli_dbgmsg("WARNING: Untested sbat cutoff (%u); data may not extract correctly\n", hdr.sbat_cutoff);
    }

    if (hdr.map->len > INT32_MAX) {
        cli_dbgmsg("OLE2 extract: Overflow detected\n");
        ret = CL_EFORMAT;
        goto abort;
    }

    /*
     * The block count is the map length minus a leading span (the larger of
     * 512 bytes and one big block), expressed in small-block units. A map
     * shorter than that span would make the unsigned subtraction wrap around
     * to a huge value, so it is treated as holding no blocks at all.
     */
    reserved_span = (size_t)MAX(512, 1 << hdr.log2_big_block_size);
    if ((size_t)hdr.map->len < reserved_span) {
        hdr.max_block_no = 0;
    } else {
        hdr.max_block_no = ((size_t)hdr.map->len - reserved_span) / (1 << hdr.log2_small_block_size);
    }

    print_ole2_header(&hdr);
    cli_dbgmsg("Max block number: %lu\n", (unsigned long int)hdr.max_block_no);

    /* PASS 1 : Count files and check for VBA */
    hdr.has_vba = 0;
    hdr.has_xlm = 0;
    ret         = ole2_walk_property_tree(&hdr, NULL, 0, handler_enum, 0, &file_count, ctx, &scansize);

    /* The second pass starts over with a fresh bitset. */
    cli_bitset_free(hdr.bitset);
    hdr.bitset = NULL;
    if (!file_count)
        goto abort;

    hdr.bitset = cli_bitset_init();
    if (!hdr.bitset) {
        /* Report the allocation failure instead of the status of pass 1. */
        ret = CL_EMEM;
        goto abort;
    }

    if (hdr.is_hwp) {
        cli_dbgmsg("OLE2: identified HWP document\n");
        cli_dbgmsg("OLE2: HWP signature: %.17s\n", hdr.is_hwp->signature);
        cli_dbgmsg("OLE2: HWP version: 0x%08x\n", hdr.is_hwp->version);
        cli_dbgmsg("OLE2: HWP flags:   0x%08x\n", hdr.is_hwp->flags);

        ret = cli_hwp5header(ctx, hdr.is_hwp);
        if (ret != CL_SUCCESS)
            goto abort;
    }

    /* If there's no VBA we scan OTF */
    if (hdr.has_vba || hdr.has_xlm) {
        /* PASS 2/A : VBA scan */
        cli_dbgmsg("OLE2: VBA project found\n");

        /* The file table is handed back through *files; refuse before allocating it. */
        if (!files) {
            cli_dbgmsg("OLE2: no location to return the extracted file list\n");
            ret = CL_ENULLARG;
            goto abort;
        }

        if (!(hdr.U = uniq_init(file_count))) {
            cli_dbgmsg("OLE2: uniq_init() failed\n");
            ret = CL_EMEM;
            goto abort;
        }
        file_count = 0;
        /* The status of this walk is deliberately not propagated. */
        ole2_walk_property_tree(&hdr, dirname, 0, handler_writefile, 0, &file_count, ctx, &scansize2);
        ret    = CL_CLEAN;
        *files = hdr.U;
        if (has_vba) {
            *has_vba = hdr.has_vba;
        }
        if (has_xlm) {
            *has_xlm = hdr.has_xlm;
        }
    } else {
        cli_dbgmsg("OLE2: no VBA projects found\n");
        /* PASS 2/B : OTF scan */
        file_count = 0;
        ret        = ole2_walk_property_tree(&hdr, NULL, 0, handler_otf, 0, &file_count, ctx, &scansize2);
    }

abort:
    if (hdr.bitset)
        cli_bitset_free(hdr.bitset);

    /* free(NULL) is a no-op, so no guard is needed. */
    free(hdr.is_hwp);

    return ret == CL_BREAK ? CL_CLEAN : ret;
}
1863