1 /*
2 * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
3 * Copyright (C) 2007-2013 Sourcefire, Inc.
4 *
5 * Authors: Trog
6 *
7 * Summary: Extract component parts of OLE2 files (e.g. MS Office Documents).
8 *
9 * Acknowledgements: Some ideas and algorithms were based upon OpenOffice and libgsf.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
23 * MA 02110-1301, USA.
24 */
25
26 #if HAVE_CONFIG_H
27 #include "clamav-config.h"
28 #endif
29
30 #include <sys/types.h>
31 #include <sys/stat.h>
32 #include <fcntl.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <ctype.h>
36 #include <stdlib.h>
37 #include <errno.h>
38 #include <conv.h>
39 #include <zlib.h>
40 #ifdef HAVE_UNISTD_H
41 #include <unistd.h>
42 #endif
43 #include <stdbool.h>
44
45 #include "clamav.h"
46 #include "others.h"
47 #include "hwp.h"
48 #include "ole2_extract.h"
49 #include "scanners.h"
50 #include "fmap.h"
51 #include "json_api.h"
52 #if HAVE_JSON
53 #include "msdoc.h"
54 #endif
55
/*
 * Worklist tracing used by the property-tree walker: forwarded to
 * cli_dbgmsg() when DEBUG_OLE2_LIST is defined, otherwise each call expands
 * to a lone ';' (an empty statement).
 */
#ifdef DEBUG_OLE2_LIST
#define ole2_listmsg(...) cli_dbgmsg(__VA_ARGS__)
#else
#define ole2_listmsg(...) ;
#endif

/*
 * Byte-order helpers for 16- and 32-bit fields read from the file; they cast
 * the argument to the unsigned type and hand it to le16_to_host() /
 * le32_to_host().
 */
#define ole2_endian_convert_16(v) le16_to_host((uint16_t)(v))
#define ole2_endian_convert_32(v) le32_to_host((uint32_t)(v))

/*
 * When HAVE_ATTRIB_PACKED is not defined, make every __attribute__((...))
 * annotation below expand to nothing.
 */
#ifndef HAVE_ATTRIB_PACKED
#define __attribute__(x)
#endif
68
69 #ifdef HAVE_PRAGMA_PACK
70 #pragma pack(1)
71 #endif
72
73 #ifdef HAVE_PRAGMA_PACK_HPPA
74 #pragma pack 1
75 #endif
76
/*
 * OLE2 header as read from the start of the file, followed by bookkeeping
 * fields that the parser keeps alongside it while decoding.
 *
 * The leading members are marked packed because they mirror the file layout;
 * the members after the "not part of the ole2 header" marker are filled in by
 * the parser and are not read from the file.
 */
typedef struct ole2_header_tag {
    unsigned char magic[8]; /* should be: 0xd0cf11e0a1b11ae1 */
    unsigned char clsid[16];
    uint16_t minor_version __attribute__((packed));
    uint16_t dll_version __attribute__((packed));
    int16_t byte_order __attribute__((packed)); /* -2=intel */

    uint16_t log2_big_block_size __attribute__((packed));   /* usually 9 (2^9 = 512) */
    uint32_t log2_small_block_size __attribute__((packed)); /* usually 6 (2^6 = 64) */

    int32_t reserved[2] __attribute__((packed));
    int32_t bat_count __attribute__((packed));  /* upper bound used when indexing bat_array */
    int32_t prop_start __attribute__((packed)); /* first block of the property (directory) chain */

    uint32_t signature __attribute__((packed));
    uint32_t sbat_cutoff __attribute__((packed)); /* cutoff for files held
                                                   * in small blocks
                                                   * (4096) */

    int32_t sbat_start __attribute__((packed)); /* first block of the small-block allocation table */
    int32_t sbat_block_count __attribute__((packed));
    int32_t xbat_start __attribute__((packed)); /* first XBAT block, used for blocks beyond bat_array */
    int32_t xbat_count __attribute__((packed));
    int32_t bat_array[109] __attribute__((packed)); /* block numbers of the BAT blocks reachable from the header */

    /* not part of the ole2 header, but stuff we need in order to decode */

    /*
     * must take account of the size of variables below here when reading the
     * header
     */
    int32_t sbat_root_start __attribute__((packed)); /* start block of the root entry; set by the tree walker */
    uint32_t max_block_no;                           /* largest block/property index accepted by the walkers */
    off_t m_length;                                  /* readable length; reads past it are truncated or refused */
    bitset_t *bitset;                                /* property indices already visited (loop detection) */
    struct uniq *U;                                  /* store of property names seen while writing files */
    fmap_t *map;                                     /* file map that block reads are served from */
    int has_vba;                                     /* NOTE(review): presumably set when VBA is found - not used in this part of the file */
    int has_xlm;                                     /* NOTE(review): presumably set when XLM macros are found - confirm */
    hwp5_header_t *is_hwp;                           /* NOTE(review): presumably non-NULL for HWP5 documents - confirm */
} ole2_header_t;
118
/*
 * One entry of the OLE2 property (directory) tree as stored in the file.
 * The tree walker reads four of these per 512-byte read and converts the
 * multi-byte fields to host byte order in place before using them.
 */
typedef struct property_tag {
    char name[64]; /* in unicode */
    uint16_t name_size __attribute__((packed)); /* length of name in bytes; callers reject values above 64 */
    unsigned char type;  /* 1=dir 2=file 5=root */
    unsigned char color; /* black or red */
    uint32_t prev __attribute__((packed));  /* sibling link; -1 means none */
    uint32_t next __attribute__((packed));  /* sibling link; -1 means none */
    uint32_t child __attribute__((packed)); /* first child entry; -1 means none */

    unsigned char clsid[16];
    uint32_t user_flags __attribute__((packed));

    uint32_t create_lowdate __attribute__((packed));
    uint32_t create_highdate __attribute__((packed));
    uint32_t mod_lowdate __attribute__((packed));
    uint32_t mod_highdate __attribute__((packed));
    uint32_t start_block __attribute__((packed)); /* first block of the entry's data chain */
    uint32_t size __attribute__((packed));        /* data size in bytes */
    unsigned char reserved[4];
} property_t;
139
/* Element of the singly linked worklist: a 32-bit payload and the link. */
typedef struct ole2_list_node ole2_list_node_t;

struct ole2_list_node {
    uint32_t Val;
    ole2_list_node_t *Next;
};

/* Worklist handle: element count plus the most recently pushed node. */
typedef struct ole2_list {
    uint32_t Size;
    ole2_list_node_t *Head;
} ole2_list_t;

/* Worklist API (LIFO); the definitions follow below. */
int ole2_list_init(ole2_list_t *list);                /* reset to empty */
int ole2_list_is_empty(ole2_list_t *list);            /* non-zero when no node is queued */
uint32_t ole2_list_size(ole2_list_t *list);           /* number of queued nodes */
int ole2_list_push(ole2_list_t *list, uint32_t val);  /* add val on top */
uint32_t ole2_list_pop(ole2_list_t *list);            /* remove and return the top value */
int ole2_list_delete(ole2_list_t *list);              /* release every node */
158
/* Put a worklist into the empty state. Always returns CL_SUCCESS. */
int ole2_list_init(ole2_list_t *list)
{
    list->Size = 0;
    list->Head = NULL;

    return CL_SUCCESS;
}
165
/* Returns 1 when the worklist holds no node, 0 otherwise. */
int ole2_list_is_empty(ole2_list_t *list)
{
    if (list->Head != NULL) {
        return 0;
    }
    return 1;
}
170
171 uint32_t
ole2_list_size(ole2_list_t * list)172 ole2_list_size(ole2_list_t *list)
173 {
174 return (list->Size);
175 }
176
/*
 * Push val on top of the worklist.
 * Returns CL_SUCCESS, or CL_EMEM when the node cannot be allocated (the list
 * is left untouched in that case).
 */
int ole2_list_push(ole2_list_t *list, uint32_t val)
{
    ole2_list_node_t *node = (ole2_list_node_t *)cli_malloc(sizeof(ole2_list_node_t));

    if (NULL == node) {
        cli_dbgmsg("OLE2: could not allocate new node for worklist!\n");
        return CL_EMEM;
    }

    /* Link the new node in front of the current head. */
    node->Val  = val;
    node->Next = list->Head;
    list->Head = node;
    list->Size += 1;

    return CL_SUCCESS;
}
194
195 uint32_t
ole2_list_pop(ole2_list_t * list)196 ole2_list_pop(ole2_list_t *list)
197 {
198 uint32_t val;
199 ole2_list_node_t *next;
200
201 if (ole2_list_is_empty(list)) {
202 cli_dbgmsg("OLE2: work list is empty and ole2_list_pop() called!\n");
203 return -1;
204 }
205 val = list->Head->Val;
206 next = list->Head->Next;
207
208 free(list->Head);
209 list->Head = next;
210
211 (list->Size)--;
212 return val;
213 }
214
/* Pop (and thereby free) every node of the worklist. Always returns CL_SUCCESS. */
int ole2_list_delete(ole2_list_t *list)
{
    while (list->Head != NULL) {
        (void)ole2_list_pop(list);
    }

    return CL_SUCCESS;
}
221
222 #ifdef HAVE_PRAGMA_PACK
223 #pragma pack()
224 #endif
225
226 #ifdef HAVE_PRAGMA_PACK_HPPA
227 #pragma pack
228 #endif
229
/* Expected first eight bytes of an OLE2 file (see ole2_header_t.magic); read-only. */
static const unsigned char magic_id[] = {0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1};
231
/*
 * Build a printable, NUL-terminated name from an OLE2 property name.
 *
 * The input is consumed two bytes at a time; the final two bytes are skipped
 * (they hold the trailing NUL of the stored name). Each pair is rendered as:
 *   - the lower-cased first byte, when it is printable 7-bit ASCII and the
 *     second byte is 0;
 *   - "_<digit>_", when the first byte is 0..9 and the second byte is 0;
 *   - "_" + five letters in 'a'..'p' + "_" otherwise.
 *
 * \param name  Raw name bytes; at least `size` bytes must be readable.
 * \param size  Length of `name` in bytes; values outside 1..128 are rejected.
 * \returns A string allocated with cli_malloc() that the caller must free(),
 *          or NULL when the arguments are invalid, the name starts with a
 *          zero pair, nothing could be produced, or allocation fails.
 */
char *
cli_ole2_get_property_name2(const char *name, int size)
{
    int i, j;
    char *newname;

    /*
     * Validate the arguments before touching the buffer. Inputs shorter than
     * three bytes never produce any output (the loop below runs zero times),
     * so rejecting them here also keeps the name[1] read in bounds.
     */
    if (name == NULL || size < 3 || size > 128) {
        return NULL;
    }
    if (name[0] == 0 && name[1] == 0) {
        return NULL;
    }

    /* Worst case is 7 output bytes per 2 input bytes, so size * 7 is ample. */
    newname = (char *)cli_malloc(size * 7);
    if (!newname) {
        cli_errmsg("OLE2 [cli_ole2_get_property_name2]: Unable to allocate memory for newname: %u\n", (unsigned int)(size * 7));
        return NULL;
    }

    j = 0;
    /* size-2 to ignore trailing NULL */
    for (i = 0; i < size - 2; i += 2) {
        if ((!(name[i] & 0x80)) && isprint((unsigned char)name[i]) && name[i + 1] == 0) {
            /* The high bit is clear here, so the unsigned cast does not change the value. */
            newname[j++] = tolower((unsigned char)name[i]);
        } else {
            if (name[i] < 10 && name[i] >= 0 && name[i + 1] == 0) {
                newname[j++] = '_';
                newname[j++] = name[i] + '0';
            } else {
                /*
                 * NOTE(review): this expression is kept exactly as it was so
                 * that generated names do not change; where plain char is
                 * signed, a second byte >= 0x80 sign-extends into the upper
                 * byte of x.
                 */
                const uint16_t x = (((uint16_t)name[i]) << 8) | name[i + 1];

                newname[j++] = '_';
                newname[j++] = 'a' + ((x & 0xF));
                newname[j++] = 'a' + ((x >> 4) & 0xF);
                newname[j++] = 'a' + ((x >> 8) & 0xF);
                /*
                 * The original code emitted (x >> 16) and (x >> 24) here;
                 * both are always 0 for a 16-bit value, i.e. always 'a'.
                 * The top nibble (x >> 12) is therefore never emitted; the
                 * output format is preserved as-is.
                 */
                newname[j++] = 'a';
                newname[j++] = 'a';
            }
            newname[j++] = '_';
        }
    }
    newname[j] = '\0';

    if (j == 0) {
        free(newname);
        return NULL;
    }
    return newname;
}
275
/*
 * Decode a property name whose 16-bit units each pack up to two characters:
 * after subtracting 0x3800, the low 6 bits and the next 6 bits of a unit
 * index a 64-entry character table.
 *
 * If any unit falls outside that encoding (value above 0x1040 after the
 * subtraction) the partial result is dropped and the name is converted with
 * cli_ole2_get_property_name2() instead.
 *
 * Returns a cli_malloc()'d string owned by the caller, or NULL when size
 * holds no complete unit or allocation fails.
 */
static char *
get_property_name(char *name, int size)
{
    static const char alphabet[] = "0123456789abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz._";
    const int units = size >> 1;
    const char *src = name;
    char *decoded;
    size_t out = 0;
    int remaining;

    if (units <= 0) {
        return NULL;
    }

    decoded = (char *)cli_malloc(size);
    if (decoded == NULL) {
        cli_errmsg("OLE2 [get_property_name]: Unable to allocate memory for newname %u\n", size);
        return NULL;
    }

    /* The last unit is never decoded; `remaining` counts down to 1. */
    for (remaining = units - 1; remaining > 0; remaining--, src += 2) {
        uint16_t unit = cli_readint16(src) - 0x3800;
        uint16_t upper;

        if (unit > 0x1040) {
            free(decoded);
            return cli_ole2_get_property_name2(name, size);
        }

        upper          = unit >> 6;
        decoded[out++] = alphabet[unit & 0x3F];

        /* On the final decoded unit an upper part of exactly 64 means "no second character". */
        if (remaining != 1 || upper != 64) {
            decoded[out++] = alphabet[upper & 0x3F];
        }
    }
    decoded[out] = '\0';

    return decoded;
}
310
311 static void
print_ole2_property(property_t * property)312 print_ole2_property(property_t *property)
313 {
314 char spam[128], *buf;
315
316 if (property->name_size > 64) {
317 cli_dbgmsg("[err name len: %d]\n", property->name_size);
318 return;
319 }
320 buf = get_property_name(property->name, property->name_size);
321 snprintf(spam, sizeof(spam), "OLE2: %s ", buf ? buf : "<noname>");
322 spam[sizeof(spam) - 1] = '\0';
323 if (buf)
324 free(buf);
325 switch (property->type) {
326 case 2:
327 strncat(spam, " [file] ", sizeof(spam) - 1 - strlen(spam));
328 break;
329 case 1:
330 strncat(spam, " [dir ] ", sizeof(spam) - 1 - strlen(spam));
331 break;
332 case 5:
333 strncat(spam, " [root] ", sizeof(spam) - 1 - strlen(spam));
334 break;
335 default:
336 strncat(spam, " [unkn] ", sizeof(spam) - 1 - strlen(spam));
337 }
338 spam[sizeof(spam) - 1] = '\0';
339 switch (property->color) {
340 case 0:
341 strncat(spam, " r ", sizeof(spam) - 1 - strlen(spam));
342 break;
343 case 1:
344 strncat(spam, " b ", sizeof(spam) - 1 - strlen(spam));
345 break;
346 default:
347 strncat(spam, " u ", sizeof(spam) - 1 - strlen(spam));
348 }
349 spam[sizeof(spam) - 1] = '\0';
350 cli_dbgmsg("%s size:0x%.8x flags:0x%.8x\n", spam, property->size, property->user_flags);
351 }
352
353 static void
print_ole2_header(ole2_header_t * hdr)354 print_ole2_header(ole2_header_t *hdr)
355 {
356 if (!hdr || !cli_debug_flag) {
357 return;
358 }
359 cli_dbgmsg("\n");
360 cli_dbgmsg("Magic:\t\t\t0x%x%x%x%x%x%x%x%x\n",
361 hdr->magic[0], hdr->magic[1], hdr->magic[2], hdr->magic[3],
362 hdr->magic[4], hdr->magic[5], hdr->magic[6], hdr->magic[7]);
363
364 cli_dbgmsg("CLSID:\t\t\t{%x%x%x%x-%x%x-%x%x-%x%x-%x%x%x%x%x%x}\n",
365 hdr->clsid[0], hdr->clsid[1], hdr->clsid[2], hdr->clsid[3],
366 hdr->clsid[4], hdr->clsid[5], hdr->clsid[6], hdr->clsid[7],
367 hdr->clsid[8], hdr->clsid[9], hdr->clsid[10], hdr->clsid[11],
368 hdr->clsid[12], hdr->clsid[13], hdr->clsid[14], hdr->clsid[15]);
369
370 cli_dbgmsg("Minor version:\t\t0x%x\n", hdr->minor_version);
371 cli_dbgmsg("DLL version:\t\t0x%x\n", hdr->dll_version);
372 cli_dbgmsg("Byte Order:\t\t%d\n", hdr->byte_order);
373 cli_dbgmsg("Big Block Size:\t%i\n", hdr->log2_big_block_size);
374 cli_dbgmsg("Small Block Size:\t%i\n", hdr->log2_small_block_size);
375 cli_dbgmsg("BAT count:\t\t%d\n", hdr->bat_count);
376 cli_dbgmsg("Prop start:\t\t%d\n", hdr->prop_start);
377 cli_dbgmsg("SBAT cutoff:\t\t%d\n", hdr->sbat_cutoff);
378 cli_dbgmsg("SBat start:\t\t%d\n", hdr->sbat_start);
379 cli_dbgmsg("SBat block count:\t%d\n", hdr->sbat_block_count);
380 cli_dbgmsg("XBat start:\t\t%d\n", hdr->xbat_start);
381 cli_dbgmsg("XBat block count:\t%d\n", hdr->xbat_count);
382 cli_dbgmsg("\n");
383 return;
384 }
385
386 static int
ole2_read_block(ole2_header_t * hdr,void * buff,unsigned int size,int32_t blockno)387 ole2_read_block(ole2_header_t *hdr, void *buff, unsigned int size, int32_t blockno)
388 {
389 off_t offset, offend;
390 const void *pblock;
391
392 if (blockno < 0) {
393 return FALSE;
394 }
395 /* other methods: (blockno+1) * 512 or (blockno * block_size) + 512; */
396 if (((uint64_t)blockno << hdr->log2_big_block_size) < (INT32_MAX - MAX(512, (uint64_t)1 << hdr->log2_big_block_size))) {
397 /* 512 is header size */
398 offset = (blockno << hdr->log2_big_block_size) + MAX(512, 1 << hdr->log2_big_block_size);
399 offend = offset + size;
400 } else {
401 offset = INT32_MAX - size;
402 offend = INT32_MAX;
403 }
404
405 if ((offend <= 0) || (offset < 0) || (offset >= hdr->m_length)) {
406 return FALSE;
407 } else if (offend > hdr->m_length) {
408 /* bb#11369 - ole2 files may not be a block multiple in size */
409 memset(buff, 0, size);
410 size = hdr->m_length - offset;
411 }
412 if (!(pblock = fmap_need_off_once(hdr->map, offset, size))) {
413 return FALSE;
414 }
415 memcpy(buff, pblock, size);
416 return TRUE;
417 }
418
419 static int32_t
ole2_get_next_bat_block(ole2_header_t * hdr,int32_t current_block)420 ole2_get_next_bat_block(ole2_header_t *hdr, int32_t current_block)
421 {
422 int32_t bat_array_index;
423 uint32_t bat[128];
424
425 if (current_block < 0) {
426 return -1;
427 }
428 bat_array_index = current_block / 128;
429 if (bat_array_index > hdr->bat_count) {
430 cli_dbgmsg("bat_array index error\n");
431 return -10;
432 }
433 if (!ole2_read_block(hdr, &bat, 512,
434 ole2_endian_convert_32(hdr->bat_array[bat_array_index]))) {
435 return -1;
436 }
437 return ole2_endian_convert_32(bat[current_block - (bat_array_index * 128)]);
438 }
439
440 static int32_t
ole2_get_next_xbat_block(ole2_header_t * hdr,int32_t current_block)441 ole2_get_next_xbat_block(ole2_header_t *hdr, int32_t current_block)
442 {
443 int32_t xbat_index, xbat_block_index, bat_index, bat_blockno;
444 uint32_t xbat[128], bat[128];
445
446 if (current_block < 0) {
447 return -1;
448 }
449 xbat_index = current_block / 128;
450
451 /*
452 * NB: The last entry in each XBAT points to the next XBAT block.
453 * This reduces the number of entries in each block by 1.
454 */
455 xbat_block_index = (xbat_index - 109) / 127;
456 bat_blockno = (xbat_index - 109) % 127;
457
458 bat_index = current_block % 128;
459
460 if (!ole2_read_block(hdr, &xbat, 512, hdr->xbat_start)) {
461 return -1;
462 }
463 /* Follow the chain of XBAT blocks */
464 while (xbat_block_index > 0) {
465 if (!ole2_read_block(hdr, &xbat, 512,
466 ole2_endian_convert_32(xbat[127]))) {
467 return -1;
468 }
469 xbat_block_index--;
470 }
471
472 if (!ole2_read_block(hdr, &bat, 512, ole2_endian_convert_32(xbat[bat_blockno]))) {
473 return -1;
474 }
475 return ole2_endian_convert_32(bat[bat_index]);
476 }
477
478 static int32_t
ole2_get_next_block_number(ole2_header_t * hdr,int32_t current_block)479 ole2_get_next_block_number(ole2_header_t *hdr, int32_t current_block)
480 {
481 if (current_block < 0) {
482 return -1;
483 }
484 if ((current_block / 128) > 108) {
485 return ole2_get_next_xbat_block(hdr, current_block);
486 } else {
487 return ole2_get_next_bat_block(hdr, current_block);
488 }
489 }
490
491 static int32_t
ole2_get_next_sbat_block(ole2_header_t * hdr,int32_t current_block)492 ole2_get_next_sbat_block(ole2_header_t *hdr, int32_t current_block)
493 {
494 int32_t iter, current_bat_block;
495 uint32_t sbat[128];
496
497 if (current_block < 0) {
498 return -1;
499 }
500 current_bat_block = hdr->sbat_start;
501 iter = current_block / 128;
502 while (iter > 0) {
503 current_bat_block = ole2_get_next_block_number(hdr, current_bat_block);
504 iter--;
505 }
506 if (!ole2_read_block(hdr, &sbat, 512, current_bat_block)) {
507 return -1;
508 }
509 return ole2_endian_convert_32(sbat[current_block % 128]);
510 }
511
512 /* Retrieve the block containing the data for the given sbat index */
513 static int32_t
ole2_get_sbat_data_block(ole2_header_t * hdr,void * buff,int32_t sbat_index)514 ole2_get_sbat_data_block(ole2_header_t *hdr, void *buff, int32_t sbat_index)
515 {
516 int32_t block_count, current_block;
517
518 if (sbat_index < 0) {
519 return FALSE;
520 }
521 if (hdr->sbat_root_start < 0) {
522 cli_dbgmsg("No root start block\n");
523 return FALSE;
524 }
525 block_count = sbat_index / (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size));
526 current_block = hdr->sbat_root_start;
527 while (block_count > 0) {
528 current_block = ole2_get_next_block_number(hdr, current_block);
529 block_count--;
530 }
531
532 /*
533 * current_block now contains the block number of the sbat array
534 * containing the entry for the required small block
535 */
536
537 return (ole2_read_block(hdr, buff, 1 << hdr->log2_big_block_size, current_block));
538 }
539
540 static int
ole2_walk_property_tree(ole2_header_t * hdr,const char * dir,int32_t prop_index,int (* handler)(ole2_header_t * hdr,property_t * prop,const char * dir,cli_ctx * ctx),unsigned int rec_level,unsigned int * file_count,cli_ctx * ctx,unsigned long * scansize)541 ole2_walk_property_tree(ole2_header_t *hdr, const char *dir, int32_t prop_index,
542 int (*handler)(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx),
543 unsigned int rec_level, unsigned int *file_count, cli_ctx *ctx, unsigned long *scansize)
544 {
545 property_t prop_block[4];
546 int32_t idx, current_block, i, curindex;
547 char *dirname;
548 ole2_list_t node_list;
549 int ret, func_ret;
550 #if HAVE_JSON
551 char *name;
552 int toval = 0;
553 #endif
554
555 ole2_listmsg("ole2_walk_property_tree() called\n");
556 func_ret = CL_SUCCESS;
557 ole2_list_init(&node_list);
558
559 ole2_listmsg("rec_level: %d\n", rec_level);
560 ole2_listmsg("file_count: %d\n", *file_count);
561
562 if ((rec_level > 100) || (*file_count > 100000)) {
563 return CL_SUCCESS;
564 }
565
566 if (ctx && ctx->engine->max_recursion_level && (rec_level > ctx->engine->max_recursion_level)) {
567 // Note: engine->max_recursion_level is re-purposed here out of convenience.
568 // ole2 recursion does not leverage the ctx->recursion_stack stack.
569 cli_dbgmsg("OLE2: Recursion limit reached (max: %d)\n", ctx->engine->max_recursion_level);
570 cli_append_virus_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxRecursion");
571 return CL_EMAXREC;
572 }
573
574 //push the 'root' node for the level onto the local list
575 if ((ret = ole2_list_push(&node_list, prop_index)) != CL_SUCCESS) {
576 ole2_list_delete(&node_list);
577 return ret;
578 }
579
580 while (!ole2_list_is_empty(&node_list)) {
581 ole2_listmsg("within working loop, worklist size: %d\n", ole2_list_size(&node_list));
582 #if HAVE_JSON
583 if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) {
584 ole2_list_delete(&node_list);
585 return CL_ETIMEOUT;
586 }
587 #endif
588
589 current_block = hdr->prop_start;
590
591 //pop off a node to work on
592 curindex = ole2_list_pop(&node_list);
593 ole2_listmsg("current index: %d\n", curindex);
594 if ((curindex < 0) || (curindex > (int32_t)hdr->max_block_no)) {
595 continue;
596 }
597 //read in the sector referenced by the current index
598 idx = curindex / 4;
599 for (i = 0; i < idx; i++) {
600 current_block = ole2_get_next_block_number(hdr, current_block);
601 if (current_block < 0) {
602 continue;
603 }
604 }
605 idx = curindex % 4;
606 if (!ole2_read_block(hdr, prop_block, 512, current_block)) {
607 continue;
608 }
609 if (prop_block[idx].type <= 0) {
610 continue;
611 }
612 ole2_listmsg("reading prop block\n");
613
614 prop_block[idx].name_size = ole2_endian_convert_16(prop_block[idx].name_size);
615 prop_block[idx].prev = ole2_endian_convert_32(prop_block[idx].prev);
616 prop_block[idx].next = ole2_endian_convert_32(prop_block[idx].next);
617 prop_block[idx].child = ole2_endian_convert_32(prop_block[idx].child);
618 prop_block[idx].user_flags = ole2_endian_convert_32(prop_block[idx].user_flags);
619 prop_block[idx].create_lowdate = ole2_endian_convert_32(prop_block[idx].create_lowdate);
620 prop_block[idx].create_highdate = ole2_endian_convert_32(prop_block[idx].create_highdate);
621 prop_block[idx].mod_lowdate = ole2_endian_convert_32(prop_block[idx].mod_lowdate);
622 prop_block[idx].mod_highdate = ole2_endian_convert_32(prop_block[idx].mod_highdate);
623 prop_block[idx].start_block = ole2_endian_convert_32(prop_block[idx].start_block);
624 prop_block[idx].size = ole2_endian_convert_32(prop_block[idx].size);
625
626 ole2_listmsg("printing ole2 property\n");
627 if (dir)
628 print_ole2_property(&prop_block[idx]);
629
630 ole2_listmsg("checking bitset\n");
631 /* Check we aren't in a loop */
632 if (cli_bitset_test(hdr->bitset, (unsigned long)curindex)) {
633 /* Loop in property tree detected */
634 cli_dbgmsg("OLE2: Property tree loop detected at index %d\n", curindex);
635 ole2_list_delete(&node_list);
636 return CL_BREAK;
637 }
638 ole2_listmsg("setting bitset\n");
639 if (!cli_bitset_set(hdr->bitset, (unsigned long)curindex)) {
640 continue;
641 }
642 ole2_listmsg("prev: %d next %d child %d\n", prop_block[idx].prev, prop_block[idx].next, prop_block[idx].child);
643
644 ole2_listmsg("node type: %d\n", prop_block[idx].type);
645 switch (prop_block[idx].type) {
646 case 5: /* Root Entry */
647 ole2_listmsg("root node\n");
648 if ((curindex != 0) || (rec_level != 0) ||
649 (*file_count != 0)) {
650 /* Can only have RootEntry as the top */
651 cli_dbgmsg("ERROR: illegal Root Entry\n");
652 continue;
653 }
654 hdr->sbat_root_start = prop_block[idx].start_block;
655 if ((int)(prop_block[idx].child) != -1) {
656 ret = ole2_walk_property_tree(hdr, dir, prop_block[idx].child, handler, rec_level + 1, file_count, ctx, scansize);
657 if (ret != CL_SUCCESS) {
658 if (SCAN_ALLMATCHES && (ret == CL_VIRUS)) {
659 func_ret = ret;
660 } else {
661 ole2_list_delete(&node_list);
662 return ret;
663 }
664 }
665 }
666 if ((int)(prop_block[idx].prev) != -1) {
667 if ((ret = ole2_list_push(&node_list, prop_block[idx].prev)) != CL_SUCCESS) {
668 ole2_list_delete(&node_list);
669 return ret;
670 }
671 }
672 if ((int)(prop_block[idx].next) != -1) {
673 if ((ret = ole2_list_push(&node_list, prop_block[idx].next)) != CL_SUCCESS) {
674 ole2_list_delete(&node_list);
675 return ret;
676 }
677 }
678 break;
679 case 2: /* File */
680 ole2_listmsg("file node\n");
681 if (ctx && ctx->engine->maxfiles && ((*file_count > ctx->engine->maxfiles) || (ctx->scannedfiles > ctx->engine->maxfiles - *file_count))) {
682 cli_dbgmsg("OLE2: files limit reached (max: %u)\n", ctx->engine->maxfiles);
683 cli_append_virus_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxFiles");
684 ole2_list_delete(&node_list);
685 return CL_EMAXFILES;
686 }
687 if (!ctx || !(ctx->engine->maxfilesize) || prop_block[idx].size <= ctx->engine->maxfilesize || prop_block[idx].size <= *scansize) {
688 (*file_count)++;
689 *scansize -= prop_block[idx].size;
690 ole2_listmsg("running file handler\n");
691 ret = handler(hdr, &prop_block[idx], dir, ctx);
692 if (ret != CL_SUCCESS) {
693 if (SCAN_ALLMATCHES && (ret == CL_VIRUS)) {
694 func_ret = ret;
695 } else {
696 ole2_listmsg("file handler returned %d\n", ret);
697 ole2_list_delete(&node_list);
698 return ret;
699 }
700 }
701 } else {
702 cli_dbgmsg("OLE2: filesize exceeded\n");
703 }
704 if ((int)(prop_block[idx].child) != -1) {
705 ret = ole2_walk_property_tree(hdr, dir, prop_block[idx].child, handler, rec_level, file_count, ctx, scansize);
706 if (ret != CL_SUCCESS) {
707 if (SCAN_ALLMATCHES && (ret == CL_VIRUS)) {
708 func_ret = ret;
709 } else {
710 ole2_list_delete(&node_list);
711 return ret;
712 }
713 }
714 }
715 if ((int)(prop_block[idx].prev) != -1) {
716 if ((ret = ole2_list_push(&node_list, prop_block[idx].prev)) != CL_SUCCESS) {
717 ole2_list_delete(&node_list);
718 return ret;
719 }
720 }
721 if ((int)(prop_block[idx].next) != -1) {
722 if ((ret = ole2_list_push(&node_list, prop_block[idx].next)) != CL_SUCCESS) {
723 ole2_list_delete(&node_list);
724 return ret;
725 }
726 }
727 break;
728 case 1: /* Directory */
729 ole2_listmsg("directory node\n");
730 if (dir) {
731 #if HAVE_JSON
732 if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
733 if (!json_object_object_get_ex(ctx->wrkproperty, "DigitalSignatures", NULL)) {
734 name = cli_ole2_get_property_name2(prop_block[idx].name, prop_block[idx].name_size);
735 if (name) {
736 if (!strcmp(name, "_xmlsignatures") || !strcmp(name, "_signatures")) {
737 cli_jsonbool(ctx->wrkproperty, "HasDigitalSignatures", 1);
738 }
739 free(name);
740 }
741 }
742 }
743 #endif
744 dirname = (char *)cli_malloc(strlen(dir) + 8);
745 if (!dirname) {
746 ole2_listmsg("OLE2: malloc failed for dirname\n");
747 ole2_list_delete(&node_list);
748 return CL_EMEM;
749 }
750 snprintf(dirname, strlen(dir) + 8, "%s" PATHSEP "%.6d", dir, curindex);
751 if (mkdir(dirname, 0700) != 0) {
752 ole2_listmsg("OLE2: mkdir failed for directory %s\n", dirname);
753 free(dirname);
754 ole2_list_delete(&node_list);
755 return CL_BREAK;
756 }
757 cli_dbgmsg("OLE2 dir entry: %s\n", dirname);
758 } else
759 dirname = NULL;
760 if ((int)(prop_block[idx].child) != -1) {
761 ret = ole2_walk_property_tree(hdr, dirname, prop_block[idx].child, handler, rec_level + 1, file_count, ctx, scansize);
762 if (ret != CL_SUCCESS) {
763 if (SCAN_ALLMATCHES && (ret == CL_VIRUS)) {
764 func_ret = ret;
765 } else {
766 ole2_list_delete(&node_list);
767 if (dirname)
768 free(dirname);
769 return ret;
770 }
771 }
772 }
773 if (dirname) {
774 free(dirname);
775 dirname = NULL;
776 }
777 if ((int)(prop_block[idx].prev) != -1) {
778 if ((ret = ole2_list_push(&node_list, prop_block[idx].prev)) != CL_SUCCESS) {
779 ole2_list_delete(&node_list);
780 return ret;
781 }
782 }
783 if ((int)(prop_block[idx].next) != -1) {
784 if ((ret = ole2_list_push(&node_list, prop_block[idx].next)) != CL_SUCCESS) {
785 ole2_list_delete(&node_list);
786 return ret;
787 }
788 }
789 break;
790 default:
791 cli_dbgmsg("ERROR: unknown OLE2 entry type: %d\n", prop_block[idx].type);
792 break;
793 }
794 ole2_listmsg("loop ended: %d %d\n", ole2_list_size(&node_list), ole2_list_is_empty(&node_list));
795 }
796 ole2_list_delete(&node_list);
797 return func_ret;
798 }
799
800 /* Write file Handler - write the contents of the entry to a file */
801 static int
handler_writefile(ole2_header_t * hdr,property_t * prop,const char * dir,cli_ctx * ctx)802 handler_writefile(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx)
803 {
804 unsigned char *buff;
805 int32_t current_block, ofd;
806 size_t len, offset;
807 char *name, newname[1024];
808 bitset_t *blk_bitset;
809 char *hash;
810 uint32_t cnt;
811
812 UNUSEDPARAM(ctx);
813
814 if (prop->type != 2) {
815 /* Not a file */
816 return CL_SUCCESS;
817 }
818 if (prop->name_size > 64) {
819 cli_dbgmsg("OLE2 [handler_writefile]: property name too long: %d\n", prop->name_size);
820 return CL_SUCCESS;
821 }
822 name = cli_ole2_get_property_name2(prop->name, prop->name_size);
823 if (name) {
824 cli_dbgmsg("Storing %s in uniq\n", name);
825 if (CL_SUCCESS != uniq_add(hdr->U, name, strlen(name), &hash, &cnt)) {
826 free(name);
827 cli_dbgmsg("OLE2 [handler_writefile]: too many property names added to uniq store.\n");
828 return CL_BREAK;
829 }
830 } else {
831 if (CL_SUCCESS != uniq_add(hdr->U, NULL, 0, &hash, &cnt)) {
832 cli_dbgmsg("OLE2 [handler_writefile]: too many property names added to uniq store.\n");
833 return CL_BREAK;
834 }
835 }
836 snprintf(newname, sizeof(newname), "%s" PATHSEP "%s_%u", dir, hash, cnt);
837 newname[sizeof(newname) - 1] = '\0';
838 cli_dbgmsg("OLE2 [handler_writefile]: Dumping '%s' to '%s'\n", name ? name : "<empty>", newname);
839 if (name)
840 free(name);
841
842 ofd = open(newname, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
843 if (ofd < 0) {
844 cli_errmsg("OLE2 [handler_writefile]: failed to create file: %s\n", newname);
845 return CL_SUCCESS;
846 }
847 current_block = prop->start_block;
848 len = prop->size;
849
850 buff = (unsigned char *)cli_malloc(1 << hdr->log2_big_block_size);
851 if (!buff) {
852 cli_errmsg("OLE2 [handler_writefile]: Unable to allocate memory for buff: %u\n", 1 << hdr->log2_big_block_size);
853 close(ofd);
854 return CL_BREAK;
855 }
856 blk_bitset = cli_bitset_init();
857 if (!blk_bitset) {
858 cli_errmsg("OLE2 [handler_writefile]: init bitset failed\n");
859 close(ofd);
860 free(buff);
861 return CL_BREAK;
862 }
863 while ((current_block >= 0) && (len > 0)) {
864 if (current_block > (int32_t)hdr->max_block_no) {
865 cli_dbgmsg("OLE2 [handler_writefile]: Max block number for file size exceeded: %d\n", current_block);
866 close(ofd);
867 free(buff);
868 cli_bitset_free(blk_bitset);
869 return CL_SUCCESS;
870 }
871 /* Check we aren't in a loop */
872 if (cli_bitset_test(blk_bitset, (unsigned long)current_block)) {
873 /* Loop in block list */
874 cli_dbgmsg("OLE2 [handler_writefile]: Block list loop detected\n");
875 close(ofd);
876 free(buff);
877 cli_bitset_free(blk_bitset);
878 return CL_BREAK;
879 }
880 if (!cli_bitset_set(blk_bitset, (unsigned long)current_block)) {
881 close(ofd);
882 free(buff);
883 cli_bitset_free(blk_bitset);
884 return CL_BREAK;
885 }
886 if (prop->size < (int64_t)hdr->sbat_cutoff) {
887 /* Small block file */
888 if (!ole2_get_sbat_data_block(hdr, buff, current_block)) {
889 cli_dbgmsg("OLE2 [handler_writefile]: ole2_get_sbat_data_block failed\n");
890 close(ofd);
891 free(buff);
892 cli_bitset_free(blk_bitset);
893 return CL_SUCCESS;
894 }
895 /* buff now contains the block with N small blocks in it */
896 offset = (1 << hdr->log2_small_block_size) * (current_block % (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size)));
897
898 if (cli_writen(ofd, &buff[offset], MIN(len, 1 << hdr->log2_small_block_size)) != MIN(len, 1 << hdr->log2_small_block_size)) {
899 close(ofd);
900 free(buff);
901 cli_bitset_free(blk_bitset);
902 return CL_BREAK;
903 }
904 len -= MIN(len, 1 << hdr->log2_small_block_size);
905 current_block = ole2_get_next_sbat_block(hdr, current_block);
906 } else {
907 /* Big block file */
908 if (!ole2_read_block(hdr, buff, 1 << hdr->log2_big_block_size, current_block)) {
909 close(ofd);
910 free(buff);
911 cli_bitset_free(blk_bitset);
912 return CL_SUCCESS;
913 }
914 if (cli_writen(ofd, buff, MIN(len, (1 << hdr->log2_big_block_size))) != MIN(len, (1 << hdr->log2_big_block_size))) {
915 close(ofd);
916 free(buff);
917 cli_bitset_free(blk_bitset);
918 return CL_BREAK;
919 }
920 current_block = ole2_get_next_block_number(hdr, current_block);
921 len -= MIN(len, (1 << hdr->log2_big_block_size));
922 }
923 }
924 close(ofd);
925 free(buff);
926 cli_bitset_free(blk_bitset);
927 return CL_SUCCESS;
928 }
929
/*
 * States of the byte-at-a-time BIFF record parser. A record is a 2-byte
 * opcode, a 2-byte length and then `length` bytes of data; the first four
 * states collect the opcode and length, the last three consume the data.
 */
enum biff_parser_states {
    BIFF_PARSER_INITIAL = 0,               /* expecting the first opcode byte */
    BIFF_PARSER_EXPECTING_2ND_TAG_BYTE,    /* expecting the second opcode byte */
    BIFF_PARSER_EXPECTING_1ST_LENGTH_BYTE, /* expecting the first length byte */
    BIFF_PARSER_EXPECTING_2ND_LENGTH_BYTE, /* expecting the second length byte */
    BIFF_PARSER_NAME_RECORD,               /* inside the data of a NAME record */
    BIFF_PARSER_BOUNDSHEET_RECORD,         /* inside the data of a BOUNDSHEET record */
    BIFF_PARSER_DATA                       /* inside the data of any other record */
};

/* Parser state carried across buffers, so records may span buffer boundaries. */
struct biff_parser_state {
    enum biff_parser_states state; /* current position within a record */
    uint16_t opcode;               /* opcode of the record being parsed */
    uint16_t length;               /* data length of the record being parsed */
    uint16_t data_offset;          /* offset into the current record's data */
    uint8_t tmp;                   /* scratch byte remembered between data bytes */
};
947
948 /**
949 * Scan through a buffer of BIFF records and find PARSERNAME, BOUNDSHEET records (Which indicate XLM macros).
950 * BIFF streams follow the format OOLLDDDDDDDDD..., where OO is the opcode (little endian 16 bit value),
951 * LL is the data length (little endian 16 bit value), followed by LL bytes of data. Records are defined in
952 * the MICROSOFT OFFICE EXCEL 97-2007 BINARY FILE FORMAT SPECIFICATION.
953 *
954 * \param state The parser state.
955 * \param buff The buffer.
956 * \param len The buffer's size in bytes.
957 * \param ctx The ClamAV context for emitting JSON about the document.
958 * \returns true if a macro has been found, false otherwise.
959 */
960 static bool
scan_biff_for_xlm_macros(struct biff_parser_state * state,unsigned char * buff,size_t len,cli_ctx * ctx)961 scan_biff_for_xlm_macros(struct biff_parser_state *state, unsigned char *buff, size_t len, cli_ctx *ctx)
962 {
963 size_t i;
964 bool found_macro = false;
965
966 for (i = 0; i < len; ++i) {
967 switch (state->state) {
968 case BIFF_PARSER_INITIAL:
969 state->opcode = buff[i];
970 state->state = BIFF_PARSER_EXPECTING_2ND_TAG_BYTE;
971 break;
972 case BIFF_PARSER_EXPECTING_2ND_TAG_BYTE:
973 state->opcode |= buff[i] << 8;
974 state->state = BIFF_PARSER_EXPECTING_1ST_LENGTH_BYTE;
975 break;
976 case BIFF_PARSER_EXPECTING_1ST_LENGTH_BYTE:
977 state->length = buff[i];
978 state->state = BIFF_PARSER_EXPECTING_2ND_LENGTH_BYTE;
979 break;
980 case BIFF_PARSER_EXPECTING_2ND_LENGTH_BYTE:
981 state->length |= buff[i] << 8;
982 state->data_offset = 0;
983 switch (state->opcode) {
984 case 0x85:
985 state->state = BIFF_PARSER_BOUNDSHEET_RECORD;
986 break;
987 case 0x18:
988 state->state = BIFF_PARSER_NAME_RECORD;
989 break;
990 default:
991 state->state = BIFF_PARSER_DATA;
992 break;
993 }
994 if (state->length == 0) {
995 state->state = BIFF_PARSER_INITIAL;
996 }
997 break;
998 default:
999 switch (state->state) {
1000 case BIFF_PARSER_NAME_RECORD:
1001 #if HAVE_JSON
1002 if (state->data_offset == 0) {
1003 state->tmp = buff[i] & 0x20;
1004 } else if ((state->data_offset == 14 || state->data_offset == 15) && state->tmp) {
1005 if (buff[i] == 1 || buff[i] == 2) {
1006 if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
1007 json_object *indicators = cli_jsonarray(ctx->wrkproperty, "MacroIndicators");
1008 if (indicators) {
1009 cli_jsonstr(indicators, NULL, "autorun");
1010 } else {
1011 cli_dbgmsg("[scan_biff_for_xlm_macros] Failed to add \"autorun\" entry to MacroIndicators JSON array\n");
1012 }
1013 }
1014 }
1015
1016 if (buff[i] != 0) {
1017 state->tmp = 0;
1018 }
1019 }
1020 #endif
1021 break;
1022 case BIFF_PARSER_BOUNDSHEET_RECORD:
1023 if (state->data_offset == 4) {
1024 state->tmp = buff[i];
1025 } else if (state->data_offset == 5 && buff[i] == 1) { //Excel 4.0 macro sheet
1026 cli_dbgmsg("[scan_biff_for_xlm_macros] Found XLM macro sheet\n");
1027 #if HAVE_JSON
1028 if (SCAN_COLLECT_METADATA && (ctx->wrkproperty != NULL)) {
1029 cli_jsonbool(ctx->wrkproperty, "HasMacros", 1);
1030 json_object *macro_languages = cli_jsonarray(ctx->wrkproperty, "MacroLanguages");
1031 if (macro_languages) {
1032 cli_jsonstr(macro_languages, NULL, "XLM");
1033 } else {
1034 cli_dbgmsg("[scan_biff_for_xlm_macros] Failed to add \"XLM\" entry to MacroLanguages JSON array\n");
1035 }
1036 if (state->tmp == 1 || state->tmp == 2) {
1037 json_object *indicators = cli_jsonarray(ctx->wrkproperty, "MacroIndicators");
1038 if (indicators) {
1039 cli_jsonstr(indicators, NULL, "hidden");
1040 } else {
1041 cli_dbgmsg("[scan_biff_for_xlm_macros] Failed to add \"hidden\" entry to MacroIndicators JSON array\n");
1042 }
1043 }
1044 }
1045 #endif
1046 found_macro = true;
1047 }
1048 break;
1049 case BIFF_PARSER_DATA:
1050 break;
1051 default:
1052 //Should never arrive here
1053 cli_dbgmsg("[scan_biff_for_xlm_macros] Unexpected state value %d\n", (int)state->state);
1054 break;
1055 }
1056 state->data_offset += 1;
1057
1058 if (state->data_offset >= state->length) {
1059 state->state = BIFF_PARSER_INITIAL;
1060 }
1061 }
1062 }
1063 return found_macro;
1064 }
1065
1066 /**
1067 * Scan for XLM (Excel 4.0) macro sheets in an OLE2 Workbook stream.
1068 * The stream should be encoded with <= BIFF8.
1069 */
1070 static int
scan_for_xlm_macros(ole2_header_t * hdr,property_t * prop,const char * dir,cli_ctx * ctx)1071 scan_for_xlm_macros(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx)
1072 {
1073 unsigned char *buff = NULL;
1074 int32_t current_block;
1075 size_t len, offset;
1076 bitset_t *blk_bitset = NULL;
1077 struct biff_parser_state state;
1078 bool found_macro = false;
1079
1080 UNUSEDPARAM(ctx);
1081 UNUSEDPARAM(dir);
1082
1083 if (prop->type != 2) {
1084 /* Not a file */
1085 goto done;
1086 }
1087
1088 state.state = BIFF_PARSER_INITIAL;
1089 state.length = 0;
1090 current_block = prop->start_block;
1091 len = prop->size;
1092
1093 buff = (unsigned char *)cli_malloc(1 << hdr->log2_big_block_size);
1094 if (!buff) {
1095 cli_errmsg("OLE2 [scan_for_xlm_macros]: Unable to allocate memory for buff: %u\n", 1 << hdr->log2_big_block_size);
1096 goto done;
1097 }
1098 blk_bitset = cli_bitset_init();
1099 if (!blk_bitset) {
1100 cli_errmsg("OLE2 [scan_for_xlm_macros]: init bitset failed\n");
1101 goto done;
1102 }
1103 while ((current_block >= 0) && (len > 0)) {
1104 if (current_block > (int32_t)hdr->max_block_no) {
1105 cli_dbgmsg("OLE2 [scan_for_xlm_macros]: Max block number for file size exceeded: %d\n", current_block);
1106 goto done;
1107 }
1108 /* Check we aren't in a loop */
1109 if (cli_bitset_test(blk_bitset, (unsigned long)current_block)) {
1110 /* Loop in block list */
1111 cli_dbgmsg("OLE2 [scan_for_xlm_macros]: Block list loop detected\n");
1112 goto done;
1113 }
1114 if (!cli_bitset_set(blk_bitset, (unsigned long)current_block)) {
1115 goto done;
1116 }
1117 if (prop->size < (int64_t)hdr->sbat_cutoff) {
1118 /* Small block file */
1119 if (!ole2_get_sbat_data_block(hdr, buff, current_block)) {
1120 cli_dbgmsg("OLE2 [scan_for_xlm_macros]: ole2_get_sbat_data_block failed\n");
1121 goto done;
1122 }
1123 /* buff now contains the block with N small blocks in it */
1124 offset = (1 << hdr->log2_small_block_size) * (current_block % (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size)));
1125
1126 found_macro = scan_biff_for_xlm_macros(&state, &buff[offset], MIN(len, 1 << hdr->log2_small_block_size), ctx) || found_macro;
1127 len -= MIN(len, 1 << hdr->log2_small_block_size);
1128 current_block = ole2_get_next_sbat_block(hdr, current_block);
1129 } else {
1130 /* Big block file */
1131 if (!ole2_read_block(hdr, buff, 1 << hdr->log2_big_block_size, current_block)) {
1132 goto done;
1133 }
1134
1135 found_macro = scan_biff_for_xlm_macros(&state, buff, MIN(len, (1 << hdr->log2_big_block_size)), ctx) || found_macro;
1136 current_block = ole2_get_next_block_number(hdr, current_block);
1137 len -= MIN(len, (1 << hdr->log2_big_block_size));
1138 }
1139 }
1140
1141 done:
1142 if (buff) {
1143 free(buff);
1144 }
1145 if (blk_bitset) {
1146 cli_bitset_free(blk_bitset);
1147 }
1148 return found_macro;
1149 }
1150
1151 /* enum file Handler - checks for VBA presence */
1152 static int
handler_enum(ole2_header_t * hdr,property_t * prop,const char * dir,cli_ctx * ctx)1153 handler_enum(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx)
1154 {
1155 char *name = NULL;
1156 unsigned char *hwp_check;
1157 int32_t offset;
1158 int ret = CL_SUCCESS;
1159 #if HAVE_JSON
1160 json_object *arrobj, *strmobj;
1161
1162 name = cli_ole2_get_property_name2(prop->name, prop->name_size);
1163 if (name) {
1164 if (SCAN_COLLECT_METADATA && ctx->wrkproperty != NULL) {
1165 arrobj = cli_jsonarray(ctx->wrkproperty, "Streams");
1166 if (NULL == arrobj) {
1167 cli_warnmsg("ole2: no memory for streams list or streams is not an array\n");
1168 } else {
1169 strmobj = json_object_new_string(name);
1170 json_object_array_add(arrobj, strmobj);
1171 }
1172
1173 if (!strcmp(name, "powerpoint document")) {
1174 cli_jsonstr(ctx->wrkproperty, "FileType", "CL_TYPE_MSPPT");
1175 }
1176 if (!strcmp(name, "worddocument")) {
1177 cli_jsonstr(ctx->wrkproperty, "FileType", "CL_TYPE_MSWORD");
1178 }
1179 if (!strcmp(name, "workbook")) {
1180 cli_jsonstr(ctx->wrkproperty, "FileType", "CL_TYPE_MSXL");
1181 }
1182 }
1183 }
1184 #else
1185 UNUSEDPARAM(ctx);
1186 #endif
1187 UNUSEDPARAM(dir);
1188
1189 if (!hdr->has_vba) {
1190 if (!name)
1191 name = cli_ole2_get_property_name2(prop->name, prop->name_size);
1192 if (name) {
1193 if (!strcmp(name, "_vba_project") || !strcmp(name, "powerpoint document") || !strcmp(name, "worddocument") || !strcmp(name, "_1_ole10native"))
1194 hdr->has_vba = 1;
1195 }
1196 }
1197
1198 /*
1199 * if we can find a root entry fileheader, it may be a HWP file
1200 * identify the HWP signature "HWP Document File" at offset 0 stream
1201 */
1202 if (!hdr->is_hwp) {
1203 if (!name)
1204 name = cli_ole2_get_property_name2(prop->name, prop->name_size);
1205 if (name) {
1206 if (!strcmp(name, "fileheader")) {
1207 hwp_check = (unsigned char *)cli_calloc(1, 1 << hdr->log2_big_block_size);
1208 if (!hwp_check) {
1209 free(name);
1210 return CL_EMEM;
1211 }
1212
1213 /* reading safety checks; do-while used for breaks */
1214 do {
1215 if (prop->size == 0)
1216 break;
1217
1218 if (prop->start_block > hdr->max_block_no)
1219 break;
1220
1221 /* read the header block (~256 bytes) */
1222 offset = 0;
1223 if (prop->size < (int64_t)hdr->sbat_cutoff) {
1224 if (!ole2_get_sbat_data_block(hdr, hwp_check, prop->start_block)) {
1225 ret = CL_EREAD;
1226 break;
1227 }
1228 offset = (1 << hdr->log2_small_block_size) *
1229 (prop->start_block % (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size)));
1230
1231 /* reading safety */
1232 if (offset + 40 >= 1 << hdr->log2_big_block_size)
1233 break;
1234 } else {
1235 if (!ole2_read_block(hdr, hwp_check, 1 << hdr->log2_big_block_size, prop->start_block)) {
1236 ret = CL_EREAD;
1237 break;
1238 }
1239 }
1240
1241 /* compare against HWP signature; we could add the 15 padding NULLs too */
1242 if (!memcmp(hwp_check + offset, "HWP Document File", 17)) {
1243 hwp5_header_t *hwp_new;
1244 #if HAVE_JSON
1245 cli_jsonstr(ctx->wrkproperty, "FileType", "CL_TYPE_HWP5");
1246 #endif
1247 hwp_new = cli_calloc(1, sizeof(hwp5_header_t));
1248 if (!(hwp_new)) {
1249 ret = CL_EMEM;
1250 break;
1251 }
1252
1253 memcpy(hwp_new, hwp_check + offset, sizeof(hwp5_header_t));
1254
1255 hwp_new->version = ole2_endian_convert_32(hwp_new->version);
1256 hwp_new->flags = ole2_endian_convert_32(hwp_new->flags);
1257
1258 hdr->is_hwp = hwp_new;
1259 }
1260 } while (0);
1261
1262 free(hwp_check);
1263 }
1264 }
1265 }
1266
1267 if (!hdr->has_xlm) {
1268 if (!name) {
1269 name = cli_ole2_get_property_name2(prop->name, prop->name_size);
1270 }
1271
1272 if (name && (strcmp(name, "workbook") == 0 || strcmp(name, "book") == 0)) {
1273 hdr->has_xlm = scan_for_xlm_macros(hdr, prop, dir, ctx);
1274 }
1275 }
1276
1277 if (name)
1278 free(name);
1279 return ret;
1280 }
1281
/*
 * Cheap heuristic: does the file behind fd look like an MSO stream?
 *
 * The file must be at least 6 bytes long and bytes 4-5 must be 0x78 0x9C.
 * Returns 1 on a match and 0 otherwise (including on any seek/read failure).
 * The file offset of fd is left wherever the last seek/read put it.
 */
static int
likely_mso_stream(int fd)
{
    unsigned char marker[2];
    const off_t total = lseek(fd, 0, SEEK_END);

    if (total == -1) {
        cli_dbgmsg("likely_mso_stream: call to lseek() failed\n");
        return 0;
    }
    if (total < 6) {
        /* Too short to hold the 4-byte prefix plus the 2-byte marker. */
        return 0;
    }

    if (lseek(fd, 4, SEEK_SET) == -1) {
        cli_dbgmsg("likely_mso_stream: call to lseek() failed\n");
        return 0;
    }

    if (cli_readn(fd, marker, 2) != 2) {
        cli_dbgmsg("likely_mso_stream: reading from fd failed\n");
        return 0;
    }

    return (marker[0] == 0x78 && marker[1] == 0x9C) ? 1 : 0;
}
1311
scan_mso_stream(int fd,cli_ctx * ctx)1312 static cl_error_t scan_mso_stream(int fd, cli_ctx *ctx)
1313 {
1314 int zret, ofd;
1315 cl_error_t ret = CL_SUCCESS;
1316 fmap_t *input;
1317 off_t off_in = 0;
1318 size_t count, outsize = 0;
1319 z_stream zstrm;
1320 char *tmpname;
1321 uint32_t prefix;
1322 unsigned char inbuf[FILEBUFF], outbuf[FILEBUFF];
1323
1324 /* fmap the input file for easier manipulation */
1325 if (fd < 0) {
1326 cli_dbgmsg("scan_mso_stream: Invalid file descriptor argument\n");
1327 return CL_ENULLARG;
1328 } else {
1329 STATBUF statbuf;
1330
1331 if (FSTAT(fd, &statbuf) == -1) {
1332 cli_dbgmsg("scan_mso_stream: Can't stat file descriptor\n");
1333 return CL_ESTAT;
1334 }
1335
1336 input = fmap(fd, 0, statbuf.st_size, NULL);
1337 if (!input) {
1338 cli_dbgmsg("scan_mso_stream: Failed to get fmap for input stream\n");
1339 return CL_EMAP;
1340 }
1341 }
1342
1343 /* reserve tempfile for output and scanning */
1344 if ((ret = cli_gentempfd(ctx->sub_tmpdir, &tmpname, &ofd)) != CL_SUCCESS) {
1345 cli_errmsg("scan_mso_stream: Can't generate temporary file\n");
1346 funmap(input);
1347 return ret;
1348 }
1349
1350 /* initialize zlib inflation stream */
1351 memset(&zstrm, 0, sizeof(zstrm));
1352 zstrm.zalloc = Z_NULL;
1353 zstrm.zfree = Z_NULL;
1354 zstrm.opaque = Z_NULL;
1355 zstrm.next_in = inbuf;
1356 zstrm.next_out = outbuf;
1357 zstrm.avail_in = 0;
1358 zstrm.avail_out = FILEBUFF;
1359
1360 zret = inflateInit(&zstrm);
1361 if (zret != Z_OK) {
1362 cli_dbgmsg("scan_mso_stream: Can't initialize zlib inflation stream\n");
1363 ret = CL_EUNPACK;
1364 goto mso_end;
1365 }
1366
1367 /* extract 32-bit prefix */
1368 if (fmap_readn(input, &prefix, off_in, sizeof(prefix)) != sizeof(prefix)) {
1369 cli_dbgmsg("scan_mso_stream: Can't extract 4-byte prefix\n");
1370 ret = CL_EREAD;
1371 goto mso_end;
1372 }
1373
1374 /* RFC1952 says numbers are stored with least significant byte first */
1375 prefix = le32_to_host(prefix);
1376
1377 off_in += sizeof(uint32_t);
1378 cli_dbgmsg("scan_mso_stream: stream prefix = %08x(%d)\n", prefix, prefix);
1379
1380 /* inflation loop */
1381 do {
1382 if (zstrm.avail_in == 0) {
1383 size_t bytes_read;
1384
1385 zstrm.next_in = inbuf;
1386 bytes_read = fmap_readn(input, inbuf, off_in, FILEBUFF);
1387 if (bytes_read == (size_t)-1) {
1388 cli_errmsg("scan_mso_stream: Error reading MSO file\n");
1389 ret = CL_EUNPACK;
1390 goto mso_end;
1391 }
1392 if (bytes_read == 0)
1393 break;
1394
1395 zstrm.avail_in = bytes_read;
1396 off_in += bytes_read;
1397 }
1398 zret = inflate(&zstrm, Z_SYNC_FLUSH);
1399 count = FILEBUFF - zstrm.avail_out;
1400 if (count) {
1401 if (cli_checklimits("MSO", ctx, outsize + count, 0, 0) != CL_SUCCESS)
1402 break;
1403 if (cli_writen(ofd, outbuf, count) != count) {
1404 cli_errmsg("scan_mso_stream: Can't write to file %s\n", tmpname);
1405 ret = CL_EWRITE;
1406 goto mso_end;
1407 }
1408 outsize += count;
1409 }
1410 zstrm.next_out = outbuf;
1411 zstrm.avail_out = FILEBUFF;
1412 } while (zret == Z_OK);
1413
1414 /* post inflation checks */
1415 if (zret != Z_STREAM_END && zret != Z_OK) {
1416 if (outsize == 0) {
1417 cli_infomsg(ctx, "scan_mso_stream: Error decompressing MSO file. No data decompressed.\n");
1418 ret = CL_EUNPACK;
1419 goto mso_end;
1420 }
1421
1422 cli_infomsg(ctx, "scan_mso_stream: Error decompressing MSO file. Scanning what was decompressed.\n");
1423 }
1424 cli_dbgmsg("scan_mso_stream: Decompressed %llu bytes to %s\n", (long long unsigned)outsize, tmpname);
1425
1426 if (outsize != prefix) {
1427 cli_warnmsg("scan_mso_stream: declared prefix != inflated stream size, %llu != %llu\n",
1428 (long long unsigned)prefix, (long long unsigned)outsize);
1429 } else {
1430 cli_dbgmsg("scan_mso_stream: declared prefix == inflated stream size, %llu == %llu\n",
1431 (long long unsigned)prefix, (long long unsigned)outsize);
1432 }
1433
1434 /* scanning inflated stream */
1435 ret = cli_magic_scan_desc(ofd, tmpname, ctx, NULL);
1436
1437 /* clean-up */
1438 mso_end:
1439 zret = inflateEnd(&zstrm);
1440 if (zret != Z_OK)
1441 ret = CL_EUNPACK;
1442 close(ofd);
1443 if (!ctx->engine->keeptmp)
1444 if (cli_unlink(tmpname))
1445 ret = CL_EUNLINK;
1446 free(tmpname);
1447 funmap(input);
1448 return ret;
1449 }
1450
1451 static int
handler_otf(ole2_header_t * hdr,property_t * prop,const char * dir,cli_ctx * ctx)1452 handler_otf(ole2_header_t *hdr, property_t *prop, const char *dir, cli_ctx *ctx)
1453 {
1454 char *tempfile, *name = NULL;
1455 unsigned char *buff;
1456 int32_t current_block;
1457 size_t len, offset;
1458 int ofd, is_mso, ret;
1459 bitset_t *blk_bitset;
1460
1461 UNUSEDPARAM(dir);
1462
1463 if (prop->type != 2) {
1464 /* Not a file */
1465 return CL_SUCCESS;
1466 }
1467 print_ole2_property(prop);
1468
1469 if (!(tempfile = cli_gentemp(ctx ? ctx->sub_tmpdir : NULL)))
1470 return CL_EMEM;
1471
1472 if ((ofd = open(tempfile, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) < 0) {
1473 cli_dbgmsg("OLE2: Can't create file %s\n", tempfile);
1474 free(tempfile);
1475 return CL_ECREAT;
1476 }
1477 current_block = prop->start_block;
1478 len = prop->size;
1479
1480 if (cli_debug_flag) {
1481 if (!name)
1482 name = cli_ole2_get_property_name2(prop->name, prop->name_size);
1483 cli_dbgmsg("OLE2 [handler_otf]: Dumping '%s' to '%s'\n", name, tempfile);
1484 }
1485
1486 buff = (unsigned char *)cli_malloc(1 << hdr->log2_big_block_size);
1487 if (!buff) {
1488 close(ofd);
1489 if (name)
1490 free(name);
1491 cli_unlink(tempfile);
1492 free(tempfile);
1493 return CL_EMEM;
1494 }
1495 blk_bitset = cli_bitset_init();
1496
1497 if (!blk_bitset) {
1498 cli_errmsg("OLE2: OTF handler init bitset failed\n");
1499 free(buff);
1500 close(ofd);
1501 if (name)
1502 free(name);
1503 if (cli_unlink(tempfile)) {
1504 free(tempfile);
1505 return CL_EUNLINK;
1506 }
1507 free(tempfile);
1508 return CL_BREAK;
1509 }
1510 while ((current_block >= 0) && (len > 0)) {
1511 if (current_block > (int32_t)hdr->max_block_no) {
1512 cli_dbgmsg("OLE2: Max block number for file size exceeded: %d\n", current_block);
1513 break;
1514 }
1515 /* Check we aren't in a loop */
1516 if (cli_bitset_test(blk_bitset, (unsigned long)current_block)) {
1517 /* Loop in block list */
1518 cli_dbgmsg("OLE2: Block list loop detected\n");
1519 break;
1520 }
1521 if (!cli_bitset_set(blk_bitset, (unsigned long)current_block)) {
1522 break;
1523 }
1524 if (prop->size < (int64_t)hdr->sbat_cutoff) {
1525 /* Small block file */
1526 if (!ole2_get_sbat_data_block(hdr, buff, current_block)) {
1527 cli_dbgmsg("ole2_get_sbat_data_block failed\n");
1528 break;
1529 }
1530 /* buff now contains the block with N small blocks in it */
1531 offset = (1 << hdr->log2_small_block_size) * (current_block % (1 << (hdr->log2_big_block_size - hdr->log2_small_block_size)));
1532 if (cli_writen(ofd, &buff[offset], MIN(len, 1 << hdr->log2_small_block_size)) != MIN(len, 1 << hdr->log2_small_block_size)) {
1533 close(ofd);
1534 if (name)
1535 free(name);
1536 free(buff);
1537 cli_bitset_free(blk_bitset);
1538 if (cli_unlink(tempfile)) {
1539 free(tempfile);
1540 return CL_EUNLINK;
1541 }
1542 free(tempfile);
1543 return CL_BREAK;
1544 }
1545 len -= MIN(len, 1 << hdr->log2_small_block_size);
1546 current_block = ole2_get_next_sbat_block(hdr, current_block);
1547 } else {
1548 /* Big block file */
1549 if (!ole2_read_block(hdr, buff, 1 << hdr->log2_big_block_size, current_block)) {
1550 break;
1551 }
1552 if (cli_writen(ofd, buff, MIN(len, (1 << hdr->log2_big_block_size))) != MIN(len, (1 << hdr->log2_big_block_size))) {
1553 close(ofd);
1554 if (name)
1555 free(name);
1556 free(buff);
1557 cli_bitset_free(blk_bitset);
1558 if (cli_unlink(tempfile)) {
1559 free(tempfile);
1560 return CL_EUNLINK;
1561 }
1562 free(tempfile);
1563 return CL_EWRITE;
1564 }
1565 current_block = ole2_get_next_block_number(hdr, current_block);
1566 len -= MIN(len, (1 << hdr->log2_big_block_size));
1567 }
1568 }
1569
1570 /* defragmenting of ole2 stream complete */
1571
1572 is_mso = likely_mso_stream(ofd);
1573 if (lseek(ofd, 0, SEEK_SET) == -1) {
1574 close(ofd);
1575 if (name)
1576 free(name);
1577 if (ctx && !(ctx->engine->keeptmp))
1578 cli_unlink(tempfile);
1579
1580 free(tempfile);
1581 free(buff);
1582 cli_bitset_free(blk_bitset);
1583 return CL_ESEEK;
1584 }
1585
1586 #if HAVE_JSON
1587 /* JSON Output Summary Information */
1588 if (SCAN_COLLECT_METADATA && (ctx->properties != NULL)) {
1589 if (!name)
1590 name = cli_ole2_get_property_name2(prop->name, prop->name_size);
1591 if (name) {
1592 if (!strncmp(name, "_5_summaryinformation", 21)) {
1593 cli_dbgmsg("OLE2: detected a '_5_summaryinformation' stream\n");
1594 /* JSONOLE2 - what to do if something breaks? */
1595 if (cli_ole2_summary_json(ctx, ofd, 0) == CL_ETIMEOUT) {
1596 free(name);
1597 close(ofd);
1598 if (ctx && !(ctx->engine->keeptmp))
1599 cli_unlink(tempfile);
1600
1601 free(tempfile);
1602 free(buff);
1603 cli_bitset_free(blk_bitset);
1604 return CL_ETIMEOUT;
1605 }
1606 }
1607 if (!strncmp(name, "_5_documentsummaryinformation", 29)) {
1608 cli_dbgmsg("OLE2: detected a '_5_documentsummaryinformation' stream\n");
1609 /* JSONOLE2 - what to do if something breaks? */
1610 if (cli_ole2_summary_json(ctx, ofd, 1) == CL_ETIMEOUT) {
1611 free(name);
1612 close(ofd);
1613 if (ctx && !(ctx->engine->keeptmp))
1614 cli_unlink(tempfile);
1615
1616 free(tempfile);
1617 free(buff);
1618 cli_bitset_free(blk_bitset);
1619 return CL_ETIMEOUT;
1620 }
1621 }
1622 }
1623 }
1624 #endif
1625
1626 if (hdr->is_hwp) {
1627 if (!name)
1628 name = cli_ole2_get_property_name2(prop->name, prop->name_size);
1629 ret = cli_scanhwp5_stream(ctx, hdr->is_hwp, name, ofd, tempfile);
1630 } else if (is_mso < 0) {
1631 ret = CL_ESEEK;
1632 } else if (is_mso) {
1633 /* MSO Stream Scan */
1634 ret = scan_mso_stream(ofd, ctx);
1635 } else {
1636 /* Normal File Scan */
1637 ret = cli_magic_scan_desc(ofd, tempfile, ctx, NULL);
1638 }
1639 if (name)
1640 free(name);
1641 close(ofd);
1642 free(buff);
1643 cli_bitset_free(blk_bitset);
1644 if (ctx && !ctx->engine->keeptmp) {
1645 if (cli_unlink(tempfile)) {
1646 free(tempfile);
1647 return CL_EUNLINK;
1648 }
1649 }
1650 free(tempfile);
1651 return ret == CL_VIRUS ? CL_VIRUS : CL_SUCCESS;
1652 }
1653
1654 #if !defined(HAVE_ATTRIB_PACKED) && !defined(HAVE_PRAGMA_PACK) && !defined(HAVE_PRAGMA_PACK_HPPA)
1655 static int
ole2_read_header(int fd,ole2_header_t * hdr)1656 ole2_read_header(int fd, ole2_header_t *hdr)
1657 {
1658 int i;
1659
1660 if (cli_readn(fd, &hdr->magic, 8) != 8) {
1661 return FALSE;
1662 }
1663 if (cli_readn(fd, &hdr->clsid, 16) != 16) {
1664 return FALSE;
1665 }
1666 if (cli_readn(fd, &hdr->minor_version, 2) != 2) {
1667 return FALSE;
1668 }
1669 if (cli_readn(fd, &hdr->dll_version, 2) != 2) {
1670 return FALSE;
1671 }
1672 if (cli_readn(fd, &hdr->byte_order, 2) != 2) {
1673 return FALSE;
1674 }
1675 if (cli_readn(fd, &hdr->log2_big_block_size, 2) != 2) {
1676 return FALSE;
1677 }
1678 if (cli_readn(fd, &hdr->log2_small_block_size, 4) != 4) {
1679 return FALSE;
1680 }
1681 if (cli_readn(fd, &hdr->reserved, 8) != 8) {
1682 return FALSE;
1683 }
1684 if (cli_readn(fd, &hdr->bat_count, 4) != 4) {
1685 return FALSE;
1686 }
1687 if (cli_readn(fd, &hdr->prop_start, 4) != 4) {
1688 return FALSE;
1689 }
1690 if (cli_readn(fd, &hdr->signature, 4) != 4) {
1691 return FALSE;
1692 }
1693 if (cli_readn(fd, &hdr->sbat_cutoff, 4) != 4) {
1694 return FALSE;
1695 }
1696 if (cli_readn(fd, &hdr->sbat_start, 4) != 4) {
1697 return FALSE;
1698 }
1699 if (cli_readn(fd, &hdr->sbat_block_count, 4) != 4) {
1700 return FALSE;
1701 }
1702 if (cli_readn(fd, &hdr->xbat_start, 4) != 4) {
1703 return FALSE;
1704 }
1705 if (cli_readn(fd, &hdr->xbat_count, 4) != 4) {
1706 return FALSE;
1707 }
1708 for (i = 0; i < 109; i++) {
1709 if (cli_readn(fd, &hdr->bat_array[i], 4) != 4) {
1710 return FALSE;
1711 }
1712 }
1713 return TRUE;
1714 }
1715 #endif
1716
/**
 * Parse the OLE2 container mapped in ctx->fmap.
 *
 * Pass 1 walks the property tree with handler_enum to count streams and to
 * detect VBA / XLM / HWP content. Pass 2 then either writes the streams to
 * \p dirname for later macro processing (when VBA or XLM was detected) or
 * scans every stream on the fly.
 *
 * \param dirname Directory the streams are written to in the VBA/XLM case.
 * \param ctx     The ClamAV scan context (must not be NULL).
 * \param files   Receives the table of written streams in the VBA/XLM case.
 * \param has_vba Optional; receives the VBA flag in the VBA/XLM case.
 * \param has_xlm Optional; receives the XLM flag in the VBA/XLM case.
 * \returns CL_CLEAN when nothing was found or the file cannot be parsed,
 *          otherwise the scan result or an error code. CL_BREAK from the
 *          tree walk is reported as CL_CLEAN.
 */
int cli_ole2_extract(const char *dirname, cli_ctx *ctx, struct uniq **files, int *has_vba, int *has_xlm)
{
    ole2_header_t hdr;
    int ret = CL_CLEAN;
    size_t hdr_size;
    size_t data_start;
    unsigned int file_count = 0;
    unsigned long scansize, scansize2;
    const void *phdr;

    cli_dbgmsg("in cli_ole2_extract()\n");
    if (!ctx)
        return CL_ENULLARG;

    hdr.is_hwp = NULL;
    hdr.bitset = NULL;
    if (ctx->engine->maxscansize) {
        if (ctx->engine->maxscansize > ctx->scansize)
            scansize = ctx->engine->maxscansize - ctx->scansize;
        else
            return CL_EMAXSIZE;
    } else
        scansize = -1;

    scansize2 = scansize;

    /* size of header - size of other values in struct */
    /* NOTE(review): this must subtract every bookkeeping member of ole2_header_t that is not
     * part of the on-disk header; confirm the list still matches the struct (e.g. has_xlm). */
    hdr_size = sizeof(struct ole2_header_tag) - sizeof(int32_t) - sizeof(uint32_t) -
               sizeof(off_t) - sizeof(bitset_t *) -
               sizeof(struct uniq *) - sizeof(fmap_t *) - sizeof(int) - sizeof(hwp5_header_t *);

    if ((size_t)(ctx->fmap->len) < (size_t)(hdr_size)) {
        return CL_CLEAN;
    }
    hdr.map      = ctx->fmap;
    hdr.m_length = hdr.map->len;
    phdr         = fmap_need_off_once(hdr.map, 0, hdr_size);
    if (phdr) {
        memcpy(&hdr, phdr, hdr_size);
    } else {
        cli_dbgmsg("cli_ole2_extract: failed to read header\n");
        goto abort;
    }

    /* on-disk values are little endian */
    hdr.minor_version         = ole2_endian_convert_16(hdr.minor_version);
    hdr.dll_version           = ole2_endian_convert_16(hdr.dll_version);
    hdr.byte_order            = ole2_endian_convert_16(hdr.byte_order);
    hdr.log2_big_block_size   = ole2_endian_convert_16(hdr.log2_big_block_size);
    hdr.log2_small_block_size = ole2_endian_convert_32(hdr.log2_small_block_size);
    hdr.bat_count             = ole2_endian_convert_32(hdr.bat_count);
    hdr.prop_start            = ole2_endian_convert_32(hdr.prop_start);
    hdr.sbat_cutoff           = ole2_endian_convert_32(hdr.sbat_cutoff);
    hdr.sbat_start            = ole2_endian_convert_32(hdr.sbat_start);
    hdr.sbat_block_count      = ole2_endian_convert_32(hdr.sbat_block_count);
    hdr.xbat_start            = ole2_endian_convert_32(hdr.xbat_start);
    hdr.xbat_count            = ole2_endian_convert_32(hdr.xbat_count);

    hdr.sbat_root_start = -1;

    hdr.bitset = cli_bitset_init();
    if (!hdr.bitset) {
        ret = CL_EMEM;
        goto abort;
    }
    if (memcmp(hdr.magic, magic_id, 8) != 0) {
        cli_dbgmsg("OLE2 magic failed!\n");
        ret = CL_EFORMAT;
        goto abort;
    }
    if (hdr.log2_big_block_size < 6 || hdr.log2_big_block_size > 30) {
        cli_dbgmsg("CAN'T PARSE: Invalid big block size (2^%u)\n", hdr.log2_big_block_size);
        goto abort;
    }
    if (!hdr.log2_small_block_size || hdr.log2_small_block_size > hdr.log2_big_block_size) {
        cli_dbgmsg("CAN'T PARSE: Invalid small block size (2^%u)\n", hdr.log2_small_block_size);
        goto abort;
    }
    if (hdr.sbat_cutoff != 4096) {
        cli_dbgmsg("WARNING: Untested sbat cutoff (%u); data may not extract correctly\n", hdr.sbat_cutoff);
    }

    if (hdr.map->len > INT32_MAX) {
        cli_dbgmsg("OLE2 extract: Overflow detected\n");
        ret = CL_EFORMAT;
        goto abort;
    }

    /* The header occupies the first MAX(512, big block size) bytes. A file
     * shorter than that has no data blocks at all; without this check the
     * unsigned subtraction below would wrap and yield a bogus block limit. */
    data_start = (size_t)MAX(512, 1 << hdr.log2_big_block_size);
    if ((size_t)hdr.map->len < data_start) {
        cli_dbgmsg("CAN'T PARSE: File is smaller than its header block (2^%u)\n", hdr.log2_big_block_size);
        goto abort;
    }
    /* 8 SBAT blocks per file block */
    hdr.max_block_no = (hdr.map->len - data_start) / (1 << hdr.log2_small_block_size);

    print_ole2_header(&hdr);
    cli_dbgmsg("Max block number: %lu\n", (unsigned long int)hdr.max_block_no);

    /* PASS 1 : Count files and check for VBA */
    hdr.has_vba = 0;
    hdr.has_xlm = 0;
    ret         = ole2_walk_property_tree(&hdr, NULL, 0, handler_enum, 0, &file_count, ctx, &scansize);
    /* the bitset tracks visited properties; start pass 2 with a fresh one */
    cli_bitset_free(hdr.bitset);
    hdr.bitset = NULL;
    if (!file_count || !(hdr.bitset = cli_bitset_init()))
        goto abort;

    if (hdr.is_hwp) {
        cli_dbgmsg("OLE2: identified HWP document\n");
        cli_dbgmsg("OLE2: HWP signature: %.17s\n", hdr.is_hwp->signature);
        cli_dbgmsg("OLE2: HWP version: 0x%08x\n", hdr.is_hwp->version);
        cli_dbgmsg("OLE2: HWP flags: 0x%08x\n", hdr.is_hwp->flags);

        ret = cli_hwp5header(ctx, hdr.is_hwp);
        if (ret != CL_SUCCESS)
            goto abort;
    }

    /* If there's no VBA we scan OTF */
    if (hdr.has_vba || hdr.has_xlm) {
        /* PASS 2/A : VBA scan */
        cli_dbgmsg("OLE2: VBA project found\n");
        if (!(hdr.U = uniq_init(file_count))) {
            cli_dbgmsg("OLE2: uniq_init() failed\n");
            ret = CL_EMEM;
            goto abort;
        }
        file_count = 0;
        ole2_walk_property_tree(&hdr, dirname, 0, handler_writefile, 0, &file_count, ctx, &scansize2);
        ret    = CL_CLEAN;
        *files = hdr.U;
        if (has_vba) {
            *has_vba = hdr.has_vba;
        }
        if (has_xlm) {
            *has_xlm = hdr.has_xlm;
        }
    } else {
        cli_dbgmsg("OLE2: no VBA projects found\n");
        /* PASS 2/B : OTF scan */
        file_count = 0;
        ret        = ole2_walk_property_tree(&hdr, NULL, 0, handler_otf, 0, &file_count, ctx, &scansize2);
    }

abort:
    if (hdr.bitset)
        cli_bitset_free(hdr.bitset);

    if (hdr.is_hwp)
        free(hdr.is_hwp);

    return ret == CL_BREAK ? CL_CLEAN : ret;
}
1863