1 //--------------------------------------------------------------------------
2 // Copyright (C) 2021 Cisco and/or its affiliates. All rights reserved.
3 //
4 // This program is free software; you can redistribute it and/or modify it
5 // under the terms of the GNU General Public License Version 2 as published
6 // by the Free Software Foundation.  You may not use, modify or distribute
7 // this program under any other version of the GNU General Public License.
8 //
9 // This program is distributed in the hope that it will be useful, but
10 // WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 // General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License along
15 // with this program; if not, write to the Free Software Foundation, Inc.,
16 // 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17 //--------------------------------------------------------------------------
18 
19 // file_olefile.cc author Vigneshwari Viswanathan vignvisw@cisco.com
20 
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif
24 
25 #include "file_olefile.h"
26 
~DirectoryList()27 DirectoryList :: ~DirectoryList()
28 {
29     std::unordered_map<char*, FileProperty*>::iterator it = oleentry.begin();
30 
31     while (it != oleentry.end())
32     {
33         FileProperty* node =  it->second;
34         delete[] node->get_name();
35         delete node;
36         it = oleentry.erase(it);
37     }
38 }
39 
40 // The function walk_directory_list() will read the entries of all the directory entry
41 // arrays of an ole file and will create a mapping between the storage/stream name and
42 // the fileproperty object. Each entry of directory entry array will be of 64 bytes.
43 //
44 // The first directory entry array value is obtained from the ole header. The subsequent
45 // sectors will be obtained by referring the fat list array.
46 //
47 // Each object of fileproperty will give us the information about the starting sector of
48 // that storage/stream, overall size of the stream/storage and other metadata.
49 
50 // The content of any storage/stream is read by combining all the sectors of that stream/
51 // storage and it will begin with starting sector value mentioned in fileproperty object.
52 // Also, this starting sector value can be used to obtain the next sector to read by
53 // referring the FAT list array.
walk_directory_list()54 void OleFile :: walk_directory_list()
55 {
56     int32_t current_sector;
57     uint16_t sector_size;
58     uint8_t* name_buf;
59     int bytes_copied;
60     FileProperty* node;
61     char* file_name;
62 
63     VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_INFO_LEVEL, CURRENT_PACKET,
64         "Parsing the Directory list.\n");
65 
66     current_sector = header->get_first_dir();
67     sector_size = header->get_sector_size();
68 
69     dir_list = new DirectoryList();
70 
71     while (current_sector > INVALID_SECTOR)
72     {
73         const uint8_t* buf = file_buf;
74         uint32_t start_offset = get_fat_offset(current_sector);
75 
76         if ((start_offset + sector_size) > buf_len)
77             return;
78 
79         buf += start_offset;
80 
81         int32_t count = 0;
82 
83         while (count < (sector_size/DIR_ENTRY_SIZE))
84         {
85             node = new FileProperty;
86             name_buf = new uint8_t[32];
87 
88             // The filename is UTF16 encoded and will be of the size 64 bytes.
89             dir_list->utf_state = new snort::UtfDecodeSession();
90             if (!header->get_byte_order())
91                 dir_list->utf_state->set_decode_utf_state_charset(CHARSET_UTF16LE);
92             else
93                 dir_list->utf_state->set_decode_utf_state_charset(CHARSET_UTF16BE);
94             dir_list->utf_state->decode_utf(buf, OLE_MAX_FILENAME_LEN_UTF16, name_buf,
95                 OLE_MAX_FILENAME_ASCII, &bytes_copied);
96 
97             node->set_name(name_buf);
98 
99             node->set_file_type(buf + DIR_FILE_TYPE_OFFSET);
100 
101             node->set_color(buf + DIR_COLOR_OFFSET);
102 
103             node->set_lef_sib_id(buf + DIR_LEFT_SIB_OFFSET, header->get_byte_order());
104 
105             node->set_rig_sib_id(buf + DIR_RIGHT_SIB_OFFSET, header->get_byte_order());
106 
107             node->set_root_node_id(buf + DIR_ROOT_NODE_OFFSET, header->get_byte_order());
108 
109             node->set_cls_id(buf + DIR_CLS_ID_OFFSET);
110 
111             node->set_starting_sector(buf + DIR_STARTING_SEC_OFFSET, header->get_byte_order());
112 
113             node->set_stream_size(buf + DIR_STREAM_SIZE_OFFSET, header->get_byte_order());
114 
115             buf += DIR_NEXT_ENTR_OFFSET;
116 
117             //Insert the oleentry
118             file_name = (char*)name_buf;
119 
120             if (strcmp(file_name, ROOT_ENTRY) == 0)
121                 dir_list->set_mini_stream_sector(node->get_starting_sector());
122             object_type type = node->get_file_type();
123             // check for all the empty/non valid entries in the directory list.
124             if (!(type == ROOT_STORAGE or type == STORAGE or type == STREAM))
125             {
126                 delete node;
127                 delete[] name_buf;
128             }
129             else
130                 dir_list->oleentry.insert({ file_name, node });
131             count++;
132             delete dir_list->utf_state;
133         }
134         // Reading the next sector of current_sector by referring the FAT list array.
135         // A negative number suggests the end of directory entry array and there are
136         // no more stream/storage to read.
137         int32_t next_sector = get_next_fat_sector(current_sector);
138         if (next_sector > INVALID_SECTOR)
139             current_sector = next_sector;
140         else
141             current_sector = INVALID_SECTOR;
142     }
143     VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_INFO_LEVEL, CURRENT_PACKET,
144         "End of Directory list parsing.\n");
145 }
146 
get_file_node(char * name)147 FileProperty* DirectoryList :: get_file_node(char* name)
148 {
149     std::unordered_map<char*, FileProperty*>::iterator it;
150 
151     it = oleentry.find(name);
152 
153     if (it != oleentry.end())
154         return(it->second);
155     return nullptr;
156 }
157 
158 // Every index of fat_list array is the fat sector ID and the value present
159 // at that index will be its corresponding next fat sector ID.
get_next_fat_sector(int32_t sec_id)160 int32_t OleFile :: get_next_fat_sector(int32_t sec_id)
161 {
162     if (fat_list and sec_id > INVALID_SECTOR and sec_id < fat_list_len)
163         return fat_list[sec_id];
164     else
165     {
166         VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_INFO_LEVEL, CURRENT_PACKET,
167             "The next sector ID of fat sector %d is not available in fat list.\n", sec_id);
168         return INVALID_SECTOR;
169     }
170 }
171 
172 // Every index of mini_fat_list array is the minifat sector ID and the value present
173 // at that index will be its corresponding next minifat sector ID.
get_next_mini_fat_sector(int32_t sec_id)174 int32_t OleFile :: get_next_mini_fat_sector(int32_t sec_id)
175 {
176     if (mini_fat_list and sec_id > INVALID_SECTOR and sec_id < mini_fat_list_len)
177         return mini_fat_list[sec_id];
178     else
179     {
180         VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_INFO_LEVEL, CURRENT_PACKET,
181             "The next sector ID of mini fat sector %d is not available in minifat list.\n",
182             sec_id);
183         return INVALID_SECTOR;
184     }
185 }
186 
187 // The offset of a sector is header_size + (sector_number * size_of_each_sector).
get_fat_offset(int32_t sec_id)188 int32_t OleFile :: get_fat_offset(int32_t sec_id)
189 {
190     int32_t byte_offset;
191     byte_offset = OLE_HEADER_LEN + (sec_id * header->get_sector_size());
192     return(byte_offset);
193 }
194 
195 // Example to get the mini fat sector offset.
196 // If,
197 // sector size = 512 bytes
198 // mini sector size = 64 bytes
199 // and sector 2 and 5 are storing the mini fat sectors (assuming the mini fat sector is
200 // starting with sector 2) , then the offset of 12th mini fat sector is calculated as
201 // below:
202 //
203 // mini fat sector per sector = 512 bytes / 64 bytes = 8.
204 // The first 8 mini fat sectors would be stored in the fat sector 2 and therefore the
205 // 12th minifat sector would be stored in the next fat sector of sector 2 which is sector
206 //  5.( we'll get this info from the fat sector array ( get_next_fat_sector() ) where
207 // sector 5 would be mapped to sector 2 as the next fat sector. -2 will be mapped against
208 // sector 5, as sector 5 is the last sector storing mini fat sectors )
209 //
210 // The 12th mini fat sector would be the 4th ( index = 3) mini fat sector stored in fat sector 5.
211 //
212 // The offset of sector 5  = header_size + (sector_size) * 5 = 512 + 512 * 5 = 3072 bytes.
213 //
214 // The offset of 12th mini fat sector = (offset of 5th fat sector ) + ( offset of 4th(index is 3)
215 //  mini fat sector in 5th fat sector)
216 //                                    =  3072 + 64 * 3
217 //                                    =  3264 bytes.
get_mini_fat_offset(int32_t sec_id)218 int32_t OleFile :: get_mini_fat_offset(int32_t sec_id)
219 {
220     int32_t sec_position, mini_sec_position, count, current_sector;
221     int32_t byte_offset, mini_fat_persector;
222 
223     mini_fat_persector = header->get_sector_size() / header->get_mini_sector_size();
224 
225     if (sec_id >=  mini_fat_persector)
226     {
227         sec_position = sec_id/mini_fat_persector;
228         mini_sec_position = sec_id % mini_fat_persector;
229     }
230     else
231     {
232         sec_position = 0;
233         mini_sec_position = sec_id;
234     }
235 
236     count = 0;
237 
238     current_sector = dir_list->get_mini_stream_sector();
239 
240     while (count < sec_position)
241     {
242         int32_t next_sector = get_next_fat_sector(current_sector);
243         if (next_sector <= INVALID_SECTOR)
244             return -1;
245         count++;
246         current_sector = next_sector;
247     }
248     byte_offset = OLE_HEADER_LEN + (current_sector * header->get_sector_size()) +
249         (mini_sec_position *
250         header->get_mini_sector_size());
251     return byte_offset;
252 }
253 
find_bytes_to_copy(uint32_t byte_offset,uint32_t data_len,uint32_t stream_size,uint16_t sector_size)254 uint32_t OleFile :: find_bytes_to_copy(uint32_t byte_offset, uint32_t data_len,
255                                    uint32_t stream_size, uint16_t sector_size)
256 {
257     uint32_t remaining_bytes = stream_size - data_len;
258     uint32_t bytes_to_copy;
259 
260     if ((byte_offset + sector_size) > buf_len)
261     {
262         bytes_to_copy = buf_len - byte_offset;
263     }
264     else
265     {
266         bytes_to_copy = sector_size;
267     }
268 
269     if  (bytes_to_copy > remaining_bytes)
270         bytes_to_copy = remaining_bytes;
271 
272     return bytes_to_copy;
273 }
274 
get_file_data(char * file,uint8_t * & file_data,uint32_t & data_len)275 void OleFile :: get_file_data(char* file, uint8_t*& file_data, uint32_t& data_len)
276 {
277     FileProperty* node = dir_list->get_file_node(file);
278     data_len = 0;
279 
280     if (node)
281     {
282         int32_t starting_sector;
283         uint32_t stream_size;
284         sector_type is_fat = FAT_SECTOR;
285         uint32_t byte_offset, bytes_to_copy;
286         uint8_t* temp_data;
287 
288         starting_sector = node->get_starting_sector();
289         stream_size = node->get_stream_size();
290 
291         file_data = new uint8_t[stream_size];
292         temp_data = file_data;
293         if (stream_size <= header->get_minifat_cutoff())
294             is_fat = MINIFAT_SECTOR;
295 
296         if (is_fat == FAT_SECTOR)
297         {
298             int32_t current_sector = starting_sector;
299             uint16_t sector_size = header->get_sector_size();
300             while (current_sector > INVALID_SECTOR)
301             {
302                 byte_offset = get_fat_offset(current_sector);
303                 if (byte_offset > buf_len)
304                     return;
305 
306                 bytes_to_copy = find_bytes_to_copy(byte_offset, data_len,
307                                     stream_size, sector_size);
308 
309                 memcpy(temp_data, (file_buf + byte_offset), bytes_to_copy);
310                 temp_data += bytes_to_copy;
311                 data_len += bytes_to_copy;
312 
313                 int32_t next_sector = get_next_fat_sector(current_sector);
314                 current_sector = next_sector;
315             }
316         }
317         else
318         {
319             int32_t mini_sector = node->get_starting_sector();
320             uint16_t mini_sector_size = header->get_mini_sector_size();
321             while (mini_sector > INVALID_SECTOR)
322             {
323                 byte_offset = get_mini_fat_offset(mini_sector);
324                 if (byte_offset > buf_len)
325                     return;
326 
327                 bytes_to_copy = find_bytes_to_copy(byte_offset, data_len,
328                                     stream_size, mini_sector_size);
329 
330                 memcpy(temp_data, (file_buf + byte_offset), bytes_to_copy);
331                 temp_data += bytes_to_copy;
332                 data_len += bytes_to_copy;
333 
334                 int32_t next_sector = get_next_mini_fat_sector(mini_sector);
335                 mini_sector =  next_sector;
336             }
337         }
338     }
339 }
340 
// The function populate_fat_list() reads the contents of FAT array sectors to create
// the fat_list array where each of the indices represents the current sector
// ID and the value at that index would be its next sector ID.
populate_fat_list()344 void OleFile :: populate_fat_list()
345 {
346     int32_t current_sector, fat_sector_curr_cnt = 0;
347     int32_t fat_sector = header->get_difat_array(fat_sector_curr_cnt);
348     int32_t max_secchain_cnt = header->get_sector_size()/4;
349     int32_t count = 0;
350 
351     VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_INFO_LEVEL, CURRENT_PACKET,
352         "Reading the FAT list array.\n");
353     fat_list_len = ( header->get_fat_sector_count() * header->get_sector_size() ) / 4;
354     if (fat_list_len < 1)
355     {
356         VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_INFO_LEVEL, CURRENT_PACKET,
357             "FAT list array is empty.\n");
358         return;
359     }
360 
361     fat_list = new int32_t[fat_list_len];
362 
363     memset(fat_list, -1, fat_list_len);
364 
365     current_sector = fat_sector;
366     while (current_sector > INVALID_SECTOR)
367     {
368         uint32_t byte_offset = OLE_HEADER_LEN + (current_sector * header->get_sector_size());
369 
370         const uint8_t* buf = file_buf;
371 
372         buf += byte_offset;
373 
374         if ((byte_offset + header->get_sector_size()) > buf_len)
375             return;
376 
377         while ((count - (fat_sector_curr_cnt * max_secchain_cnt)) < (max_secchain_cnt))
378         {
379             if (!header->get_byte_order())
380                 fat_list[count] = LETOHL_UNALIGNED(buf);
381             else
382                 fat_list[count] = BETOHL_UNALIGNED(buf);
383             count++;
384             buf += 4;
385         }
386         fat_sector_curr_cnt++;
387         if (fat_sector_curr_cnt < MAX_DIFAT_SECTORS)
388             current_sector = header->get_difat_array(fat_sector_curr_cnt);
389         else
390             return;
391     }
392     VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_INFO_LEVEL, CURRENT_PACKET,
393         "FAT list array is populated.\n");
394 }
395 
// The function populate_mini_fat_list() reads the contents of mini FAT array sectors to
// create the mini_fat_list array where each of the indices represents the
// current mini sector ID and the value at that index would be its next mini
// sector ID.
populate_mini_fat_list()400 void OleFile :: populate_mini_fat_list()
401 {
402     int32_t minifat_sector = header->get_first_minifat(), current_sector;
403 
404     int32_t max_secchain_cnt = header->get_sector_size()/4;
405     int32_t count = 0;
406 
407     VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_INFO_LEVEL, CURRENT_PACKET,
408         "Reading the Mini-FAT list array.\n");
409     mini_fat_list_len = ( header->get_minifat_count() * header->get_sector_size() )  / 4;
410     if (mini_fat_list_len < 1)
411     {
412         VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_INFO_LEVEL, CURRENT_PACKET,
413             "Mini-FAT list array is empty.\n");
414         return;
415     }
416 
417     mini_fat_list = new int32_t[mini_fat_list_len];
418 
419     memset(mini_fat_list, -1, mini_fat_list_len);
420 
421     current_sector = minifat_sector;
422     int32_t minfat_curr_cnt = 0;
423     while (current_sector > INVALID_SECTOR)
424     {
425         uint32_t byte_offset = OLE_HEADER_LEN + (current_sector * header->get_sector_size());
426 
427         if ((byte_offset + header->get_sector_size()) > buf_len)
428             return;
429 
430         const uint8_t* buf = file_buf;
431 
432         buf += byte_offset;
433 
434         while ((count - (minfat_curr_cnt * max_secchain_cnt)) < max_secchain_cnt)
435         {
436             if (!header->get_byte_order())
437                 mini_fat_list[count] = LETOHL_UNALIGNED(buf);
438             else
439                 mini_fat_list[count] = BETOHL_UNALIGNED(buf);
440             count++;
441             buf += 4;
442         }
443         minfat_curr_cnt++;
444         int32_t next_sector = get_next_fat_sector(current_sector);
445         if (next_sector > INVALID_SECTOR)
446             current_sector = next_sector;
447         else
448             current_sector = INVALID_SECTOR;
449     }
450     VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_INFO_LEVEL, CURRENT_PACKET,
451         "Mini-FAT list array is populated..\n");
452 }
453 
454 // API to parse the OLE File Header.
455 // The header is always located at the beginning of the file,
456 // and its size is exactly 512 bytes. This implies that the first
457 // sector (with SecID 0) always starts at file offset 512.
parse_ole_header()458 bool OleFile :: parse_ole_header()
459 {
460     VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_INFO_LEVEL, CURRENT_PACKET,
461         "Staring the OLE header parsing.\n");
462     header = new OleHeader;
463     if (!header->set_byte_order(file_buf + HEADER_BYTE_ORDER_OFFSET))
464     {
465         VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_ERROR_LEVEL, CURRENT_PACKET,
466             "Invalid byte order in the OLE header. Returning.\n");
467         return false;
468     }
469 
470     // Header Signature (8 bytes) is Identification signature of the OLE file,
471     // and must be of the value 0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1.
472     if (!header->match_ole_sig(file_buf))
473     {
474         VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_ERROR_LEVEL, CURRENT_PACKET,
475             "Invalid file signature of OLE file. Returning.\n");
476         return false;
477     }
478 
479     // Minor Version field should be set to 0x003E.
480     header->set_minor_version(file_buf + HEADER_MINOR_VER_OFFSET);
481 
482     // Major Version field is set to either 0x0003 (version 3) or 0x0004 (version 4).
483     header->set_major_version(file_buf + HEADER_MAJOR_VER_OFFSET);
484 
485     // This field specifies the sector size of the compound file as a power of 2.
486     header->set_sector_size(file_buf + HEADER_SECTR_SIZE_OFFSET);
487 
488     // This field specifies the sector size of the Mini Stream as a power of 2.
489     header->set_mini_sector_size(file_buf + HEADER_MIN_SECTR_SIZE_OFFSET);
490 
491     header->set_fat_sector_count(file_buf + HEADER_FAT_SECTR_CNT_OFFSET);
492 
493     header->set_first_dir(file_buf + HEADER_FIRST_DIR_SECTR_OFFSET);
494 
495     header->set_minifat_cutoff(file_buf + HEADER_MINFAT_CUTOFF_OFFSET);
496 
497     header->set_first_minifat(file_buf + HEADER_FIRST_MINFAT_OFFSET);
498 
499     header->set_minifat_count(file_buf + HEADER_MINFAT_COUNT_OFFSET);
500 
501     header->set_first_difat(file_buf + HEADER_FIRST_DIFAT_OFFSET);
502 
503     header->set_difat_count(file_buf + HEADER_DIFAT_CNT_OFFSET);
504 
505     header->set_dir_sector_count(file_buf + HEADER_DIR_SECTR_CNT_OFFSET);
506 
507     // DIFAT array of 32-bit integer fields contains the first 109 FAT sector locations of the
508     // compound file.
509     header->set_difat_array(file_buf + HEADER_DIFAT_ARRY_OFFSET);
510 
511     VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_INFO_LEVEL, CURRENT_PACKET,
512         "Parsing of OLE header is done.\n");
513 
514     return true;
515 }
516 
// The vba code in a VBA macro file begins with the keyword "ATTRIBUT". This
// keyword is used to calculate the offset of vba code and is decompressed using
// RLE algorithm.
get_file_offset(const uint8_t * data,uint32_t data_len)520 int32_t OleFile :: get_file_offset(const uint8_t* data, uint32_t data_len)
521 {
522     if (searcher == nullptr)
523     {
524         VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_ERROR_LEVEL, CURRENT_PACKET,
525             "Error in the searcher.\n");
526         return -1;
527     }
528 
529     int32_t offset = searcher->search(search_handle, data, data_len);
530     return offset;
531 }
532 
// Copies up to `count` bytes from the input cursor into `buff`.
//
// fd       : read cursor; advanced past the bytes that were copied
// data_len : bytes still available at fd; reduced by the number copied
// buff     : destination with room for at least `count` bytes
// count    : number of bytes requested (nothing is copied when <= 0)
//
// Returns the number of bytes actually copied, which is smaller than `count` when
// the input runs out first.
int32_t cli_readn(const uint8_t*& fd, uint32_t& data_len, void* buff, int32_t count)
{
    uint8_t* out = static_cast<uint8_t*>(buff);
    int32_t copied = 0;

    while (copied < count and data_len != 0)
    {
        out[copied] = fd[copied];
        --data_len;
        ++copied;
    }

    fd += copied;
    return copied;
}
553 
554 // Function for RLE decompression.
555 //
556 // Run-length encoding (RLE) is a very simple form of data compression
557 // in which a stream of data is given as the input (i.e. "AAABBCCCC") and
558 // the output is a sequence of counts of consecutive data values in a row
559 // (i.e. "3A2B4C"). This type of data compression is lossless, meaning that
560 // when decompressed, all of the original data will be recovered when decoded.
decompression(const uint8_t * data,uint32_t & data_len,uint8_t * & local_vba_buffer,uint32_t & vba_buffer_offset)561 void OleFile :: decompression(const uint8_t* data, uint32_t& data_len, uint8_t*& local_vba_buffer,
562     uint32_t& vba_buffer_offset)
563 {
564     int16_t header;
565     bool flagCompressed;
566     unsigned char buffer[VBA_COMPRESSION_WINDOW]={ };
567     uint16_t token;
568     unsigned int pos, shift, mask, distance;
569     uint8_t flag;
570     bool clean;
571 
572     if (!data)
573         return;
574 
575     if (*data!= SIG_COMP_CONTAINER)
576     {
577         VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_ERROR_LEVEL, CURRENT_PACKET,
578             "Invalid Compressed flag.\n");
579         return;
580     }
581 
582     header = LETOHS_UNALIGNED(data + 1);
583 
584     flagCompressed = header & 0x8000;
585 
586     if (((header >> 12) & 0x07) != 0b011)
587     {
588         VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_INFO_LEVEL, CURRENT_PACKET,
589             "Invalid Chunk signature.\n");
590     }
591 
592     data += 3;
593     data_len -= 3;
594 
595     if (flagCompressed == 0)
596     {
597         memcpy(&buffer, data, data_len);
598         return;
599     }
600 
601     pos = 0;
602     clean = 1;
603     uint32_t size = data_len;
604     while (cli_readn(data, size, &flag, 1))
605     {
606         for (mask = 1; mask < 0x100; mask <<= 1)
607         {
608             unsigned int winpos = pos % VBA_COMPRESSION_WINDOW;
609             if (flag & mask)
610             {
611                 uint16_t len;
612                 uint32_t srcpos;
613 
614                 if (!cli_readn(data, size, &token, 2))
615                     return;
616 
617                 shift    = 12 - (winpos > 0x10) - (winpos > 0x20) - (winpos > 0x40) - (winpos >
618                     0x80) - (winpos > 0x100) - (winpos > 0x200) - (winpos > 0x400) - (winpos >
619                     0x800);
620                 len      = (uint16_t)((token & ((1 << shift) - 1)) + 3);
621                 distance = token >> shift;
622 
623                 srcpos = pos - distance - 1;
624                 if ((((srcpos + len) % VBA_COMPRESSION_WINDOW) < winpos)and
625                         ((winpos + len) < VBA_COMPRESSION_WINDOW) and
626                         (((srcpos % VBA_COMPRESSION_WINDOW) + len) < VBA_COMPRESSION_WINDOW) and
627                         (len <= VBA_COMPRESSION_WINDOW))
628                 {
629                     srcpos %= VBA_COMPRESSION_WINDOW;
630                     memcpy(&buffer[winpos], &buffer[srcpos],
631                         len);
632                     pos += len;
633                 }
634                 else
635                     while (len-- > 0)
636                     {
637                         srcpos = (pos - distance - 1) % VBA_COMPRESSION_WINDOW;
638                         buffer[pos++ % VBA_COMPRESSION_WINDOW] = buffer[srcpos];
639                     }
640             }
641             else
642             {
643                 if ((pos != 0)and (winpos == 0) and clean)
644                 {
645                     if (cli_readn(data, size, &token, 2) != 2)
646                     {
647                         return;
648                     }
649                     clean = 0;
650                     break;
651                 }
652                 if (cli_readn(data, size,  &buffer[winpos], 1) == 1)
653                     pos++;
654             }
655             clean = 1;
656         }
657     }
658 
659     int32_t decomp_len = strlen((char*)buffer);
660 
661     if ((vba_buffer_offset + decomp_len) > MAX_VBA_BUFFER_LEN)
662     {
663         decomp_len =  MAX_VBA_BUFFER_LEN - vba_buffer_offset;
664     }
665     memcpy((local_vba_buffer + vba_buffer_offset), buffer, decomp_len);
666     vba_buffer_offset += decomp_len;
667 }
668 
669 // Function to extract the VBA data and send it for RLE decompression.
find_and_extract_vba(uint8_t * & vba_buf,uint32_t & vba_buf_len)670 void OleFile :: find_and_extract_vba(uint8_t*& vba_buf, uint32_t& vba_buf_len)
671 {
672     std::unordered_map<char*, FileProperty*>::iterator it = dir_list->oleentry.begin();
673     uint32_t vba_buffer_offset = 0;
674     vba_buf = new uint8_t[MAX_VBA_BUFFER_LEN + 1]();
675 
676     while (it != dir_list->oleentry.end())
677     {
678         FileProperty* node = it->second;
679         ++it;
680         if (node->get_file_type() == STREAM)
681         {
682             uint8_t* data = nullptr;
683             uint32_t data_len;
684             get_file_data(node->get_name(), data, data_len);
685             uint8_t* data1 = data;
686             int32_t offset = get_file_offset(data, data_len);
687             if (offset <= 0)
688             {
689                 VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_INFO_LEVEL,
690                     CURRENT_PACKET,
691                     "Stream %s of size %ld does not have VBA code within first detected"
692                     " %d bytes\n", node->get_name(), node->get_stream_size(), data_len);
693                 delete[] data1;
694                 continue;
695             }
696 
697             data += offset - 4;
698             data_len = data_len - offset + 4;
699             VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_INFO_LEVEL,
700                 CURRENT_PACKET, "Stream %s of size %ld has vba code starting at "
701                 "offset %d bytes. First %d bytes will be processed\n",
702                 node->get_name(), node->get_stream_size(), (offset - 4), data_len);
703 
704             decompression(data, data_len, vba_buf, vba_buffer_offset);
705             delete[] data1;
706             if ( vba_buffer_offset >= MAX_VBA_BUFFER_LEN)
707                 break;
708         }
709     }
710     vba_buf_len = vba_buffer_offset;
711 
712     //Delete vba_buf if decompression could not happen
713     if (!vba_buf_len)
714         delete[] vba_buf;
715 }
716 
// Beginning function of ole file processing.
//
// An OLE file contains streams of data that look like files embedded within the
// OLE file. It can also contain storages, which are folders that contain streams
// or other storages.
//
// Ole file processing begins with OLE header matching, followed by populating
// the FAT array-list which contains the mapping between current fat sector and
// its next fat sector. This is followed by populating the mini-FAT array-list
// which contains the mapping between current mini-fat sector and its next
// mini-fat sector, and then by reading the entries of all the directory entry
// arrays of an ole file and creating a mapping between the storage/stream name
// and the fileproperty object. Afterwards, based on the directory, the data is
// fetched and extracted & RLE decompression is done.
oleprocess(const uint8_t * const ole_file,const uint32_t ole_length,uint8_t * & vba_buf,uint32_t & vba_buf_len)731 void oleprocess(const uint8_t* const ole_file, const uint32_t ole_length, uint8_t*& vba_buf,
732     uint32_t& vba_buf_len)
733 {
734     if (ole_length < OLE_HEADER_LEN)
735     {
736         VBA_DEBUG(vba_data_trace, DEFAULT_TRACE_OPTION_ID, TRACE_INFO_LEVEL, CURRENT_PACKET,
737             "OLE file data is too short for the inspection. Returning\n");
738         return;
739     }
740 
741     std::unique_ptr<OleFile> olefile (new OleFile(ole_file,ole_length));
742 
743     if (!olefile->parse_ole_header())
744         return;
745 
746     olefile->populate_fat_list();
747     olefile->populate_mini_fat_list();
748     olefile->walk_directory_list();
749     olefile->find_and_extract_vba(vba_buf, vba_buf_len);
750 }
751 
752