1 /*
2 * Extract component parts of OLE2 files (e.g. MS Office Documents)
3 *
4 * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
5 * Copyright (C) 2007-2013 Sourcefire, Inc.
6 *
7 * Authors: Kevin Lin
8 *
9 * This program is free software; you can redistribute it and/or modify it under
10 * the terms of the GNU General Public License version 2 as published by the
11 * Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc., 51
20 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 */
22
23 #if HAVE_CONFIG_H
24 #include "clamav-config.h"
25 #endif
26
27 #include <sys/types.h>
28 #include <sys/stat.h>
29 #include <fcntl.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include <stdlib.h>
34 #include <errno.h>
35 #include <conv.h>
36 #ifdef HAVE_UNISTD_H
37 #include <unistd.h>
38 #endif
39
40 #if HAVE_ICONV
41 #include <iconv.h>
42 #endif
43
44 #include "clamav.h"
45 #include "others.h"
46 #include "msdoc.h"
47 #include "scanners.h"
48 #include "fmap.h"
49 #include "json_api.h"
50 #include "entconv.h"
51
52 #if HAVE_JSON
53 static char *
ole2_convert_utf(summary_ctx_t * sctx,char * begin,size_t sz,const char * encoding)54 ole2_convert_utf(summary_ctx_t *sctx, char *begin, size_t sz, const char *encoding)
55 {
56 char *outbuf = NULL;
57 #if HAVE_ICONV
58 char *buf, *p1, *p2;
59 off_t offset;
60 size_t inlen, outlen, nonrev, sz2;
61 size_t i;
62 int attempt;
63 iconv_t cd;
64 #else
65 UNUSEDPARAM(encoding);
66 #endif
67 /* applies in the both case */
68 if (sctx->codepage == 20127 || sctx->codepage == CODEPAGE_UTF8) {
69 char *track;
70 size_t bcnt, scnt;
71
72 outbuf = cli_calloc(1, sz + 1);
73 if (!(outbuf))
74 return NULL;
75 memcpy(outbuf, begin, sz);
76
77 track = outbuf + sz - 1;
78 if ((sctx->codepage == CODEPAGE_UTF8) && (*track & 0x80)) { /* UTF-8 with a most significant bit */
79 /* locate the start of the last character */
80 for (bcnt = 1; (track != outbuf); track--, bcnt++) {
81 if (((uint8_t)*track & 0xC0) != 0x80)
82 break;
83 }
84
85 /* count number of set (1) significant bits */
86 for (scnt = 0; scnt < sizeof(uint8_t) * 8; scnt++) {
87 if (((uint8_t)*track & (0x80 >> scnt)) == 0)
88 break;
89 }
90
91 if (bcnt != scnt) {
92 cli_dbgmsg("ole2_convert_utf: cleaning out %zu bytes from incomplete utf-8 character length %zu\n",
93 bcnt, scnt);
94 for (; bcnt > 0; bcnt--, track++)
95 *track = '\0';
96 }
97 }
98 return outbuf;
99 }
100
101 #if HAVE_ICONV
102 p1 = buf = cli_calloc(1, sz);
103 if (!(buf))
104 return NULL;
105
106 memcpy(buf, begin, sz);
107 inlen = sz;
108
109 /* encoding lookup if not specified */
110 if (!encoding) {
111 for (i = 0; i < NUMCODEPAGES; ++i) {
112 if (sctx->codepage == codepage_entries[i].codepage)
113 encoding = codepage_entries[i].encoding;
114 else if (sctx->codepage < codepage_entries[i].codepage) {
115 /* assuming sorted array */
116 break;
117 }
118 }
119
120 if (!encoding) {
121 cli_warnmsg("ole2_convert_utf: could not locate codepage encoding for %d\n", sctx->codepage);
122 sctx->flags |= OLE2_CODEPAGE_ERROR_NOTFOUND;
123 free(buf);
124 return NULL;
125 }
126 }
127
128 cd = iconv_open("UTF-8", encoding);
129 if (cd == (iconv_t)(-1)) {
130 char errbuf[128];
131 cli_strerror(errno, errbuf, sizeof(errbuf));
132 cli_errmsg("ole2_convert_utf: could not initialize iconv for encoding %s: %s\n", encoding, errbuf);
133 sctx->flags |= OLE2_CODEPAGE_ERROR_UNINITED;
134 } else {
135 offset = 0;
136 for (attempt = 1; attempt <= 3; ++attempt) {
137 /* charset to UTF-8 should never exceed sz*6 */
138 sz2 = (attempt * 2) * sz;
139 /* use cli_realloc, reuse the buffer that has already been translated */
140 outbuf = (char *)cli_realloc(outbuf, sz2 + 1);
141 if (!outbuf) {
142 free(buf);
143 iconv_close(cd);
144 return NULL;
145 }
146
147 outlen = sz2 - offset;
148 p2 = outbuf + offset;
149
150 /* conversion */
151 nonrev = iconv(cd, &p1, &inlen, &p2, &outlen);
152
153 if (errno == EILSEQ) {
154 cli_dbgmsg("ole2_convert_utf: input buffer contains invalid character for its encoding\n");
155 sctx->flags |= OLE2_CODEPAGE_ERROR_INVALID;
156 break;
157 } else if (errno == EINVAL && nonrev == (size_t)-1) {
158 cli_dbgmsg("ole2_convert_utf: input buffer contains incomplete multibyte character\n");
159 sctx->flags |= OLE2_CODEPAGE_ERROR_INCOMPLETE;
160 break;
161 } else if (inlen == 0) {
162 //cli_dbgmsg("ole2_convert_utf: input buffer is successfully translated\n");
163 break;
164 }
165
166 //outbuf[sz2 - outlen] = '\0';
167 //cli_dbgmsg("%u %s\n", inlen, outbuf);
168
169 offset = sz2 - outlen;
170 if (attempt < 3)
171 cli_dbgmsg("ole2_convert_utf: outbuf is too small, resizing %llu -> %llu\n",
172 (long long unsigned)((attempt * 2) * sz), (long long unsigned)(((attempt + 1) * 2) * sz));
173 }
174
175 if (errno == E2BIG && nonrev == (size_t)-1) {
176 cli_dbgmsg("ole2_convert_utf: buffer could not be fully translated\n");
177 sctx->flags |= OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL;
178 }
179
180 outbuf[sz2 - outlen] = '\0';
181 }
182
183 iconv_close(cd);
184 free(buf);
185 #endif
186 /* this should force base64 encoding if NULL */
187 return outbuf;
188 }
189
190 static int
ole2_process_property(summary_ctx_t * sctx,unsigned char * databuf,uint32_t offset)191 ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offset)
192 {
193 uint16_t proptype, padding;
194 int ret = CL_SUCCESS;
195
196 if (cli_json_timeout_cycle_check(sctx->ctx, &(sctx->toval)) != CL_SUCCESS) {
197 sctx->flags |= OLE2_SUMMARY_FLAG_TIMEOUT;
198 return CL_ETIMEOUT;
199 }
200
201 if (offset + sizeof(proptype) + sizeof(padding) > sctx->pssize) {
202 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
203 return CL_EFORMAT;
204 }
205
206 memcpy(&proptype, databuf + offset, sizeof(proptype));
207 offset += sizeof(proptype);
208 memcpy(&padding, databuf + offset, sizeof(padding));
209 offset += sizeof(padding);
210 /* endian conversion */
211 proptype = sum16_endian_convert(proptype);
212
213 //cli_dbgmsg("proptype: 0x%04x\n", proptype);
214 if (padding != 0) {
215 cli_dbgmsg("ole2_process_property: invalid padding value, non-zero\n");
216 sctx->flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
217 return CL_EFORMAT;
218 }
219
220 switch (proptype) {
221 case PT_EMPTY:
222 case PT_NULL:
223 ret = cli_jsonnull(sctx->summary, sctx->propname);
224 break;
225 case PT_INT16: {
226 int16_t dout;
227 if (offset + sizeof(dout) > sctx->pssize) {
228 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
229 return CL_EFORMAT;
230 }
231 memcpy(&dout, databuf + offset, sizeof(dout));
232 offset += sizeof(dout);
233 /* endian conversion */
234 dout = sum16_endian_convert(dout);
235
236 if (sctx->writecp) {
237 sctx->codepage = (uint16_t)dout;
238 ret = cli_jsonint(sctx->summary, sctx->propname, sctx->codepage);
239 } else
240 ret = cli_jsonint(sctx->summary, sctx->propname, dout);
241 break;
242 }
243 case PT_INT32:
244 case PT_INT32v1: {
245 int32_t dout;
246 if (offset + sizeof(dout) > sctx->pssize) {
247 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
248 return CL_EFORMAT;
249 }
250 memcpy(&dout, databuf + offset, sizeof(dout));
251 offset += sizeof(dout);
252 /* endian conversion */
253 dout = sum32_endian_convert(dout);
254
255 ret = cli_jsonint(sctx->summary, sctx->propname, dout);
256 break;
257 }
258 case PT_FLOAT32: /* review this please */
259 {
260 float dout;
261 if (offset + sizeof(dout) > sctx->pssize) {
262 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
263 return CL_EFORMAT;
264 }
265 memcpy(&dout, databuf + offset, sizeof(dout));
266 offset += sizeof(dout);
267 /* endian conversion */
268 dout = sum32_endian_convert(dout);
269
270 ret = cli_jsondouble(sctx->summary, sctx->propname, dout);
271 break;
272 }
273 case PT_DATE:
274 case PT_DOUBLE64: /* review this please */
275 {
276 double dout;
277 if (offset + sizeof(dout) > sctx->pssize) {
278 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
279 return CL_EFORMAT;
280 }
281 memcpy(&dout, databuf + offset, sizeof(dout));
282 offset += sizeof(dout);
283 /* endian conversion */
284 dout = sum64_endian_convert(dout);
285
286 ret = cli_jsondouble(sctx->summary, sctx->propname, dout);
287 break;
288 }
289 case PT_BOOL: {
290 uint16_t dout;
291 if (offset + sizeof(dout) > sctx->pssize) {
292 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
293 return CL_EFORMAT;
294 }
295 memcpy(&dout, databuf + offset, sizeof(dout));
296 offset += sizeof(dout);
297 /* no need for endian conversion */
298
299 ret = cli_jsonbool(sctx->summary, sctx->propname, dout);
300 break;
301 }
302 case PT_INT8v1: {
303 int8_t dout;
304 if (offset + sizeof(dout) > sctx->pssize) {
305 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
306 return CL_EFORMAT;
307 }
308 memcpy(&dout, databuf + offset, sizeof(dout));
309 offset += sizeof(dout);
310 /* no need for endian conversion */
311
312 ret = cli_jsonint(sctx->summary, sctx->propname, dout);
313 break;
314 }
315 case PT_UINT8: {
316 uint8_t dout;
317 if (offset + sizeof(dout) > sctx->pssize) {
318 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
319 return CL_EFORMAT;
320 }
321 memcpy(&dout, databuf + offset, sizeof(dout));
322 offset += sizeof(dout);
323 /* no need for endian conversion */
324
325 ret = cli_jsonint(sctx->summary, sctx->propname, dout);
326 break;
327 }
328 case PT_UINT16: {
329 uint16_t dout;
330 if (offset + sizeof(dout) > sctx->pssize) {
331 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
332 return CL_EFORMAT;
333 }
334 memcpy(&dout, databuf + offset, sizeof(dout));
335 offset += sizeof(dout);
336 /* endian conversion */
337 dout = sum16_endian_convert(dout);
338
339 if (sctx->writecp)
340 sctx->codepage = dout;
341
342 ret = cli_jsonint(sctx->summary, sctx->propname, dout);
343 break;
344 }
345 case PT_UINT32:
346 case PT_UINT32v1: {
347 uint32_t dout;
348 if (offset + sizeof(dout) > sctx->pssize) {
349 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
350 return CL_EFORMAT;
351 }
352 memcpy(&dout, databuf + offset, sizeof(dout));
353 offset += sizeof(dout);
354 /* endian conversion */
355 dout = sum32_endian_convert(dout);
356
357 ret = cli_jsonint(sctx->summary, sctx->propname, dout);
358 break;
359 }
360 case PT_INT64: {
361 int64_t dout;
362 if (offset + sizeof(dout) > sctx->pssize) {
363 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
364 return CL_EFORMAT;
365 }
366 memcpy(&dout, databuf + offset, sizeof(dout));
367 offset += sizeof(dout);
368 /* endian conversion */
369 dout = sum64_endian_convert(dout);
370
371 ret = cli_jsonint64(sctx->summary, sctx->propname, dout);
372 break;
373 }
374 case PT_UINT64: {
375 uint64_t dout;
376 if (offset + sizeof(dout) > sctx->pssize) {
377 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
378 return CL_EFORMAT;
379 }
380 memcpy(&dout, databuf + offset, sizeof(dout));
381 offset += sizeof(dout);
382 /* endian conversion */
383 dout = sum64_endian_convert(dout);
384
385 ret = cli_jsonint64(sctx->summary, sctx->propname, dout);
386 break;
387 }
388 case PT_BSTR:
389 case PT_LPSTR:
390 if (sctx->codepage == 0) {
391 cli_dbgmsg("ole2_propset_json: current codepage is unknown, cannot parse char stream\n");
392 sctx->flags |= OLE2_SUMMARY_FLAG_CODEPAGE;
393 } else {
394 uint32_t strsize;
395 char *outstr, *outstr2;
396
397 if (offset + sizeof(strsize) > sctx->pssize) {
398 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
399 return CL_EFORMAT;
400 }
401
402 memcpy(&strsize, databuf + offset, sizeof(strsize));
403 offset += sizeof(strsize);
404 /* endian conversion? */
405 strsize = sum32_endian_convert(strsize);
406
407 if (offset + strsize > sctx->pssize) {
408 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
409 return CL_EFORMAT;
410 }
411
412 /* limitation on string length */
413 if (strsize > PROPSTRLIMIT) {
414 cli_dbgmsg("ole2_process_property: property string sized %lu truncated to size %lu\n",
415 (unsigned long)strsize, (unsigned long)PROPSTRLIMIT);
416 sctx->flags |= OLE2_SUMMARY_FLAG_TRUNC_STR;
417 strsize = PROPSTRLIMIT;
418 }
419
420 outstr = cli_calloc(strsize + 1, 1); /* last char must be NULL */
421 if (!outstr) {
422 return CL_EMEM;
423 }
424 strncpy(outstr, (const char *)(databuf + offset), strsize);
425
426 /* conversion of various encodings to UTF-8 */
427 outstr2 = ole2_convert_utf(sctx, outstr, strsize, NULL);
428 if (!outstr2) {
429 /* use base64 encoding when all else fails! */
430 char b64jstr[PROPSTRLIMIT];
431
432 /* outstr2 should be 4/3 times the original (rounded up) */
433 outstr2 = cl_base64_encode(outstr, strsize);
434 if (!outstr2) {
435 cli_dbgmsg("ole2_process_property: failed to convert to base64 string\n");
436 return CL_EMEM;
437 }
438
439 snprintf(b64jstr, PROPSTRLIMIT, "%s_base64", sctx->propname);
440 ret = cli_jsonbool(sctx->summary, b64jstr, 1);
441 if (ret != CL_SUCCESS)
442 return ret;
443 }
444
445 ret = cli_jsonstr(sctx->summary, sctx->propname, outstr2);
446 free(outstr);
447 free(outstr2);
448 }
449 break;
450 case PT_LPWSTR: {
451 uint32_t strsize;
452 char *outstr, *outstr2;
453
454 if (offset + sizeof(strsize) > sctx->pssize) {
455 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
456 return CL_EFORMAT;
457 }
458 memcpy(&strsize, databuf + offset, sizeof(strsize));
459 offset += sizeof(strsize);
460 /* endian conversion; wide strings are by length, not size (x2) */
461 strsize = sum32_endian_convert(strsize) * 2;
462
463 /* limitation on string length */
464 if (strsize > (2 * PROPSTRLIMIT)) {
465 cli_dbgmsg("ole2_process_property: property string sized %lu truncated to size %lu\n",
466 (unsigned long)strsize, (unsigned long)(2 * PROPSTRLIMIT));
467 sctx->flags |= OLE2_SUMMARY_FLAG_TRUNC_STR;
468 strsize = (2 * PROPSTRLIMIT);
469 }
470
471 if (offset + strsize > sctx->pssize) {
472 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
473 return CL_EFORMAT;
474 }
475 outstr = cli_calloc(strsize + 2, 1); /* last two chars must be NULL */
476 if (!outstr) {
477 return CL_EMEM;
478 }
479 memcpy(outstr, (const char *)(databuf + offset), strsize);
480 /* conversion of 16-width char strings (UTF-16 or UTF-16LE??) to UTF-8 */
481 outstr2 = ole2_convert_utf(sctx, outstr, strsize, UTF16_MS);
482 if (!outstr2) {
483 /* use base64 encoding when all else fails! */
484 char b64jstr[PROPSTRLIMIT];
485
486 outstr2 = cl_base64_encode(outstr, strsize);
487 if (!outstr2) {
488 free(outstr);
489 return CL_EMEM;
490 }
491
492 snprintf(b64jstr, PROPSTRLIMIT, "%s_base64", sctx->propname);
493 ret = cli_jsonbool(sctx->summary, b64jstr, 1);
494 if (ret != CL_SUCCESS) {
495 free(outstr);
496 free(outstr2);
497 return ret;
498 }
499 }
500
501 ret = cli_jsonstr(sctx->summary, sctx->propname, outstr2);
502 free(outstr);
503 free(outstr2);
504 break;
505 }
506 case PT_FILETIME: {
507 uint32_t ltime, htime;
508 uint64_t wtime = 0, utime = 0;
509
510 if (offset + sizeof(ltime) + sizeof(htime) > sctx->pssize) {
511 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
512 return CL_EFORMAT;
513 }
514 memcpy(<ime, databuf + offset, sizeof(ltime));
515 offset += sizeof(ltime);
516 memcpy(&htime, databuf + offset, sizeof(htime));
517 offset += sizeof(ltime);
518 ltime = sum32_endian_convert(ltime);
519 htime = sum32_endian_convert(htime);
520
521 /* UNIX timestamp formatting */
522 wtime = htime;
523 wtime <<= 32;
524 wtime |= ltime;
525
526 utime = wtime / 10000000;
527 utime -= 11644473600LL;
528
529 if ((uint32_t)((utime & 0xFFFFFFFF00000000) >> 32)) {
530 cli_dbgmsg("ole2_process_property: UNIX timestamp is larger than 32-bit number\n");
531 } else {
532 ret = cli_jsonint(sctx->summary, sctx->propname, (uint32_t)(utime & 0xFFFFFFFF));
533 }
534 break;
535 }
536 default:
537 cli_dbgmsg("ole2_process_property: unhandled property type 0x%04x for %s property\n",
538 proptype, sctx->propname);
539 sctx->flags |= OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE;
540 }
541
542 return ret;
543 }
544
/**
 * @brief Map a DocumentSummaryInformation property id to its JSON key.
 *
 * On a match sctx->propname is set to the key; DSPID_CODEPAGE additionally
 * sets sctx->writecp so the value is recorded as the active code page.
 * Unknown ids leave propname untouched and set
 * OLE2_SUMMARY_FLAG_UNKNOWN_PROPID.
 *
 * @param sctx    summary context to update
 * @param propid  property identifier read from the property set
 */
static void ole2_translate_docsummary_propid(summary_ctx_t *sctx, uint32_t propid)
{
    static const struct {
        uint32_t id;
        const char *name;
    } docsum_props[] = {
        {DSPID_CODEPAGE, "CodePage"},
        {DSPID_CATEGORY, "Category"},
        {DSPID_PRESFORMAT, "PresentationTarget"},
        {DSPID_BYTECOUNT, "Bytes"},
        {DSPID_LINECOUNT, "Lines"},
        {DSPID_PARCOUNT, "Paragraphs"},
        {DSPID_SLIDECOUNT, "Slides"},
        {DSPID_NOTECOUNT, "Notes"},
        {DSPID_HIDDENCOUNT, "HiddenSlides"},
        {DSPID_MMCLIPCOUNT, "MMClips"},
        {DSPID_SCALE, "Scale"},
        {DSPID_HEADINGPAIR, "HeadingPairs"}, /* VT_VARIANT | VT_VECTOR */
        {DSPID_DOCPARTS, "DocPartTitles"},   /* VT_VECTOR | VT_LPSTR */
        {DSPID_MANAGER, "Manager"},
        {DSPID_COMPANY, "Company"},
        {DSPID_LINKSDIRTY, "LinksDirty"},
        {DSPID_CCHWITHSPACES, "Char&WSCount"},
        {DSPID_SHAREDDOC, "SharedDoc"}, /* SHOULD BE FALSE! */
        {DSPID_LINKBASE, "LinkBase"},   /* moved to user-defined */
        {DSPID_HLINKS, "HyperLinks"},   /* moved to user-defined */
        {DSPID_HYPERLINKSCHANGED, "HyperLinksChanged"},
        {DSPID_VERSION, "Version"},
        {DSPID_DIGSIG, "DigitalSig"},
        {DSPID_CONTENTTYPE, "ContentType"},
        {DSPID_CONTENTSTATUS, "ContentStatus"},
        {DSPID_LANGUAGE, "Language"},
        {DSPID_DOCVERSION, "DocVersion"},
    };
    size_t idx;

    for (idx = 0; idx < sizeof(docsum_props) / sizeof(docsum_props[0]); idx++) {
        if (docsum_props[idx].id != propid)
            continue;

        if (propid == DSPID_CODEPAGE)
            sctx->writecp = 1; /* must be set ONLY for codepage */
        sctx->propname = docsum_props[idx].name;
        return;
    }

    cli_dbgmsg("ole2_translate_docsummary_propid: unrecognized propid!\n");
    sctx->flags |= OLE2_SUMMARY_FLAG_UNKNOWN_PROPID;
}
635
/**
 * @brief Map a SummaryInformation property id to its JSON key.
 *
 * On a match sctx->propname is set to the key; SPID_CODEPAGE additionally
 * sets sctx->writecp. Unknown ids leave propname untouched and set
 * OLE2_SUMMARY_FLAG_UNKNOWN_PROPID.
 *
 * @param sctx    summary context to update
 * @param propid  property identifier read from the property set
 */
static void ole2_translate_summary_propid(summary_ctx_t *sctx, uint32_t propid)
{
    static const struct {
        uint32_t id;
        const char *name;
    } summary_props[] = {
        {SPID_CODEPAGE, "CodePage"},
        {SPID_TITLE, "Title"},
        {SPID_SUBJECT, "Subject"},
        {SPID_AUTHOR, "Author"},
        {SPID_KEYWORDS, "Keywords"},
        {SPID_COMMENTS, "Comments"},
        {SPID_TEMPLATE, "Template"},
        {SPID_LASTAUTHOR, "LastAuthor"},
        {SPID_REVNUMBER, "RevNumber"},
        {SPID_EDITTIME, "EditTime"},
        {SPID_LASTPRINTED, "LastPrinted"},
        {SPID_CREATEDTIME, "CreatedTime"},
        {SPID_MODIFIEDTIME, "ModifiedTime"},
        {SPID_PAGECOUNT, "PageCount"},
        {SPID_WORDCOUNT, "WordCount"},
        {SPID_CHARCOUNT, "CharCount"},
        {SPID_THUMBNAIL, "Thumbnail"},
        {SPID_APPNAME, "AppName"},
        {SPID_SECURITY, "Security"},
    };
    const size_t count = sizeof(summary_props) / sizeof(summary_props[0]);
    size_t idx;

    for (idx = 0; idx < count; idx++) {
        if (summary_props[idx].id == propid) {
            if (propid == SPID_CODEPAGE)
                sctx->writecp = 1; /* must be set ONLY for codepage */
            sctx->propname = summary_props[idx].name;
            return;
        }
    }

    /* nothing matched */
    cli_dbgmsg("ole2_translate_summary_propid: unrecognized propid!\n");
    sctx->flags |= OLE2_SUMMARY_FLAG_UNKNOWN_PROPID;
}
702
/**
 * @brief Parse one property set and add its known properties to the JSON.
 *
 * The property set starts at `offset` in sctx->sfmap with a 32-bit byte size
 * and a 32-bit property count, followed by (property id, property offset)
 * pairs. At most PROPCNTLIMIT properties are processed. Property offsets are
 * relative to the start of the property set.
 *
 * All range checks are written as subtractions so that sizes and offsets
 * taken from the file cannot wrap or be truncated past the check.
 *
 * @param sctx    summary context; codepage/writecp/propname are reset here
 * @param offset  file offset of the property set
 * @return CL_SUCCESS, CL_EFORMAT (set does not fit), CL_EREAD (map failure),
 *         or the first non-success status from ole2_process_property()
 */
static int ole2_summary_propset_json(summary_ctx_t *sctx, off_t offset)
{
    const size_t hdrlen   = 2 * sizeof(uint32_t); /* size + property count */
    const size_t entrylen = 2 * sizeof(uint32_t); /* property id + property offset */
    unsigned char *hdr, *ps;
    uint32_t numprops, limitprops;
    size_t start, psoff;
    uint32_t i;
    int ret;

    cli_dbgmsg("in ole2_summary_propset_json\n");

    /* summary ctx propset-specific setup */
    sctx->codepage = 0;
    sctx->writecp  = 0;
    sctx->propname = NULL;

    /* examine property set metadata */
    if (offset < 0 || (uint64_t)offset > (uint64_t)sctx->maplen ||
        sctx->maplen - (size_t)offset < hdrlen) {
        sctx->flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
        return CL_EFORMAT;
    }
    start = (size_t)offset;

    hdr = (unsigned char *)fmap_need_off_once(sctx->sfmap, start, hdrlen);
    if (!hdr) {
        sctx->flags |= OLE2_SUMMARY_ERROR_DATABUF;
        return CL_EREAD;
    }
    /* offsets inside the set are counted from its start, so only the
     * set-relative cursor moves past the header */
    psoff = hdrlen;
    memcpy(&(sctx->pssize), hdr, sizeof(sctx->pssize));
    memcpy(&numprops, hdr + sizeof(sctx->pssize), sizeof(numprops));
    /* endian conversion */
    sctx->pssize = sum32_endian_convert(sctx->pssize);
    numprops     = sum32_endian_convert(numprops);
    cli_dbgmsg("ole2_summary_propset_json: pssize: %u, numprops: %u\n", sctx->pssize, numprops);
    if (numprops > PROPCNTLIMIT) {
        sctx->flags |= OLE2_SUMMARY_LIMIT_PROPS;
        limitprops = PROPCNTLIMIT;
    } else {
        limitprops = numprops;
    }
    cli_dbgmsg("ole2_summary_propset_json: processing %u of %u (%u max) properties\n",
               limitprops, numprops, PROPCNTLIMIT);

    /* extract remaining fragment of propset; start <= maplen was verified above */
    if (sctx->pssize > sctx->maplen - start) {
        sctx->flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
        return CL_EFORMAT;
    }
    ps = (unsigned char *)fmap_need_off_once(sctx->sfmap, start, sctx->pssize);
    if (!ps) {
        sctx->flags |= OLE2_SUMMARY_ERROR_DATABUF;
        return CL_EREAD;
    }

    /* iterate over the properties */
    for (i = 0; i < limitprops; ++i) {
        uint32_t propid, propoff;

        if (psoff > sctx->pssize || sctx->pssize - psoff < entrylen) {
            sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
            return CL_EFORMAT;
        }
        memcpy(&propid, ps + psoff, sizeof(propid));
        psoff += sizeof(propid);
        memcpy(&propoff, ps + psoff, sizeof(propoff));
        psoff += sizeof(propoff);
        /* endian conversion */
        propid  = sum32_endian_convert(propid);
        propoff = sum32_endian_convert(propoff);
        cli_dbgmsg("ole2_summary_propset_json: propid: 0x%08x, propoff: %u\n", propid, propoff);

        sctx->propname = NULL;
        sctx->writecp  = 0;
        switch (sctx->mode) {
            case 1:
                ole2_translate_docsummary_propid(sctx, propid);
                break;
            default:
                ole2_translate_summary_propid(sctx, propid);
        }

        /* ids without a known name are skipped; the translate helpers have
         * already flagged them */
        if (sctx->propname != NULL) {
            ret = ole2_process_property(sctx, ps, propoff);
            if (ret != CL_SUCCESS)
                return ret;
        }
    }

    return CL_SUCCESS;
}
795
/**
 * @brief Release the stream map and report accumulated parse flags.
 *
 * Unmaps sctx->sfmap when present. If any bit is set in sctx->flags, a
 * "ParseErrors" array is created under sctx->summary and the name of every
 * set flag is appended to it, summary flags first, code page flags after.
 *
 * @param sctx     summary context being torn down
 * @param retcode  status to hand back to the caller unchanged
 * @return retcode
 */
static int cli_ole2_summary_json_cleanup(summary_ctx_t *sctx, int retcode)
{
    /* reporting order follows the table order */
    static const struct {
        uint32_t bit;
        const char *label;
    } flag_names[] = {
        /* summary errors */
        {OLE2_SUMMARY_ERROR_TOOSMALL, "OLE2_SUMMARY_ERROR_TOOSMALL"},
        {OLE2_SUMMARY_ERROR_OOB, "OLE2_SUMMARY_ERROR_OOB"},
        {OLE2_SUMMARY_ERROR_DATABUF, "OLE2_SUMMARY_ERROR_DATABUF"},
        {OLE2_SUMMARY_ERROR_INVALID_ENTRY, "OLE2_SUMMARY_ERROR_INVALID_ENTRY"},
        {OLE2_SUMMARY_LIMIT_PROPS, "OLE2_SUMMARY_LIMIT_PROPS"},
        {OLE2_SUMMARY_FLAG_TIMEOUT, "OLE2_SUMMARY_FLAG_TIMEOUT"},
        {OLE2_SUMMARY_FLAG_CODEPAGE, "OLE2_SUMMARY_FLAG_CODEPAGE"},
        {OLE2_SUMMARY_FLAG_UNKNOWN_PROPID, "OLE2_SUMMARY_FLAG_UNKNOWN_PROPID"},
        {OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE, "OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE"},
        {OLE2_SUMMARY_FLAG_TRUNC_STR, "OLE2_SUMMARY_FLAG_TRUNC_STR"},
        /* codepage translation errors */
        {OLE2_CODEPAGE_ERROR_NOTFOUND, "OLE2_CODEPAGE_ERROR_NOTFOUND"},
        {OLE2_CODEPAGE_ERROR_UNINITED, "OLE2_CODEPAGE_ERROR_UNINITED"},
        {OLE2_CODEPAGE_ERROR_INVALID, "OLE2_CODEPAGE_ERROR_INVALID"},
        {OLE2_CODEPAGE_ERROR_INCOMPLETE, "OLE2_CODEPAGE_ERROR_INCOMPLETE"},
        {OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL, "OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL"},
    };
    json_object *errors;
    size_t k;

    cli_dbgmsg("in cli_ole2_summary_json_cleanup: %d[%x]\n", retcode, sctx->flags);

    if (sctx->sfmap) {
        funmap(sctx->sfmap);
    }

    if (!sctx->flags)
        return retcode;

    errors = cli_jsonarray(sctx->summary, "ParseErrors");
    for (k = 0; k < sizeof(flag_names) / sizeof(flag_names[0]); k++) {
        if (sctx->flags & flag_names[k].bit) {
            cli_jsonstr(errors, NULL, flag_names[k].label);
        }
    }

    return retcode;
}
861
/**
 * @brief Parse an OLE2 summary stream and record its properties as JSON.
 *
 * The stream behind `fd` is mapped, its header (summary_stub_t) validated,
 * and the first property set is decoded into a JSON object created under
 * ctx->wrkproperty. A second property set is not parsed; its presence is
 * only recorded as "HasUserDefinedProperties".
 *
 * @param ctx   scan context
 * @param fd    descriptor of the extracted summary stream
 * @param mode  0 = "SummaryInfo", 1 = "DocSummaryInfo", 2 = "Hwp5SummaryInfo"
 * @return CL_SUCCESS or a CL_E* status code
 */
int cli_ole2_summary_json(cli_ctx *ctx, int fd, int mode)
{
    summary_ctx_t sctx;
    STATBUF statbuf;
    off_t foff = 0;
    unsigned char *databuf;
    summary_stub_t sumstub;
    propset_entry_t pentry;
    const char *objname;
    int ret = CL_SUCCESS;

    cli_dbgmsg("in cli_ole2_summary_json\n");

    /* preliminary sanity checks; nothing to clean up yet on these paths */
    if (NULL == ctx) {
        return CL_ENULLARG;
    }

    if (fd < 0) {
        cli_dbgmsg("ole2_summary_json: invalid file descriptor\n");
        return CL_ENULLARG; /* placeholder */
    }

    if (!(mode >= 0 && mode <= 2)) {
        cli_dbgmsg("ole2_summary_json: invalid mode specified\n");
        return CL_ENULLARG; /* placeholder */
    }

    /* summary ctx setup */
    memset(&sctx, 0, sizeof(sctx));
    sctx.ctx  = ctx;
    sctx.mode = mode;

    if (FSTAT(fd, &statbuf) == -1) {
        cli_dbgmsg("ole2_summary_json: cannot stat file descriptor\n");
        return CL_ESTAT;
    }

    sctx.sfmap = fmap(fd, 0, statbuf.st_size, NULL);
    if (!sctx.sfmap) {
        cli_dbgmsg("ole2_summary_json: failed to get fmap\n");
        return CL_EMAP;
    }
    sctx.maplen = sctx.sfmap->len;
    cli_dbgmsg("ole2_summary_json: streamsize: %zu\n", sctx.maplen);

    /* from here on every exit goes through the cleanup helper */
    if (mode == 1)
        objname = "DocSummaryInfo";
    else if (mode == 2)
        objname = "Hwp5SummaryInfo";
    else
        objname = "SummaryInfo";

    sctx.summary = cli_jsonobj(ctx->wrkproperty, objname);
    if (!sctx.summary) {
        cli_errmsg("ole2_summary_json: no memory for json object.\n");
        ret = CL_EMEM;
        goto done;
    }

    sctx.codepage = 0;
    sctx.writecp  = 0;

    /* acquire property stream metadata */
    if (sctx.maplen < sizeof(summary_stub_t)) {
        sctx.flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
        ret = CL_EFORMAT;
        goto done;
    }
    databuf = (unsigned char *)fmap_need_off_once(sctx.sfmap, foff, sizeof(summary_stub_t));
    if (!databuf) {
        sctx.flags |= OLE2_SUMMARY_ERROR_DATABUF;
        ret = CL_EREAD;
        goto done;
    }
    foff += sizeof(summary_stub_t);
    memcpy(&sumstub, databuf, sizeof(summary_stub_t));

    /* endian conversion and checks */
    sumstub.byte_order = le16_to_host(sumstub.byte_order);
    if (sumstub.byte_order != 0xfffe) {
        cli_dbgmsg("ole2_summary_json: byteorder 0x%x is invalid\n", sumstub.byte_order);
        sctx.flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
        ret = CL_EFORMAT;
        goto done;
    }
    sumstub.version      = sum16_endian_convert(sumstub.version); /*unused*/
    sumstub.system       = sum32_endian_convert(sumstub.system);  /*unused*/
    sumstub.num_propsets = sum32_endian_convert(sumstub.num_propsets);
    if (sumstub.num_propsets != 1 && sumstub.num_propsets != 2) {
        cli_dbgmsg("ole2_summary_json: invalid number of property sets\n");
        sctx.flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
        ret = CL_EFORMAT;
        goto done;
    }

    cli_dbgmsg("ole2_summary_json: byteorder 0x%x\n", sumstub.byte_order);
    cli_dbgmsg("ole2_summary_json: %u property set(s) detected\n", sumstub.num_propsets);

    /* first property set (index=0) is always SummaryInfo or DocSummaryInfo */
    if ((sctx.maplen - foff) < sizeof(propset_entry_t)) {
        sctx.flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
        ret = CL_EFORMAT;
        goto done;
    }
    databuf = (unsigned char *)fmap_need_off_once(sctx.sfmap, foff, sizeof(propset_entry_t));
    if (!databuf) {
        sctx.flags |= OLE2_SUMMARY_ERROR_DATABUF;
        ret = CL_EREAD;
        goto done;
    }
    foff += sizeof(propset_entry_t);
    memcpy(&pentry, databuf, sizeof(propset_entry_t));
    /* endian conversion */
    pentry.offset = sum32_endian_convert(pentry.offset);

    ret = ole2_summary_propset_json(&sctx, pentry.offset);
    if (ret != CL_SUCCESS)
        goto done;

    /* second property set (index=1) is always a custom property set (if present) */
    if (sumstub.num_propsets == 2) {
        cli_jsonbool(ctx->wrkproperty, "HasUserDefinedProperties", 1);
    }

    ret = CL_SUCCESS;

done:
    return cli_ole2_summary_json_cleanup(&sctx, ret);
}
986 #endif /* HAVE_JSON */
987