1 /*
2  * Extract component parts of OLE2 files (e.g. MS Office Documents)
3  *
4  * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
5  * Copyright (C) 2007-2013 Sourcefire, Inc.
6  *
7  * Authors: Kevin Lin
8  *
9  * This program is free software; you can redistribute it and/or modify it under
10  * the terms of the GNU General Public License version 2 as published by the
11  * Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but WITHOUT
14  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
16  * more details.
17  *
18  * You should have received a copy of the GNU General Public License along with
19  * this program; if not, write to the Free Software Foundation, Inc., 51
20  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21  */
22 
23 #if HAVE_CONFIG_H
24 #include "clamav-config.h"
25 #endif
26 
27 #include <sys/types.h>
28 #include <sys/stat.h>
29 #include <fcntl.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <ctype.h>
33 #include <stdlib.h>
34 #include <errno.h>
35 #include <conv.h>
36 #ifdef HAVE_UNISTD_H
37 #include <unistd.h>
38 #endif
39 
40 #if HAVE_ICONV
41 #include <iconv.h>
42 #endif
43 
44 #include "clamav.h"
45 #include "others.h"
46 #include "msdoc.h"
47 #include "scanners.h"
48 #include "fmap.h"
49 #include "json_api.h"
50 #include "entconv.h"
51 
52 #if HAVE_JSON
53 static char *
ole2_convert_utf(summary_ctx_t * sctx,char * begin,size_t sz,const char * encoding)54 ole2_convert_utf(summary_ctx_t *sctx, char *begin, size_t sz, const char *encoding)
55 {
56     char *outbuf = NULL;
57 #if HAVE_ICONV
58     char *buf, *p1, *p2;
59     off_t offset;
60     size_t inlen, outlen, nonrev, sz2;
61     size_t i;
62     int attempt;
63     iconv_t cd;
64 #else
65     UNUSEDPARAM(encoding);
66 #endif
67     /* applies in the both case */
68     if (sctx->codepage == 20127 || sctx->codepage == CODEPAGE_UTF8) {
69         char *track;
70         size_t bcnt, scnt;
71 
72         outbuf = cli_calloc(1, sz + 1);
73         if (!(outbuf))
74             return NULL;
75         memcpy(outbuf, begin, sz);
76 
77         track = outbuf + sz - 1;
78         if ((sctx->codepage == CODEPAGE_UTF8) && (*track & 0x80)) { /* UTF-8 with a most significant bit */
79             /* locate the start of the last character */
80             for (bcnt = 1; (track != outbuf); track--, bcnt++) {
81                 if (((uint8_t)*track & 0xC0) != 0x80)
82                     break;
83             }
84 
85             /* count number of set (1) significant bits */
86             for (scnt = 0; scnt < sizeof(uint8_t) * 8; scnt++) {
87                 if (((uint8_t)*track & (0x80 >> scnt)) == 0)
88                     break;
89             }
90 
91             if (bcnt != scnt) {
92                 cli_dbgmsg("ole2_convert_utf: cleaning out %zu bytes from incomplete utf-8 character length %zu\n",
93                            bcnt, scnt);
94                 for (; bcnt > 0; bcnt--, track++)
95                     *track = '\0';
96             }
97         }
98         return outbuf;
99     }
100 
101 #if HAVE_ICONV
102     p1 = buf = cli_calloc(1, sz);
103     if (!(buf))
104         return NULL;
105 
106     memcpy(buf, begin, sz);
107     inlen = sz;
108 
109     /* encoding lookup if not specified */
110     if (!encoding) {
111         for (i = 0; i < NUMCODEPAGES; ++i) {
112             if (sctx->codepage == codepage_entries[i].codepage)
113                 encoding = codepage_entries[i].encoding;
114             else if (sctx->codepage < codepage_entries[i].codepage) {
115                 /* assuming sorted array */
116                 break;
117             }
118         }
119 
120         if (!encoding) {
121             cli_warnmsg("ole2_convert_utf: could not locate codepage encoding for %d\n", sctx->codepage);
122             sctx->flags |= OLE2_CODEPAGE_ERROR_NOTFOUND;
123             free(buf);
124             return NULL;
125         }
126     }
127 
128     cd = iconv_open("UTF-8", encoding);
129     if (cd == (iconv_t)(-1)) {
130         char errbuf[128];
131         cli_strerror(errno, errbuf, sizeof(errbuf));
132         cli_errmsg("ole2_convert_utf: could not initialize iconv for encoding %s: %s\n", encoding, errbuf);
133         sctx->flags |= OLE2_CODEPAGE_ERROR_UNINITED;
134     } else {
135         offset = 0;
136         for (attempt = 1; attempt <= 3; ++attempt) {
137             /* charset to UTF-8 should never exceed sz*6 */
138             sz2 = (attempt * 2) * sz;
139             /* use cli_realloc, reuse the buffer that has already been translated */
140             outbuf = (char *)cli_realloc(outbuf, sz2 + 1);
141             if (!outbuf) {
142                 free(buf);
143                 iconv_close(cd);
144                 return NULL;
145             }
146 
147             outlen = sz2 - offset;
148             p2     = outbuf + offset;
149 
150             /* conversion */
151             nonrev = iconv(cd, &p1, &inlen, &p2, &outlen);
152 
153             if (errno == EILSEQ) {
154                 cli_dbgmsg("ole2_convert_utf: input buffer contains invalid character for its encoding\n");
155                 sctx->flags |= OLE2_CODEPAGE_ERROR_INVALID;
156                 break;
157             } else if (errno == EINVAL && nonrev == (size_t)-1) {
158                 cli_dbgmsg("ole2_convert_utf: input buffer contains incomplete multibyte character\n");
159                 sctx->flags |= OLE2_CODEPAGE_ERROR_INCOMPLETE;
160                 break;
161             } else if (inlen == 0) {
162                 //cli_dbgmsg("ole2_convert_utf: input buffer is successfully translated\n");
163                 break;
164             }
165 
166             //outbuf[sz2 - outlen] = '\0';
167             //cli_dbgmsg("%u %s\n", inlen, outbuf);
168 
169             offset = sz2 - outlen;
170             if (attempt < 3)
171                 cli_dbgmsg("ole2_convert_utf: outbuf is too small, resizing %llu -> %llu\n",
172                            (long long unsigned)((attempt * 2) * sz), (long long unsigned)(((attempt + 1) * 2) * sz));
173         }
174 
175         if (errno == E2BIG && nonrev == (size_t)-1) {
176             cli_dbgmsg("ole2_convert_utf: buffer could not be fully translated\n");
177             sctx->flags |= OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL;
178         }
179 
180         outbuf[sz2 - outlen] = '\0';
181     }
182 
183     iconv_close(cd);
184     free(buf);
185 #endif
186     /* this should force base64 encoding if NULL */
187     return outbuf;
188 }
189 
190 static int
ole2_process_property(summary_ctx_t * sctx,unsigned char * databuf,uint32_t offset)191 ole2_process_property(summary_ctx_t *sctx, unsigned char *databuf, uint32_t offset)
192 {
193     uint16_t proptype, padding;
194     int ret = CL_SUCCESS;
195 
196     if (cli_json_timeout_cycle_check(sctx->ctx, &(sctx->toval)) != CL_SUCCESS) {
197         sctx->flags |= OLE2_SUMMARY_FLAG_TIMEOUT;
198         return CL_ETIMEOUT;
199     }
200 
201     if (offset + sizeof(proptype) + sizeof(padding) > sctx->pssize) {
202         sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
203         return CL_EFORMAT;
204     }
205 
206     memcpy(&proptype, databuf + offset, sizeof(proptype));
207     offset += sizeof(proptype);
208     memcpy(&padding, databuf + offset, sizeof(padding));
209     offset += sizeof(padding);
210     /* endian conversion */
211     proptype = sum16_endian_convert(proptype);
212 
213     //cli_dbgmsg("proptype: 0x%04x\n", proptype);
214     if (padding != 0) {
215         cli_dbgmsg("ole2_process_property: invalid padding value, non-zero\n");
216         sctx->flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
217         return CL_EFORMAT;
218     }
219 
220     switch (proptype) {
221         case PT_EMPTY:
222         case PT_NULL:
223             ret = cli_jsonnull(sctx->summary, sctx->propname);
224             break;
225         case PT_INT16: {
226             int16_t dout;
227             if (offset + sizeof(dout) > sctx->pssize) {
228                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
229                 return CL_EFORMAT;
230             }
231             memcpy(&dout, databuf + offset, sizeof(dout));
232             offset += sizeof(dout);
233             /* endian conversion */
234             dout = sum16_endian_convert(dout);
235 
236             if (sctx->writecp) {
237                 sctx->codepage = (uint16_t)dout;
238                 ret            = cli_jsonint(sctx->summary, sctx->propname, sctx->codepage);
239             } else
240                 ret = cli_jsonint(sctx->summary, sctx->propname, dout);
241             break;
242         }
243         case PT_INT32:
244         case PT_INT32v1: {
245             int32_t dout;
246             if (offset + sizeof(dout) > sctx->pssize) {
247                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
248                 return CL_EFORMAT;
249             }
250             memcpy(&dout, databuf + offset, sizeof(dout));
251             offset += sizeof(dout);
252             /* endian conversion */
253             dout = sum32_endian_convert(dout);
254 
255             ret = cli_jsonint(sctx->summary, sctx->propname, dout);
256             break;
257         }
258         case PT_FLOAT32: /* review this please */
259         {
260             float dout;
261             if (offset + sizeof(dout) > sctx->pssize) {
262                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
263                 return CL_EFORMAT;
264             }
265             memcpy(&dout, databuf + offset, sizeof(dout));
266             offset += sizeof(dout);
267             /* endian conversion */
268             dout = sum32_endian_convert(dout);
269 
270             ret = cli_jsondouble(sctx->summary, sctx->propname, dout);
271             break;
272         }
273         case PT_DATE:
274         case PT_DOUBLE64: /* review this please */
275         {
276             double dout;
277             if (offset + sizeof(dout) > sctx->pssize) {
278                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
279                 return CL_EFORMAT;
280             }
281             memcpy(&dout, databuf + offset, sizeof(dout));
282             offset += sizeof(dout);
283             /* endian conversion */
284             dout = sum64_endian_convert(dout);
285 
286             ret = cli_jsondouble(sctx->summary, sctx->propname, dout);
287             break;
288         }
289         case PT_BOOL: {
290             uint16_t dout;
291             if (offset + sizeof(dout) > sctx->pssize) {
292                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
293                 return CL_EFORMAT;
294             }
295             memcpy(&dout, databuf + offset, sizeof(dout));
296             offset += sizeof(dout);
297             /* no need for endian conversion */
298 
299             ret = cli_jsonbool(sctx->summary, sctx->propname, dout);
300             break;
301         }
302         case PT_INT8v1: {
303             int8_t dout;
304             if (offset + sizeof(dout) > sctx->pssize) {
305                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
306                 return CL_EFORMAT;
307             }
308             memcpy(&dout, databuf + offset, sizeof(dout));
309             offset += sizeof(dout);
310             /* no need for endian conversion */
311 
312             ret = cli_jsonint(sctx->summary, sctx->propname, dout);
313             break;
314         }
315         case PT_UINT8: {
316             uint8_t dout;
317             if (offset + sizeof(dout) > sctx->pssize) {
318                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
319                 return CL_EFORMAT;
320             }
321             memcpy(&dout, databuf + offset, sizeof(dout));
322             offset += sizeof(dout);
323             /* no need for endian conversion */
324 
325             ret = cli_jsonint(sctx->summary, sctx->propname, dout);
326             break;
327         }
328         case PT_UINT16: {
329             uint16_t dout;
330             if (offset + sizeof(dout) > sctx->pssize) {
331                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
332                 return CL_EFORMAT;
333             }
334             memcpy(&dout, databuf + offset, sizeof(dout));
335             offset += sizeof(dout);
336             /* endian conversion */
337             dout = sum16_endian_convert(dout);
338 
339             if (sctx->writecp)
340                 sctx->codepage = dout;
341 
342             ret = cli_jsonint(sctx->summary, sctx->propname, dout);
343             break;
344         }
345         case PT_UINT32:
346         case PT_UINT32v1: {
347             uint32_t dout;
348             if (offset + sizeof(dout) > sctx->pssize) {
349                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
350                 return CL_EFORMAT;
351             }
352             memcpy(&dout, databuf + offset, sizeof(dout));
353             offset += sizeof(dout);
354             /* endian conversion */
355             dout = sum32_endian_convert(dout);
356 
357             ret = cli_jsonint(sctx->summary, sctx->propname, dout);
358             break;
359         }
360         case PT_INT64: {
361             int64_t dout;
362             if (offset + sizeof(dout) > sctx->pssize) {
363                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
364                 return CL_EFORMAT;
365             }
366             memcpy(&dout, databuf + offset, sizeof(dout));
367             offset += sizeof(dout);
368             /* endian conversion */
369             dout = sum64_endian_convert(dout);
370 
371             ret = cli_jsonint64(sctx->summary, sctx->propname, dout);
372             break;
373         }
374         case PT_UINT64: {
375             uint64_t dout;
376             if (offset + sizeof(dout) > sctx->pssize) {
377                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
378                 return CL_EFORMAT;
379             }
380             memcpy(&dout, databuf + offset, sizeof(dout));
381             offset += sizeof(dout);
382             /* endian conversion */
383             dout = sum64_endian_convert(dout);
384 
385             ret = cli_jsonint64(sctx->summary, sctx->propname, dout);
386             break;
387         }
388         case PT_BSTR:
389         case PT_LPSTR:
390             if (sctx->codepage == 0) {
391                 cli_dbgmsg("ole2_propset_json: current codepage is unknown, cannot parse char stream\n");
392                 sctx->flags |= OLE2_SUMMARY_FLAG_CODEPAGE;
393             } else {
394                 uint32_t strsize;
395                 char *outstr, *outstr2;
396 
397                 if (offset + sizeof(strsize) > sctx->pssize) {
398                     sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
399                     return CL_EFORMAT;
400                 }
401 
402                 memcpy(&strsize, databuf + offset, sizeof(strsize));
403                 offset += sizeof(strsize);
404                 /* endian conversion? */
405                 strsize = sum32_endian_convert(strsize);
406 
407                 if (offset + strsize > sctx->pssize) {
408                     sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
409                     return CL_EFORMAT;
410                 }
411 
412                 /* limitation on string length */
413                 if (strsize > PROPSTRLIMIT) {
414                     cli_dbgmsg("ole2_process_property: property string sized %lu truncated to size %lu\n",
415                                (unsigned long)strsize, (unsigned long)PROPSTRLIMIT);
416                     sctx->flags |= OLE2_SUMMARY_FLAG_TRUNC_STR;
417                     strsize = PROPSTRLIMIT;
418                 }
419 
420                 outstr = cli_calloc(strsize + 1, 1); /* last char must be NULL */
421                 if (!outstr) {
422                     return CL_EMEM;
423                 }
424                 strncpy(outstr, (const char *)(databuf + offset), strsize);
425 
426                 /* conversion of various encodings to UTF-8 */
427                 outstr2 = ole2_convert_utf(sctx, outstr, strsize, NULL);
428                 if (!outstr2) {
429                     /* use base64 encoding when all else fails! */
430                     char b64jstr[PROPSTRLIMIT];
431 
432                     /* outstr2 should be 4/3 times the original (rounded up) */
433                     outstr2 = cl_base64_encode(outstr, strsize);
434                     if (!outstr2) {
435                         cli_dbgmsg("ole2_process_property: failed to convert to base64 string\n");
436                         return CL_EMEM;
437                     }
438 
439                     snprintf(b64jstr, PROPSTRLIMIT, "%s_base64", sctx->propname);
440                     ret = cli_jsonbool(sctx->summary, b64jstr, 1);
441                     if (ret != CL_SUCCESS)
442                         return ret;
443                 }
444 
445                 ret = cli_jsonstr(sctx->summary, sctx->propname, outstr2);
446                 free(outstr);
447                 free(outstr2);
448             }
449             break;
450         case PT_LPWSTR: {
451             uint32_t strsize;
452             char *outstr, *outstr2;
453 
454             if (offset + sizeof(strsize) > sctx->pssize) {
455                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
456                 return CL_EFORMAT;
457             }
458             memcpy(&strsize, databuf + offset, sizeof(strsize));
459             offset += sizeof(strsize);
460             /* endian conversion; wide strings are by length, not size (x2) */
461             strsize = sum32_endian_convert(strsize) * 2;
462 
463             /* limitation on string length */
464             if (strsize > (2 * PROPSTRLIMIT)) {
465                 cli_dbgmsg("ole2_process_property: property string sized %lu truncated to size %lu\n",
466                            (unsigned long)strsize, (unsigned long)(2 * PROPSTRLIMIT));
467                 sctx->flags |= OLE2_SUMMARY_FLAG_TRUNC_STR;
468                 strsize = (2 * PROPSTRLIMIT);
469             }
470 
471             if (offset + strsize > sctx->pssize) {
472                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
473                 return CL_EFORMAT;
474             }
475             outstr = cli_calloc(strsize + 2, 1); /* last two chars must be NULL */
476             if (!outstr) {
477                 return CL_EMEM;
478             }
479             memcpy(outstr, (const char *)(databuf + offset), strsize);
480             /* conversion of 16-width char strings (UTF-16 or UTF-16LE??) to UTF-8 */
481             outstr2 = ole2_convert_utf(sctx, outstr, strsize, UTF16_MS);
482             if (!outstr2) {
483                 /* use base64 encoding when all else fails! */
484                 char b64jstr[PROPSTRLIMIT];
485 
486                 outstr2 = cl_base64_encode(outstr, strsize);
487                 if (!outstr2) {
488                     free(outstr);
489                     return CL_EMEM;
490                 }
491 
492                 snprintf(b64jstr, PROPSTRLIMIT, "%s_base64", sctx->propname);
493                 ret = cli_jsonbool(sctx->summary, b64jstr, 1);
494                 if (ret != CL_SUCCESS) {
495                     free(outstr);
496                     free(outstr2);
497                     return ret;
498                 }
499             }
500 
501             ret = cli_jsonstr(sctx->summary, sctx->propname, outstr2);
502             free(outstr);
503             free(outstr2);
504             break;
505         }
506         case PT_FILETIME: {
507             uint32_t ltime, htime;
508             uint64_t wtime = 0, utime = 0;
509 
510             if (offset + sizeof(ltime) + sizeof(htime) > sctx->pssize) {
511                 sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
512                 return CL_EFORMAT;
513             }
514             memcpy(&ltime, databuf + offset, sizeof(ltime));
515             offset += sizeof(ltime);
516             memcpy(&htime, databuf + offset, sizeof(htime));
517             offset += sizeof(ltime);
518             ltime = sum32_endian_convert(ltime);
519             htime = sum32_endian_convert(htime);
520 
521             /* UNIX timestamp formatting */
522             wtime = htime;
523             wtime <<= 32;
524             wtime |= ltime;
525 
526             utime = wtime / 10000000;
527             utime -= 11644473600LL;
528 
529             if ((uint32_t)((utime & 0xFFFFFFFF00000000) >> 32)) {
530                 cli_dbgmsg("ole2_process_property: UNIX timestamp is larger than 32-bit number\n");
531             } else {
532                 ret = cli_jsonint(sctx->summary, sctx->propname, (uint32_t)(utime & 0xFFFFFFFF));
533             }
534             break;
535         }
536         default:
537             cli_dbgmsg("ole2_process_property: unhandled property type 0x%04x for %s property\n",
538                        proptype, sctx->propname);
539             sctx->flags |= OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE;
540     }
541 
542     return ret;
543 }
544 
/*
 * Map a DocumentSummaryInformation property id to its JSON key.
 *
 * On a match sctx->propname is pointed at the key (and sctx->writecp is set
 * for the code page property only). Unknown ids leave propname untouched and
 * raise OLE2_SUMMARY_FLAG_UNKNOWN_PROPID.
 */
static void ole2_translate_docsummary_propid(summary_ctx_t *sctx, uint32_t propid)
{
    static const struct {
        uint32_t id;
        const char *label;
    } known[] = {
        {DSPID_CODEPAGE, "CodePage"},
        {DSPID_CATEGORY, "Category"},
        {DSPID_PRESFORMAT, "PresentationTarget"},
        {DSPID_BYTECOUNT, "Bytes"},
        {DSPID_LINECOUNT, "Lines"},
        {DSPID_PARCOUNT, "Paragraphs"},
        {DSPID_SLIDECOUNT, "Slides"},
        {DSPID_NOTECOUNT, "Notes"},
        {DSPID_HIDDENCOUNT, "HiddenSlides"},
        {DSPID_MMCLIPCOUNT, "MMClips"},
        {DSPID_SCALE, "Scale"},
        {DSPID_HEADINGPAIR, "HeadingPairs"}, /* VT_VARIANT | VT_VECTOR */
        {DSPID_DOCPARTS, "DocPartTitles"},   /* VT_VECTOR | VT_LPSTR */
        {DSPID_MANAGER, "Manager"},
        {DSPID_COMPANY, "Company"},
        {DSPID_LINKSDIRTY, "LinksDirty"},
        {DSPID_CCHWITHSPACES, "Char&WSCount"},
        {DSPID_SHAREDDOC, "SharedDoc"}, /* SHOULD BE FALSE! */
        {DSPID_LINKBASE, "LinkBase"},   /* moved to user-defined */
        {DSPID_HLINKS, "HyperLinks"},   /* moved to user-defined */
        {DSPID_HYPERLINKSCHANGED, "HyperLinksChanged"},
        {DSPID_VERSION, "Version"},
        {DSPID_DIGSIG, "DigitalSig"},
        {DSPID_CONTENTTYPE, "ContentType"},
        {DSPID_CONTENTSTATUS, "ContentStatus"},
        {DSPID_LANGUAGE, "Language"},
        {DSPID_DOCVERSION, "DocVersion"},
    };
    size_t k;

    for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
        if (known[k].id != propid)
            continue;

        /* must be set ONLY for codepage */
        if (propid == DSPID_CODEPAGE)
            sctx->writecp = 1;
        sctx->propname = known[k].label;
        return;
    }

    cli_dbgmsg("ole2_docsum_propset_json: unrecognized propid!\n");
    sctx->flags |= OLE2_SUMMARY_FLAG_UNKNOWN_PROPID;
}
635 
/*
 * Map a SummaryInformation property id to its JSON key.
 *
 * On a match sctx->propname is pointed at the key (and sctx->writecp is set
 * for the code page property only). Unknown ids leave propname untouched and
 * raise OLE2_SUMMARY_FLAG_UNKNOWN_PROPID.
 */
static void ole2_translate_summary_propid(summary_ctx_t *sctx, uint32_t propid)
{
    static const struct {
        uint32_t id;
        const char *label;
    } known[] = {
        {SPID_CODEPAGE, "CodePage"},
        {SPID_TITLE, "Title"},
        {SPID_SUBJECT, "Subject"},
        {SPID_AUTHOR, "Author"},
        {SPID_KEYWORDS, "Keywords"},
        {SPID_COMMENTS, "Comments"},
        {SPID_TEMPLATE, "Template"},
        {SPID_LASTAUTHOR, "LastAuthor"},
        {SPID_REVNUMBER, "RevNumber"},
        {SPID_EDITTIME, "EditTime"},
        {SPID_LASTPRINTED, "LastPrinted"},
        {SPID_CREATEDTIME, "CreatedTime"},
        {SPID_MODIFIEDTIME, "ModifiedTime"},
        {SPID_PAGECOUNT, "PageCount"},
        {SPID_WORDCOUNT, "WordCount"},
        {SPID_CHARCOUNT, "CharCount"},
        {SPID_THUMBNAIL, "Thumbnail"},
        {SPID_APPNAME, "AppName"},
        {SPID_SECURITY, "Security"},
    };
    size_t k;

    for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
        if (known[k].id != propid)
            continue;

        /* must be set ONLY for codepage */
        if (propid == SPID_CODEPAGE)
            sctx->writecp = 1;
        sctx->propname = known[k].label;
        return;
    }

    cli_dbgmsg("ole2_translate_summary_propid: unrecognized propid!\n");
    sctx->flags |= OLE2_SUMMARY_FLAG_UNKNOWN_PROPID;
}
702 
/*
 * Parse one property set located at `offset` in sctx->sfmap and emit every
 * recognized property into the JSON summary.
 *
 * The set starts with two 32-bit words (total size, property count) followed
 * by (property id, property offset) pairs; property offsets are relative to
 * the start of the set. At most PROPCNTLIMIT properties are processed.
 *
 * Returns CL_SUCCESS, CL_EFORMAT when the set does not fit the map or its own
 * declared size, CL_EREAD when the map cannot supply the bytes, or the first
 * error reported while processing a property.
 */
static int ole2_summary_propset_json(summary_ctx_t *sctx, off_t offset)
{
    unsigned char *header, *propset;
    uint32_t declared, capped;
    uint32_t idx;
    off_t cursor = 0; /* read position inside the property set */
    int rc;

    cli_dbgmsg("in ole2_summary_propset_json\n");

    /* reset the per-propset state of the summary context */
    sctx->codepage = 0;
    sctx->writecp  = 0;
    sctx->propname = NULL;

    /* the two-word header must be inside the map */
    if ((offset + (2 * sizeof(uint32_t))) > sctx->maplen) {
        sctx->flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
        return CL_EFORMAT;
    }
    header = (unsigned char *)fmap_need_off_once(sctx->sfmap, offset, (2 * sizeof(uint32_t)));
    if (header == NULL) {
        sctx->flags |= OLE2_SUMMARY_ERROR_DATABUF;
        return CL_EREAD;
    }

    /* only the cursor moves past the header; `offset` keeps pointing at the propset start */
    cursor += (2 * sizeof(uint32_t));
    memcpy(&(sctx->pssize), header, sizeof(sctx->pssize));
    memcpy(&declared, header + sizeof(sctx->pssize), sizeof(declared));
    /* endian conversion */
    sctx->pssize = sum32_endian_convert(sctx->pssize);
    declared     = sum32_endian_convert(declared);
    cli_dbgmsg("ole2_summary_propset_json: pssize: %u, numprops: %u\n", sctx->pssize, declared);

    capped = declared;
    if (declared > PROPCNTLIMIT) {
        sctx->flags |= OLE2_SUMMARY_LIMIT_PROPS;
        capped = PROPCNTLIMIT;
    }
    cli_dbgmsg("ole2_summary_propset_json: processing %u of %u (%u max) properties\n",
               capped, declared, PROPCNTLIMIT);

    /* map the whole property set as declared by its size word */
    if ((size_t)(offset + (sctx->pssize)) > (size_t)(sctx->maplen)) {
        sctx->flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
        return CL_EFORMAT;
    }
    propset = (unsigned char *)fmap_need_off_once(sctx->sfmap, offset, sctx->pssize);
    if (propset == NULL) {
        sctx->flags |= OLE2_SUMMARY_ERROR_DATABUF;
        return CL_EREAD;
    }

    /* walk the (id, offset) table */
    for (idx = 0; idx < capped; ++idx) {
        uint32_t propid, propoff;

        if (cursor + sizeof(propid) + sizeof(propoff) > sctx->pssize) {
            sctx->flags |= OLE2_SUMMARY_ERROR_OOB;
            return CL_EFORMAT;
        }
        memcpy(&propid, propset + cursor, sizeof(propid));
        cursor += sizeof(propid);
        memcpy(&propoff, propset + cursor, sizeof(propoff));
        cursor += sizeof(propoff);
        /* endian conversion */
        propid  = sum32_endian_convert(propid);
        propoff = sum32_endian_convert(propoff);
        cli_dbgmsg("ole2_summary_propset_json: propid: 0x%08x, propoff: %u\n", propid, propoff);

        /* resolve the JSON key; mode 1 selects the document-summary id table */
        sctx->propname = NULL;
        sctx->writecp  = 0;
        if (sctx->mode == 1)
            ole2_translate_docsummary_propid(sctx, propid);
        else
            ole2_translate_summary_propid(sctx, propid);

        /* ids without a name were already flagged by the translator */
        if (sctx->propname == NULL)
            continue;

        rc = ole2_process_property(sctx, propset, propoff);
        if (rc != CL_SUCCESS)
            return rc;
    }

    return CL_SUCCESS;
}
795 
/*
 * Release the summary stream map and, when any flag was raised during
 * parsing, record the flag names in a "ParseErrors" JSON array under
 * sctx->summary.
 *
 * Returns `retcode` unchanged so callers can write
 * `return cli_ole2_summary_json_cleanup(&sctx, code);`.
 */
static int cli_ole2_summary_json_cleanup(summary_ctx_t *sctx, int retcode)
{
    /* flag bit -> reported name, in the order they are emitted */
    static const struct {
        uint32_t bit;
        const char *label;
    } flagnames[] = {
        /* summary errors */
        {OLE2_SUMMARY_ERROR_TOOSMALL, "OLE2_SUMMARY_ERROR_TOOSMALL"},
        {OLE2_SUMMARY_ERROR_OOB, "OLE2_SUMMARY_ERROR_OOB"},
        {OLE2_SUMMARY_ERROR_DATABUF, "OLE2_SUMMARY_ERROR_DATABUF"},
        {OLE2_SUMMARY_ERROR_INVALID_ENTRY, "OLE2_SUMMARY_ERROR_INVALID_ENTRY"},
        {OLE2_SUMMARY_LIMIT_PROPS, "OLE2_SUMMARY_LIMIT_PROPS"},
        {OLE2_SUMMARY_FLAG_TIMEOUT, "OLE2_SUMMARY_FLAG_TIMEOUT"},
        {OLE2_SUMMARY_FLAG_CODEPAGE, "OLE2_SUMMARY_FLAG_CODEPAGE"},
        {OLE2_SUMMARY_FLAG_UNKNOWN_PROPID, "OLE2_SUMMARY_FLAG_UNKNOWN_PROPID"},
        {OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE, "OLE2_SUMMARY_FLAG_UNHANDLED_PROPTYPE"},
        {OLE2_SUMMARY_FLAG_TRUNC_STR, "OLE2_SUMMARY_FLAG_TRUNC_STR"},
        /* codepage translation errors */
        {OLE2_CODEPAGE_ERROR_NOTFOUND, "OLE2_CODEPAGE_ERROR_NOTFOUND"},
        {OLE2_CODEPAGE_ERROR_UNINITED, "OLE2_CODEPAGE_ERROR_UNINITED"},
        {OLE2_CODEPAGE_ERROR_INVALID, "OLE2_CODEPAGE_ERROR_INVALID"},
        {OLE2_CODEPAGE_ERROR_INCOMPLETE, "OLE2_CODEPAGE_ERROR_INCOMPLETE"},
        {OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL, "OLE2_CODEPAGE_ERROR_OUTBUFTOOSMALL"},
    };

    cli_dbgmsg("in cli_ole2_summary_json_cleanup: %d[%x]\n", retcode, sctx->flags);

    if (sctx->sfmap) {
        funmap(sctx->sfmap);
    }

    if (sctx->flags) {
        json_object *errors = cli_jsonarray(sctx->summary, "ParseErrors");
        size_t k;

        for (k = 0; k < sizeof(flagnames) / sizeof(flagnames[0]); k++) {
            if (sctx->flags & flagnames[k].bit) {
                cli_jsonstr(errors, NULL, flagnames[k].label);
            }
        }
    }

    return retcode;
}
861 
/*
 * Parse the property-set stream readable through fd and record its
 * properties as JSON under ctx->wrkproperty.
 *
 * ctx  - scan context; must not be NULL
 * fd   - descriptor of the extracted stream; must be >= 0
 * mode - selects the JSON object the properties are stored under:
 *        0 -> "SummaryInfo", 1 -> "DocSummaryInfo", 2 -> "Hwp5SummaryInfo"
 *
 * Returns CL_SUCCESS on success; CL_ENULLARG for a NULL ctx, negative fd or
 * out-of-range mode; CL_ESTAT / CL_EMAP when the descriptor cannot be
 * stat'ed or mapped; CL_EMEM when the JSON object cannot be created;
 * CL_EFORMAT / CL_EREAD for a short, malformed or unreadable stream;
 * otherwise whatever ole2_summary_propset_json() reported.
 *
 * Once the stream has been mapped, every exit goes through
 * cli_ole2_summary_json_cleanup().
 */
int cli_ole2_summary_json(cli_ctx *ctx, int fd, int mode)
{
    summary_ctx_t sctx;
    STATBUF statbuf;
    summary_stub_t stub;
    propset_entry_t entry;
    const unsigned char *chunk;
    const char *section;
    off_t cursor = 0;
    int status   = CL_SUCCESS;

    cli_dbgmsg("in cli_ole2_summary_json\n");

    /* preliminary sanity checks */
    if (ctx == NULL) {
        return CL_ENULLARG;
    }

    if (fd < 0) {
        cli_dbgmsg("ole2_summary_json: invalid file descriptor\n");
        return CL_ENULLARG; /* placeholder */
    }

    if (mode < 0 || mode > 2) {
        cli_dbgmsg("ole2_summary_json: invalid mode specified\n");
        return CL_ENULLARG; /* placeholder */
    }

    /* summary ctx setup */
    memset(&sctx, 0, sizeof(sctx));
    sctx.ctx  = ctx;
    sctx.mode = mode;

    if (FSTAT(fd, &statbuf) == -1) {
        cli_dbgmsg("ole2_summary_json: cannot stat file descriptor\n");
        return CL_ESTAT;
    }

    sctx.sfmap = fmap(fd, 0, statbuf.st_size, NULL);
    if (!sctx.sfmap) {
        cli_dbgmsg("ole2_summary_json: failed to get fmap\n");
        return CL_EMAP;
    }
    sctx.maplen = sctx.sfmap->len;
    cli_dbgmsg("ole2_summary_json: streamsize: %zu\n", sctx.maplen);

    /* pick the JSON key for this kind of summary stream */
    if (mode == 1) {
        section = "DocSummaryInfo";
    } else if (mode == 2) {
        section = "Hwp5SummaryInfo";
    } else {
        section = "SummaryInfo";
    }

    sctx.summary = cli_jsonobj(ctx->wrkproperty, section);
    if (!sctx.summary) {
        cli_errmsg("ole2_summary_json: no memory for json object.\n");
        status = CL_EMEM;
        goto done;
    }

    sctx.codepage = 0;
    sctx.writecp  = 0;

    /* acquire property stream metadata */
    if (sctx.maplen < sizeof(summary_stub_t)) {
        sctx.flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
        status = CL_EFORMAT;
        goto done;
    }

    chunk = (const unsigned char *)fmap_need_off_once(sctx.sfmap, cursor, sizeof(summary_stub_t));
    if (!chunk) {
        sctx.flags |= OLE2_SUMMARY_ERROR_DATABUF;
        status = CL_EREAD;
        goto done;
    }
    memcpy(&stub, chunk, sizeof(summary_stub_t));
    cursor += sizeof(summary_stub_t);

    /* endian conversion and checks */
    stub.byte_order = le16_to_host(stub.byte_order);
    if (stub.byte_order != 0xfffe) {
        cli_dbgmsg("ole2_summary_json: byteorder 0x%x is invalid\n", stub.byte_order);
        sctx.flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
        status = CL_EFORMAT;
        goto done;
    }

    stub.version      = sum16_endian_convert(stub.version); /*unused*/
    stub.system       = sum32_endian_convert(stub.system);  /*unused*/
    stub.num_propsets = sum32_endian_convert(stub.num_propsets);
    if (stub.num_propsets != 1 && stub.num_propsets != 2) {
        cli_dbgmsg("ole2_summary_json: invalid number of property sets\n");
        sctx.flags |= OLE2_SUMMARY_ERROR_INVALID_ENTRY;
        status = CL_EFORMAT;
        goto done;
    }

    cli_dbgmsg("ole2_summary_json: byteorder 0x%x\n", stub.byte_order);
    cli_dbgmsg("ole2_summary_json: %u property set(s) detected\n", stub.num_propsets);

    /* first property set (index=0) is always SummaryInfo or DocSummaryInfo */
    if ((sctx.maplen - cursor) < sizeof(propset_entry_t)) {
        sctx.flags |= OLE2_SUMMARY_ERROR_TOOSMALL;
        status = CL_EFORMAT;
        goto done;
    }

    chunk = (const unsigned char *)fmap_need_off_once(sctx.sfmap, cursor, sizeof(propset_entry_t));
    if (!chunk) {
        sctx.flags |= OLE2_SUMMARY_ERROR_DATABUF;
        status = CL_EREAD;
        goto done;
    }
    memcpy(&entry, chunk, sizeof(propset_entry_t));
    cursor += sizeof(propset_entry_t);

    /* endian conversion */
    entry.offset = sum32_endian_convert(entry.offset);

    status = ole2_summary_propset_json(&sctx, entry.offset);
    if (status != CL_SUCCESS) {
        goto done;
    }

    /* second property set (index=1) is always a custom property set (if present) */
    if (stub.num_propsets == 2) {
        cli_jsonbool(ctx->wrkproperty, "HasUserDefinedProperties", 1);
    }

done:
    /* single exit: unmap the stream and report any accumulated flags */
    return cli_ole2_summary_json_cleanup(&sctx, status);
}
986 #endif /* HAVE_JSON */
987