1 /*
2  * Extract component parts of various MS XML files (e.g. MS Office 2003 XML Documents)
3  *
4  * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
5  * Copyright (C) 2007-2013 Sourcefire, Inc.
6  *
7  * Authors: Kevin Lin
8  *
9  * This program is free software; you can redistribute it and/or modify it under
10  * the terms of the GNU General Public License version 2 as published by the
11  * Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but WITHOUT
14  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
16  * more details.
17  *
18  * You should have received a copy of the GNU General Public License along with
19  * this program; if not, write to the Free Software Foundation, Inc., 51
20  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21  */
22 
23 #ifndef __MSXML_PARSER_H
24 #define __MSXML_PARSER_H
25 
26 #if HAVE_LIBXML2
27 
28 #if HAVE_CONFIG_H
29 #include "clamav-config.h"
30 #endif
31 
32 #include "others.h"
33 
34 #ifdef _WIN32
35 #ifndef LIBXML_WRITER_ENABLED
36 #define LIBXML_WRITER_ENABLED 1
37 #endif
38 #endif
39 #include <libxml/xmlreader.h>
40 
/*
 * Numeric limits used by the MS XML parser. Judging by the names these cap
 * the element recursion depth and the length of strings recorded as JSON
 * values; the enforcement is not in this header (TODO confirm in the parser).
 */
#define MSXML_RECLEVEL_MAX    20
#define MSXML_JSON_STRLEN_MAX 128

/*
 * Reader usage flags. The two values occupy distinct bits, so they can be
 * OR-ed together into a single flags word (presumably the `flags` argument
 * of cli_msxml_parse_document() - TODO confirm).
 */
#define MSXML_FLAG_JSON 0x1
#define MSXML_FLAG_WALK 0x2
47 
/*
 * Forward declaration only: this header does not define the members of
 * struct msxml_ictx. Includers can therefore hold a pointer to one (as the
 * `ictx` member of struct msxml_ctx does) but cannot inspect or size it.
 */
struct msxml_ictx;
49 
/*
 * A pair of read-only C strings.
 *
 * An array of these, together with its element count, is what the
 * msxml_scan_cb callback receives as `attribs` / `num_attribs`. Presumably
 * each entry describes one XML attribute (name and value) - TODO confirm.
 * Both members point to const data, so the strings must not be modified
 * through an entry.
 */
struct attrib_entry {
    const char *key;   /* first string of the pair (the attribute name, going by the field name) */
    const char *value; /* second string of the pair, associated with `key` */
};
54 
/*
 * One row of a key table: two strings plus a 32-bit word of MSXML_* bits.
 * A pointer to an array of these is passed to cli_msxml_parse_document()
 * as `keys`, with the element count in `num_keys`.
 *
 * The macros below sit inside the struct body only to keep them next to the
 * `type` member; as preprocessor definitions they are not scoped to the
 * struct and stay visible for the rest of the translation unit.
 */
struct key_entry {
/*
 * "how" group: values in the low nibble.
 * NOTE: MSXML_IGNORE is zero, so it cannot be detected with a bitwise AND;
 * the remaining values are single, distinct bits.
 */
#define MSXML_IGNORE      0x0
#define MSXML_IGNORE_ELEM 0x1
#define MSXML_SCAN_CB     0x2
#define MSXML_SCAN_B64    0x4
#define MSXML_COMMENT_CB  0x8

/* "where" group: single bits in the second nibble */
#define MSXML_JSON_ROOT   0x10
#define MSXML_JSON_WRKPTR 0x20
#define MSXML_JSON_MULTI  0x40

/* mask covering both the ROOT and WRKPTR bits (0x30) */
#define MSXML_JSON_TRACK (MSXML_JSON_ROOT | MSXML_JSON_WRKPTR)

/* "what" group: single bits in the third nibble */
#define MSXML_JSON_COUNT  0x100
#define MSXML_JSON_VALUE  0x200
#define MSXML_JSON_ATTRIB 0x400

    const char *key;  /* presumably the XML element name to match - TODO confirm */
    const char *name; /* presumably the label used when reporting the element - TODO confirm */
    uint32_t type;    /* presumably an OR of the MSXML_* how/where/what bits above */
};
77 
78 typedef cl_error_t (*msxml_scan_cb)(int fd, const char *filepath, cli_ctx *ctx, int num_attribs, struct attrib_entry *attribs, void *cbdata);
79 typedef cl_error_t (*msxml_comment_cb)(const char *comment, cli_ctx *ctx, void *wrkjobj, void *cbdata);
80 
81 struct msxml_ctx {
82     msxml_scan_cb scan_cb;
83     void *scan_data;
84     msxml_comment_cb comment_cb;
85     void *comment_data;
86     struct msxml_ictx *ictx;
87 };
88 
89 int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, uint32_t flags, struct msxml_ctx *mxctx);
90 
91 #endif /* HAVE_LIBXML2 */
92 
93 #endif /* __MSXML_PARSER_H */
94