1#!/usr/local/bin/python3.8
2#
3# This Source Code Form is subject to the terms of the Mozilla Public
4# License, v. 2.0. If a copy of the MPL was not distributed with this
5# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6#
7from builtins import range
8import sys, os.path, optparse
9sys.path.insert(0, sys.path[0]+"/msodump.zip")
10import traceback
11
12from msodumper import ole, xlsstream, globals, node, xlsmodel, olestream
13from msodumper import xlsparser, msocrypto
14
15from msodumper.globals import error
16
def equalsName (name, array):
    """Tell whether a stream name matches a list of expected byte values.

    name -- the stream name; its i-th byte is fetched via globals.indexbytes().
    array -- sequence of expected byte values.

    Returns True only when both have the same length and every position
    compares equal; False otherwise.
    """
    size = len(array)
    if len(name) != size:
        return False

    # all() stops at the first mismatching position, like an early return.
    return all(globals.indexbytes(name, pos) == array[pos] for pos in range(size))
26
def isOleStream (dirname):
    """Determine whether or not a stream is an OLE stream.

    According to the spec, an OLE stream is always named with the byte
    0x01 followed by 'Ole'.
    """
    # 0x01, 'Ole' -> [0x01, 0x4F, 0x6C, 0x65]
    oleName = [0x01] + [ord(ch) for ch in "Ole"]
    return equalsName(dirname, oleName)
34
def isCompObjStream (dirname):
    """Determine whether or not a stream is a CompObj stream.

    The name compared against is the byte 0x01 followed by 'CompObj'.
    """
    # 0x01, 'CompObj' -> [0x01, 0x43, 0x6F, 0x6D, 0x70, 0x4F, 0x62, 0x6A]
    compObjName = [0x01] + [ord(ch) for ch in "CompObj"]
    return equalsName(dirname, compObjName)
38
class XLDumper(object):
    """Dump the contents of one file through the xlsstream parser.

    Three output modes are available: dump() (flat text), dumpXML() and
    dumpCanonicalXML().  Every mode re-reads and re-parses the file.
    """

    def __init__ (self, filepath, params):
        """Store the inputs; the file is not opened until a dump is requested.

        filepath -- path of the file to read.
        params -- parameter object passed on to xlsstream.XLStream; this
                  class itself only reads its 'utf8' attribute.
        """
        self.filepath = filepath
        self.params = params
        self.strm = None
        self.strmData = None

    def __printDirHeader (self, direntry, byteLen):
        """Output the banner that precedes the dump of one directory entry.

        direntry -- directory entry; its Name and isStorage() are used.
        byteLen -- stream size in bytes, shown for non-storage entries only.
        """
        dirname = direntry.Name
        dirname = globals.encodeName(dirname)
        globals.outputln("")
        globals.outputln("="*globals.OutputWidth)
        if direntry.isStorage():
            globals.outputln("%s (storage)"%dirname)
        else:
            globals.outputln("%s (stream, size: %d bytes)"%(dirname, byteLen))
        globals.outputln("-"*globals.OutputWidth)

    def __parseFile (self):
        """Read the whole file and (re)build self.strmData and self.strm.

        Raises whatever open()/read() or the XLStream constructor raise.
        """
        # The context manager closes the handle even when reading or parsing
        # fails; previously the file leaked on any exception.
        with open(self.filepath, 'rb') as infile:
            self.strmData = xlsstream.StreamData()
            self.strm = xlsstream.XLStream(infile.read(), self.params, self.strmData)

    def dumpXML (self):
        """Parse the file and pretty-print the Workbook stream as XML."""
        self.__parseFile()
        dirs = self.strm.getDirectoryEntries()
        docroot = node.Root()
        root = docroot.appendElement('xls-dump')

        for d in dirs:
            if d.Name != b"Workbook":
                # for now, we only dump the Workbook directory stream.
                continue

            dirstrm = self.strm.getDirectoryStream(d)
            data = self.__readSubStreamXML(dirstrm)
            self.__dumpDataAsXML(data, root)

        node.prettyPrint(globals.utfwriter(), docroot, utf8 = self.params.utf8)

    def dumpCanonicalXML (self):
        """Parse the file and pretty-print the workbook model's DOM as XML."""
        self.__parseFile()
        docroot = node.Root()
        root = docroot.appendElement('xls-dump')

        dirEntries = self.strm.getDirectoryEntries()
        for entry in dirEntries:
            dirname = entry.Name
            if dirname != b"Workbook":
                # for now, we only dump the Workbook directory stream.
                continue

            dirstrm = self.strm.getDirectoryStream(entry)
            wbmodel = self.__buildWorkbookModel(dirstrm)
            # the encryption flag is collected in strmData while parsing.
            wbmodel.encrypted = self.strmData.encrypted
            root.appendChild(wbmodel.createDOM())

        node.prettyPrint(globals.utfwriter(), docroot, utf8 = self.params.utf8)

    def dump (self):
        """Parse the file and output a flat dump.

        The container-level information (stream info, header, MSAT, SAT,
        SSAT, directory) is printed first, then every named directory entry
        in turn, each one handled according to its name.
        """
        self.__parseFile()
        self.strm.printStreamInfo()
        self.strm.printHeader()
        self.strm.printMSAT()
        self.strm.printSAT()
        self.strm.printSSAT()
        self.strm.printDirectory()
        dirEntries = self.strm.getDirectoryEntries()
        for entry in dirEntries:
            dirname = entry.Name
            if len(dirname) == 0:
                # unnamed entries are skipped entirely.
                continue

            dirstrm = self.strm.getDirectoryStream(entry)
            self.__printDirHeader(entry, len(dirstrm.bytes))
            if entry.isStorage():
                # a storage only gets its header printed.
                continue

            elif dirname == b"Workbook":
                # keep reading substreams until the stream is exhausted.
                success = True
                while success:
                    success = self.__readSubStream(dirstrm)

            elif dirname == b"Revision Log":
                dirstrm.type = xlsstream.DirType.RevisionLog
                self.__readSubStream(dirstrm)

            elif dirname == b"EncryptionInfo":
                globals.dumpBytes(dirstrm.bytes, 512)
                globals.outputln("-"*globals.OutputWidth)
                info = msocrypto.EncryptionInfo(dirstrm.bytes)
                info.read()
                info.output()

            elif self.strmData.isPivotCacheStream(dirname):
                dirstrm.type = xlsstream.DirType.PivotTableCache
                self.__readSubStream(dirstrm)
            elif isOleStream(dirname):
                self.__readOleStream(dirstrm)
            elif isCompObjStream(dirname):
                self.__readCompObjStream(dirstrm)
            else:
                # unrecognized stream: fall back to a raw byte dump.
                globals.dumpBytes(dirstrm.bytes, 512)

    def __readSubStream (self, strm):
        """Read records from strm until one with header 0x000A is returned.

        Returns True when that record was reached, False when the stream
        raised xlsstream.EndOfStream first.
        """
        try:
            # read bytes from BOF to EOF.
            header = 0x0000
            while header != 0x000A:
                header = strm.readRecord()
            return True
        except xlsstream.EndOfStream:
            return False

    def __readOleStream (self, dirstrm):
        """Hand the bytes of dirstrm to olestream.OLEStream and read it."""
        strm = olestream.OLEStream(dirstrm.bytes)
        strm.read()

    def __readCompObjStream (self, dirstrm):
        """Read dirstrm as a CompObj stream.

        An olestream.CompObjStreamError is reported through globals.error()
        instead of being propagated.
        """
        try:
            strm = olestream.CompObjStream(dirstrm.bytes)
            strm.read()
        except olestream.CompObjStreamError:
            globals.error("failed to parse CompObj stream.\n")

    def __dumpDataAsXML(self, data, root):
        """Recursively append parsed data to the XML node 'root'.

        data -- one of:
                  * a tuple (name, attrs_dict[, children]) or (name, children),
                    which becomes a new element appended to root;
                  * a list, whose items are each appended to root;
                  * anything else, which is silently skipped.
        root -- node offering appendElement(); the elements it returns must
                offer setAttr().
        """
        if isinstance(data, tuple):
            newRoot = root.appendElement(data[0])
            if isinstance(data[1], dict): # attrs
                # items() exists on both Python 2 and 3; iteritems() is
                # Python 2 only and raised AttributeError under Python 3.
                for key,val in data[1].items():
                    newRoot.setAttr(key, val)
                if len(data) > 2: # data has a list of children
                    self.__dumpDataAsXML(data[2], newRoot)
            else:
                self.__dumpDataAsXML(data[1], newRoot)
        elif isinstance(data, list):
            for x in data:
                self.__dumpDataAsXML(x, root)
        else:
            pass # we're skipping all unknown elems

    def __readSubStreamXML (self, strm):
        """Collect every record handler of strm and return the parsed data.

        Handlers are gathered until strm raises xlsstream.EndOfStream, then
        given to xlsparser.XlsParser, whose dumpData() result is returned.
        """
        handlers = []
        try:
            while True:
                handler = strm.getNextRecordHandler()
                handlers.append(handler)
        except xlsstream.EndOfStream:
            pass
        parser = xlsparser.XlsParser(handlers)
        return parser.dumpData()

    def __buildWorkbookModel (self, strm):
        """Fill a fresh xlsmodel.Workbook from strm and return it.

        strm.fillModel() is called repeatedly until it raises
        xlsstream.EndOfStream.
        """
        model = xlsmodel.Workbook()
        try:
            while True:
                strm.fillModel(model)
        except xlsstream.EndOfStream:
            pass

        return model
202
def main ():
    """Command-line entry point: parse the options, then dump one file.

    The first positional argument is the file to dump.  The process exits
    with status 1 when no file is given, when the dump mode is unknown, or
    when a canonical-xml dump fails while --catch is in effect.
    """
    parser = optparse.OptionParser()

    def addFlag (flags, dest, helpText):
        # every boolean switch is off by default and enabled by its presence.
        parser.add_option(*flags, action="store_true", dest=dest, default=False,
            help=helpText)

    addFlag(("-d", "--debug"), "debug", "Turn on debug mode")
    addFlag(("--show-sector-chain",), "show_sector_chain",
        "Show sector chain information at the start of the output.")
    addFlag(("--show-stream-pos",), "show_stream_pos",
        "Show the position of each record relative to the stream.")
    parser.add_option("--dump-mode", dest="dump_mode", default="flat", metavar="MODE",
        help="Specify the dump mode.  Possible values are: 'flat', 'xml', or 'canonical-xml'.  The default value is 'flat'.")
    addFlag(("--catch",), "catch_exceptions", "Catch exceptions and try to continue.")
    addFlag(("--utf-8",), "utf8", "Output strings as UTF-8.")

    options, args = parser.parse_args()

    # copy the command-line switches onto the shared parameter object.
    params = globals.params
    params.debug = options.debug
    params.showSectorChain = options.show_sector_chain
    params.showStreamPos = options.show_stream_pos
    params.catchExceptions = options.catch_exceptions
    params.utf8 = options.utf8

    if not args:
        globals.error("takes at least one argument\n")
        parser.print_help()
        sys.exit(1)

    dumper = XLDumper(args[0], params)
    mode = options.dump_mode

    if mode == 'flat':
        dumper.dump()
        return

    if mode == 'xml':
        dumper.dumpXML()
        return

    if mode in ('canonical-xml', 'cxml'):
        try:
            dumper.dumpCanonicalXML()
        except Exception:
            # without --catch the exception propagates untouched.
            if not globals.params.catchExceptions:
                raise
            traceback.print_exc()
            globals.error("Dump failed")
            sys.exit(1)
        return

    error("unknown dump mode: '%s'\n"%options.dump_mode)
    parser.print_help()
    sys.exit(1)
249
# Run the dumper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
252
253# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
254