#!/usr/bin/env python
# -*- tab-width: 4; indent-tabs-mode: nil; py-indent-offset: 4 -*-
2#
3# Version: MPL 1.1 / GPLv3+ / LGPLv3+
4#
5# The contents of this file are subject to the Mozilla Public License Version
6# 1.1 (the "License"); you may not use this file except in compliance with
7# the License or as specified alternatively below. You may obtain a copy of
8# the License at http://www.mozilla.org/MPL/
9#
10# Software distributed under the License is distributed on an "AS IS" basis,
11# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12# for the specific language governing rights and limitations under the
13# License.
14#
15# Major Contributor(s):
16# Copyright (C) 2012 Red Hat, Inc., Michael Stahl <mstahl@redhat.com>
17#  (initial developer)
18#
19# All Rights Reserved.
20#
21# For minor contributions see the git repository.
22#
23# Alternatively, the contents of this file may be used under the terms of
24# either the GNU General Public License Version 3 or later (the "GPLv3+"), or
25# the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
26# in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
27# instead of those above.
28
29# Simple script to load a bunch of documents and export them as Flat ODF
30#
31# Personally I run it like this:
32# ~/lo/master-suse/instdir/program/python ~/lo/master-suse/bin/benchmark-document-loading  --soffice=path:/home/tml/lo/master-suse/instdir/program/soffice --outdir=file://$PWD/out --userdir=file:///tmp/test $PWD/docs
33#
34
35import argparse
36import datetime
37import os
38import subprocess
39import sys
40import threading
41import time
42import urllib
43try:
44    from urllib.parse import quote
45except ImportError:
46    from urllib import quote
47import uuid
48
49try:
50    import pyuno
51    import uno
52    import unohelper
53except ImportError:
54    print("pyuno not found: try to set PYTHONPATH and URE_BOOTSTRAP variables")
55    print("PYTHONPATH=/installation/opt/program")
56    print("URE_BOOTSTRAP=file:///installation/opt/program/fundamentalrc")
57    raise
58
59try:
60    from com.sun.star.beans import PropertyValue
61    from com.sun.star.document import XDocumentEventListener
62    from com.sun.star.io import IOException, XOutputStream
63except ImportError:
64    print("UNO API class not found: try to set URE_BOOTSTRAP variable")
65    print("URE_BOOTSTRAP=file:///installation/opt/program/fundamentalrc")
66    raise
67
# File name extensions (including the leading dot) handled per component.
validCalcFileExtensions = [".xlsx", ".xls", ".ods", ".fods"]
validWriterFileExtensions = [".docx", ".rtf", ".odt", ".fodt", ".doc"]
validImpressFileExtensions = [".ppt", ".pptx", ".odp", ".fodp"]
validDrawFileExtensions = [".odg", ".fodg"]
validReverseFileExtensions = [".vsd", ".vdx", ".cdr", ".pub", ".wpd"]

# Component name -> list of extensions; iteration order is the order in
# which the components are processed.
validFileExtensions = dict(
    calc=validCalcFileExtensions,
    writer=validWriterFileExtensions,
    impress=validImpressFileExtensions,
    draw=validDrawFileExtensions,
    reverse=validReverseFileExtensions,
)

# Component name -> (flat ODF extension, export filter name).
# Note that there is no entry for the "reverse" component.
flatODFTypes = dict(
    calc=(".fods", "OpenDocument Spreadsheet Flat XML"),
    writer=(".fodt", "OpenDocument Text Flat XML"),
    impress=(".fodp", "OpenDocument Presentation Flat XML"),
    draw=(".fodg", "OpenDocument Drawing Flat XML"),
)

# Export directory URL; assigned by runHandleFileTests() before any export.
outdir = ""
84
def partition(list, pred):
    """Split the elements of *list* in two according to *pred*.

    Returns a tuple ``(accepted, rejected)``: the elements for which
    ``pred(element)`` is true and the remaining ones, each in their
    original order.  *pred* is called exactly once per element.
    """
    accepted, rejected = [], []
    for item in list:
        bucket = accepted if pred(item) else rejected
        bucket.append(item)
    return (accepted, rejected)
94
def filelist(directory, suffix):
    """Return the regular files directly inside *directory* ending in *suffix*.

    *suffix* is compared with the extension reported by os.path.splitext(),
    so it includes the leading dot and the comparison is case-sensitive.
    The returned paths are *directory* (with a trailing "/" added when
    missing) followed by the file name.  Sub-directories are not searched.

    Raises Exception when *directory* is empty.
    """
    if not directory:
        raise Exception("filelist: empty directory")
    prefix = directory if directory.endswith("/") else directory + "/"
    matches = []
    for name in os.listdir(prefix):
        path = prefix + name
        if os.path.isfile(path) and os.path.splitext(path)[1] == suffix:
            matches.append(path)
    return matches
104
def getFiles(dirs, suffix):
    """Return the files with extension *suffix* found in each of *dirs*.

    The result is the concatenation, in the order of *dirs*, of what
    filelist() returns for every directory.
    """
    return [path for d in dirs for path in filelist(d, suffix)]
111
112### UNO utilities ###
113
class OutputStream( unohelper.Base, XOutputStream ):
    """XOutputStream implementation that forwards written data to stdout."""
    def __init__( self ):
        # 0 while open, 1 once closeOutput() has been called
        self.closed = 0

    def closeOutput(self):
        """Mark the stream as closed; stdout itself is left open."""
        self.closed = 1

    def writeBytes( self, seq ):
        """Write the bytes carried by *seq* to stdout.

        NOTE(review): seq is presumably a UNO byte sequence whose ``value``
        attribute is a bytes object -- confirm against the PyUNO docs.
        """
        # On Python 3 sys.stdout is a text stream and rejects bytes, so use
        # its underlying binary buffer when there is one.
        stream = getattr(sys.stdout, "buffer", sys.stdout)
        stream.write( seq.value )

    def flush( self ):
        """Flush stdout so that the data written so far becomes visible."""
        sys.stdout.flush()
126
class OfficeConnection:
    """Connection to a soffice instance, launched here or already running.

    *args* is the parsed command line; the attributes read from it are
    ``soffice`` ("method:location"), ``userdir`` and ``valgrind``.
    """
    def __init__(self, args):
        self.args = args
        self.soffice = None     # what bootstrap() returned (see NOTE there)
        self.socket = None      # connection string used by connect()
        self.xContext = None    # remote component context once connected
        self.pro = None         # subprocess.Popen of a launched soffice

    def setUp(self):
        """Launch soffice ("path" method) or attach to one ("connect").

        Raises Exception when the --soffice value has no "method:" prefix,
        names an unsupported method, or when the "path" method is used
        without a --userdir that is a file URL.
        """
        (method, sep, rest) = self.args.soffice.partition(":")
        if sep != ":":
            raise Exception("soffice parameter does not specify method")
        if method == "path":
            # a unique pipe name per launched instance
            socket = "pipe,name=pytest" + str(uuid.uuid1())
            userdir = self.args.userdir
            if not userdir:
                raise Exception("'path' method requires --userdir")
            if not userdir.startswith("file://"):
                raise Exception("--userdir must be file URL")
            self.soffice = self.bootstrap(rest, userdir, socket)
        elif method == "connect":
            socket = rest
        else:
            raise Exception("unsupported connection method: " + method)
        self.socket = socket
        self.xContext = self.connect(socket)

    def bootstrap(self, soffice, userdir, socket):
        """Start the soffice executable listening on *socket*.

        The process object is stored in ``self.pro``.

        NOTE(review): nothing is returned, so the caller's ``self.soffice``
        stays None and tearDown() skips its whole body.  The callers in
        this file restart the office after crashes and would be aborted
        by tearDown()'s exit-status check, so this is left as it is.
        """
        argv = [ soffice, "--accept=" + socket + ";urp",
                "-env:UserInstallation=" + userdir,
                "--quickstart=no",
                "--norestore", "--nologo", "--headless" ]
        if self.args.valgrind:
            argv.append("--valgrind")
        # Assigning to os.environ also calls putenv(), so both this process
        # and the child started below see the variables.
        os.environ["SAL_LOG"] = "-INFO-WARN"
        os.environ["LIBO_ONEWAY_STABLE_ODF_EXPORT"] = "YES"
        self.pro = subprocess.Popen(argv)

    def connect(self, socket):
        """Resolve and return the remote component context.

        Retries once per second for as long as the resolver raises
        NoConnectException; there is no upper limit on the number of tries.
        """
        xLocalContext = uno.getComponentContext()
        xUnoResolver = xLocalContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", xLocalContext)
        url = "uno:" + socket + ";urp;StarOffice.ComponentContext"
        while True:
            try:
                xContext = xUnoResolver.resolve(url)
                return xContext
            except pyuno.getClass("com.sun.star.connection.NoConnectException"):
                # office is not accepting connections yet
                time.sleep(1)

    def tearDown(self):
        """Terminate the office referenced by ``self.soffice`` and wait for it.

        Does nothing while ``self.soffice`` is None.  Raises Exception when
        the process exits with a non-zero status.
        """
        if self.soffice:
            if self.xContext:
                try:
                    xMgr = self.xContext.ServiceManager
                    xDesktop = xMgr.createInstanceWithContext("com.sun.star.frame.Desktop", self.xContext)
                    xDesktop.terminate()
                except pyuno.getClass("com.sun.star.beans.UnknownPropertyException"):
                    pass # ignore, also means disposed
                except pyuno.getClass("com.sun.star.lang.DisposedException"):
                    pass # ignore
            else:
                self.soffice.terminate()
            ret = self.soffice.wait()
            self.xContext = None
            self.socket = None
            self.soffice = None
            if ret != 0:
                raise Exception("Exit status indicates failure: " + str(ret))

    def kill(self):
        """Ask the launched soffice process to terminate.

        The equivalent shell command is appended to killFile.log.  Nothing
        happens when no process was launched (the "connect" method).
        """
        if self.pro is None:
            return
        command = "kill " + str(self.pro.pid)
        with open("killFile.log", "a") as killFile:
            killFile.write(command + "\n")
        try:
            # same default signal as the "kill" command, without a shell
            self.pro.terminate()
        except OSError:
            pass # best effort, the process may already be gone
210
class PersistentConnection:
    """Keeps one OfficeConnection that is shared by consecutive tests."""
    def __init__(self, args):
        self.connection = None
        self.args = args

    def getContext(self):
        """Return the component context of the current connection."""
        return self.connection.xContext

    def setUp(self):
        """Create and set up a new OfficeConnection; none may exist yet."""
        assert not self.connection
        office = OfficeConnection(self.args)
        office.setUp()
        # stored only once setUp() of the new connection has succeeded
        self.connection = office

    def preTest(self):
        """Check that a connection exists before a test runs."""
        assert self.connection

    def postTest(self):
        """Check that a connection exists after a test has run."""
        assert self.connection

    def tearDown(self):
        """Tear down the current connection, if any, and forget it."""
        if not self.connection:
            return
        try:
            self.connection.tearDown()
        finally:
            self.connection = None

    def kill(self):
        """Forward kill() to the current connection, if any."""
        if not self.connection:
            return
        self.connection.kill()
235
def simpleInvoke(connection, test):
    """Run *test* on *connection*, bracketed by preTest() and postTest().

    postTest() is called even when preTest() or the test itself raises.
    """
    try:
        connection.preTest()
        context = connection.getContext()
        test.run(context, connection)
    finally:
        connection.postTest()
242
def runConnectionTests(connection, invoker, tests):
    """Set up *connection* and pass every test in *tests* to *invoker*.

    *invoker* is called as ``invoker(connection, test)``.  The connection
    is not torn down here, whether or not an exception is raised.
    """
    connection.setUp()
    for test in tests:
        invoker(connection, test)
251
class EventListener(XDocumentEventListener,unohelper.Base):
    """Document event listener that records when layout has finished."""
    def __init__(self):
        # set to True once an "OnLayoutFinished" event has been seen
        self.layoutFinished = False
    def documentEventOccured(self, event):
        """Remember that the "OnLayoutFinished" event was received.

        NOTE(review): the method name has to match the one declared by
        XDocumentEventListener; check the UNO API before changing its
        spelling.
        """
        if event.EventName == "OnLayoutFinished":
            self.layoutFinished = True
    def disposing(self, event):
        """Nothing to release when the broadcaster goes away."""
        # "self" was missing here, so any call on an instance raised TypeError
        pass
261
def mkPropertyValue(name, value):
    """Return a com.sun.star.beans.PropertyValue for *name* and *value*.

    The two remaining positional members of the struct are passed as 0.
    NOTE(review): presumably those are Handle and State -- confirm against
    the UNO API reference.
    """
    structName = "com.sun.star.beans.PropertyValue"
    return uno.createUnoStruct(structName, name, 0, value, 0)
265
266### tests ###
267
def logTimeSpent(url, startTime):
    """Print the file name of *url* and the seconds elapsed since *startTime*.

    The two fields are separated by a tab.  *startTime* is a time.time()
    value.
    """
    fileName = os.path.basename(urllib.parse.urlparse(url).path)
    elapsed = time.time() - startTime
    print(fileName + "\t" + str(elapsed))
270
def loadFromURL(xContext, url, t, component):
    """Load the document at *url* hidden and read-only and log the time taken.

    xContext  -- component context used to create the Desktop service
    url       -- URL of the document to load
    t         -- running watchdog timer; cancelled once loading has returned
    component -- "calc", "writer" or any other component name

    For "calc" the document is recalculated before the time is logged.
    For "writer" the time is logged when the OnLayoutFinished event has
    arrived; if it has not arrived after 30 seconds, "layout did not
    finish" is appended to file.log and no time is printed.

    Returns the loaded document, or None when no document was returned or
    the load raised IllegalArgumentException.  UnknownPropertyException
    and DisposedException are re-raised for the caller to handle; on any
    other exception the document is closed and the exception re-raised.
    """
    xDesktop = xContext.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", xContext)
    props = [("Hidden", True), ("ReadOnly", True)] # FilterName?
    loadProps = tuple(mkPropertyValue(name, value) for (name, value) in props)
    xListener = None
    xGEB = None
    if component == "writer":
        xListener = EventListener()
        xGEB = xContext.getValueByName(
            "/singletons/com.sun.star.frame.theGlobalEventBroadcaster")
        xGEB.addDocumentEventListener(xListener)
    xDoc = None
    try:
        startTime = time.time()
        xDoc = xDesktop.loadComponentFromURL(url, "_blank", 0, loadProps)
        if component == "calc":
            try:
                if xDoc:
                    xDoc.calculateAll()
            except AttributeError:
                pass
            t.cancel()
            logTimeSpent(url, startTime)
            return xDoc
        if component != "writer":
            t.cancel()
            logTimeSpent(url, startTime)
            return xDoc

        t.cancel()
        if xDoc is None:
            # nothing was loaded, so no layout event can be expected
            return None
        # Poll in short steps so that the logged time is not rounded up to
        # the next full second; give up after 30 seconds as before.
        deadline = time.time() + 30
        while True:
            if xListener.layoutFinished:
                logTimeSpent(url, startTime)
                return xDoc
            if time.time() >= deadline:
                break
            time.sleep(0.1)
        with open("file.log", "a") as fh:
            fh.write("layout did not finish\n")
        return xDoc
    except pyuno.getClass("com.sun.star.beans.UnknownPropertyException"):
        xListener = None # do not touch the broadcaster in "finally"
        raise # means crashed, handle it later
    except pyuno.getClass("com.sun.star.lang.DisposedException"):
        xListener = None # do not touch the broadcaster in "finally"
        raise # means crashed, handle it later
    except pyuno.getClass("com.sun.star.lang.IllegalArgumentException"):
        return None # means could not open the file, ignore it
    except BaseException:
        # anything else: release the document, then let the error through
        if xDoc:
            xDoc.close(True)
        raise
    finally:
        if xListener:
            xGEB.removeDocumentEventListener(xListener)
327
def exportToODF(xContext, xDoc, baseName, t, component):
    """Store *xDoc* as flat ODF below the global ``outdir``.

    xContext  -- unused
    xDoc      -- document to export; None means nothing was loaded
    baseName  -- file name of the source document; its extension is
                 replaced by the flat ODF extension of the component
    t         -- unused
    component -- key into flatODFTypes selecting extension and filter

    The export is skipped, with a message on stdout, when there is no
    document or when flatODFTypes has no entry for *component* (as is the
    case for "reverse"); previously both cases raised and ended the run.
    """
    if xDoc is None:
        print("exportToODF " + baseName + " => skipped, no document loaded")
        return
    if component not in flatODFTypes:
        print("exportToODF " + baseName + " => skipped, no flat ODF type for " + str(component))
        return
    (extension, filterName) = flatODFTypes[component]
    exportFileName = outdir + "/" + os.path.splitext(baseName)[0] + extension
    print("exportToODF " + baseName + " => " + exportFileName)
    props = [("FilterName", filterName),
             ("Overwrite", True)]
    storeProps = tuple(mkPropertyValue(name, value) for (name, value) in props)
    xDoc.storeToURL(exportFileName, storeProps)
335
def handleCrash(file, disposed):
    """Append a record for the crashed *file* to crashlog.txt.

    file     -- name of the file that was being processed
    disposed -- 1 when the crash showed up as a DisposedException; the
                record then ends in "through disposed"

    Every record is now terminated by a newline; previously records with
    ``disposed != 1`` ran into the following one.
    """
    with open("crashlog.txt", "a") as crashLog:
        crashLog.write('Crash:' + file + ' ')
        if disposed == 1:
            crashLog.write('through disposed\n')
        else:
            crashLog.write('\n')
# add here the remaining handling code for crashed files
344
def alarm_handler(args):
    """Timer callback: call kill() on *args*, the connection being watched."""
    args.kill()
347
class HandleFileTest:
    """Loads one file, exports it as flat ODF and records the outcome.

    file      -- path of the document
    state     -- object whose goodFiles, badDisposedFiles, badPropertyFiles
                 and timeoutFiles lists receive the file name
    component -- component name passed on to loadFromURL and exportToODF
    """
    def __init__(self, file, state, component):
        self.file = file
        self.state = state
        self.component = component

    def _restartOffice(self, connection):
        """Tear the connection down and set it up again."""
        connection.tearDown()
        connection.setUp()

    def _recordLoadFailure(self, t, timedOut, disposed, badFiles):
        """File the document under timeouts or under *badFiles* (a crash)."""
        if timedOut.is_set():
            self.state.timeoutFiles.append(self.file)
        else:
            if t is not None:
                t.cancel()
            handleCrash(self.file, disposed)
            badFiles.append(self.file)

    def _closeDocument(self, xDoc, connection, args):
        """Close *xDoc* under a 10 second watchdog; restart on UNO failures."""
        if not xDoc:
            return
        t = threading.Timer(10, alarm_handler, args)
        try:
            t.start()
            xDoc.close(True)
            t.cancel()
        except pyuno.getClass("com.sun.star.beans.UnknownPropertyException"):
            # stop the watchdog first, it must not kill the restarted office
            t.cancel()
            print("caught UnknownPropertyException while closing")
            self.state.badPropertyFiles.append(self.file)
            self._restartOffice(connection)
        except pyuno.getClass("com.sun.star.lang.DisposedException"):
            print("caught DisposedException while closing")
            if t.is_alive():
                t.cancel()
            else:
                # the watchdog has fired: closing took too long
                self.state.badDisposedFiles.append(self.file)
            self._restartOffice(connection)
        finally:
            # also covers exceptions that are not handled above
            t.cancel()

    def run(self, xContext, connection):
        """Load the file, export it and close it again.

        Loading runs under a 60 second watchdog that kills the office.  A
        UnknownPropertyException or DisposedException is recorded as a
        timeout when the watchdog has fired and as a crash otherwise, and
        the office is restarted.  Other exceptions propagate after the
        document has been closed.
        """
        t = None
        xDoc = None
        args = [connection]
        # Set by the watchdog itself.  Looking at t.is_alive() cannot tell
        # a fired timer from one that loadFromURL() has already cancelled.
        timedOut = threading.Event()

        def onTimeout(conn):
            timedOut.set()
            alarm_handler(conn)

        try:
            url = "file://" + quote(self.file)
            with open("file.log", "a") as fh:
                fh.write(url + "\n")
            t = threading.Timer(60, onTimeout, args)
            t.start()
            xDoc = loadFromURL(xContext, url, t, self.component)
            self.state.goodFiles.append(self.file)
            if xDoc is not None:
                exportToODF(xContext, xDoc, os.path.basename(urllib.parse.urlparse(url).path), t, self.component)
        except pyuno.getClass("com.sun.star.beans.UnknownPropertyException"):
            self._recordLoadFailure(t, timedOut, 0, self.state.badPropertyFiles)
            # the document belongs to the old office, do not close it below
            xDoc = None
            self._restartOffice(connection)
        except pyuno.getClass("com.sun.star.lang.DisposedException"):
            self._recordLoadFailure(t, timedOut, 1, self.state.badDisposedFiles)
            # the document belongs to the old office, do not close it below
            xDoc = None
            self._restartOffice(connection)
        finally:
            if t is not None:
                t.cancel()
            self._closeDocument(xDoc, connection, args)
413
class State:
    """Result lists that the tests append file names to."""
    def __init__(self):
        # loaded without exception / failed with DisposedException
        self.goodFiles, self.badDisposedFiles = [], []
        # failed with UnknownPropertyException / hit the watchdog timeout
        self.badPropertyFiles, self.timeoutFiles = [], []
420
421
def write_state_report(files_list, start_time, report_filename, description):
    """Write one report file, replacing any existing one.

    The file holds "<description>:", then "Starttime: " followed by
    ``start_time.isoformat()``, then one line per entry of *files_list*.
    """
    with open(report_filename, "w") as report:
        report.write("%s:\n" % description)
        report.write("Starttime: %s\n" % start_time.isoformat())
        report.writelines("%s\n" % entry for entry in files_list)
428
429
def writeReport(state, startTime):
    """Write the four result lists of *state* to their log files."""
    reports = (
        (state.goodFiles, "goodFiles.log",
         "Files which loaded perfectly"),
        (state.badDisposedFiles, "badDisposedFiles.log",
         "Files which crashed with DisposedException"),
        (state.badPropertyFiles, "badPropertyFiles.log",
         "Files which crashed with UnknownPropertyException"),
        (state.timeoutFiles, "timeoutFiles.log",
         "Files which timed out"),
    )
    for files, logName, description in reports:
        write_state_report(files, startTime, logName, description)
439
def runHandleFileTests(opts):
    """Run a HandleFileTest for every matching file in ``opts.dirs``.

    The global ``outdir`` is set to a sub-directory of ``opts.outdir``
    named after the start time.  Files are processed component by
    component, sorted by path within each component.  When the run ends,
    normally or through an exception, the office is killed and the report
    files are written.
    """
    import posixpath

    global outdir
    startTime = datetime.datetime.now()
    connection = PersistentConnection(opts)
    state = State()
    # opts.outdir is a URL, so join with "/" regardless of the platform
    outdir = posixpath.join(opts.outdir, startTime.strftime('%Y%m%d.%H%M%S'))
    try:
        tests = []
        for component, validExtension in validFileExtensions.items():
            files = []
            for suffix in validExtension:
                files.extend(getFiles(opts.dirs, suffix))
            files.sort()
            tests.extend(HandleFileTest(file, state, component) for file in files)
        runConnectionTests(connection, simpleInvoke, tests)
    finally:
        try:
            connection.kill()
        finally:
            # write the report even if killing the office failed
            writeReport(state, startTime)
459
def parseArgs(argv):
    """Parse the command line *argv* (program name first).

    Returns the argparse namespace with the attributes soffice, outdir,
    userdir, valgrind and dirs.
    """
    epilogLines = [
        "'location' is a pathname, not a URL. 'outdir' and 'userdir' are URLs.",
        "The 'directory' parameters should be full absolute pathnames, not URLs.",
    ]
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        epilog="\n".join(epilogLines))

    parser.add_argument(
        '--soffice', metavar='method:location', required=True,
        help="specify soffice instance to connect to\n"
             "supported methods: 'path', 'connect'")
    parser.add_argument(
        '--outdir', metavar='URL', required=True,
        help="specify the output directory for flat ODF exports")
    parser.add_argument(
        '--userdir', metavar='URL',
        help="specify user installation directory for 'path' method")
    parser.add_argument(
        '--valgrind', action='store_true',
        help="pass --valgrind to soffice for 'path' method")
    parser.add_argument('dirs', metavar='directory', nargs='+')

    # argv[0] is the program name and is not an option
    return parser.parse_args(argv[1:])
480
481
# Script entry point: parse the command line and run all file tests.
if __name__ == "__main__":
    runHandleFileTests(parseArgs(sys.argv))
485
486# vim:set shiftwidth=4 softtabstop=4 expandtab:
487