#!/usr/bin/env python
# -*- tab-width: 4; indent-tabs-mode: nil; py-indent-offset: 4 -*-
#
# Version: MPL 1.1 / GPLv3+ / LGPLv3+
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License or as specified alternatively below. You may obtain a copy of
# the License at http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# Major Contributor(s):
# Copyright (C) 2012 Red Hat, Inc., Michael Stahl <mstahl@redhat.com>
#  (initial developer)
#
# All Rights Reserved.
#
# For minor contributions see the git repository.
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 3 or later (the "GPLv3+"), or
# the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
# in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
# instead of those above.
# Simple script to load a bunch of documents and export them as Flat ODF
#
# Personally I run it like this:
# ~/lo/master-suse/instdir/program/python ~/lo/master-suse/bin/benchmark-document-loading --soffice=path:/home/tml/lo/master-suse/instdir/program/soffice --outdir=file://$PWD/out --userdir=file:///tmp/test $PWD/docs
#

import argparse
import datetime
import os
import subprocess
import sys
import threading
import time
import urllib
try:
    from urllib.parse import quote, urlparse
except ImportError:
    # Python 2: the same helpers live in different modules.
    from urllib import quote
    from urlparse import urlparse
import uuid

try:
    import pyuno
    import uno
    import unohelper
except ImportError:
    print("pyuno not found: try to set PYTHONPATH and URE_BOOTSTRAP variables")
    print("PYTHONPATH=/installation/opt/program")
    print("URE_BOOTSTRAP=file:///installation/opt/program/fundamentalrc")
    raise

try:
    from com.sun.star.beans import PropertyValue
    from com.sun.star.document import XDocumentEventListener
    from com.sun.star.io import IOException, XOutputStream
except ImportError:
    print("UNO API class not found: try to set URE_BOOTSTRAP variable")
    print("URE_BOOTSTRAP=file:///installation/opt/program/fundamentalrc")
    raise

validCalcFileExtensions = [ ".xlsx", ".xls", ".ods", ".fods" ]
validWriterFileExtensions = [ ".docx" , ".rtf", ".odt", ".fodt", ".doc" ]
validImpressFileExtensions = [ ".ppt", ".pptx", ".odp", ".fodp" ]
validDrawFileExtensions = [ ".odg", ".fodg" ]
validReverseFileExtensions = [ ".vsd", ".vdx", ".cdr", ".pub", ".wpd" ]
# component name -> file suffixes that are benchmarked for that component
validFileExtensions = {"calc": validCalcFileExtensions,
                       "writer": validWriterFileExtensions,
                       "impress": validImpressFileExtensions,
                       "draw": validDrawFileExtensions,
                       "reverse": validReverseFileExtensions}
# component name -> (export suffix, export filter name).
# There is no entry for "reverse"; exportToODF() skips such documents.
flatODFTypes = {"calc": (".fods", "OpenDocument Spreadsheet Flat XML"),
                "writer": (".fodt", "OpenDocument Text Flat XML"),
                "impress": (".fodp", "OpenDocument Presentation Flat XML"),
                "draw": (".fodg", "OpenDocument Drawing Flat XML")}

# Export directory; set by runHandleFileTests() and read by exportToODF().
outdir = ""


def partition(list, pred):
    """Split `list` into (elements satisfying pred, the other elements)."""
    left = []
    right = []
    for e in list:
        if pred(e):
            left.append(e)
        else:
            right.append(e)
    return (left, right)


def filelist(directory, suffix):
    """Return the regular files directly inside `directory` ending in `suffix`.

    The comparison against the file extension is exact (case-sensitive).
    Raises Exception when `directory` is empty.
    """
    if not directory:
        raise Exception("filelist: empty directory")
    if directory[-1] != "/":
        directory += "/"
    files = [directory + f for f in os.listdir(directory)]
    return [f for f in files
            if os.path.isfile(f) and os.path.splitext(f)[1] == suffix]


def getFiles(dirs, suffix):
    """Concatenate filelist(d, suffix) for every directory in `dirs`."""
    files = []
    for d in dirs:
        files += filelist(d, suffix)
    return files

### UNO utilities ###


class OutputStream( unohelper.Base, XOutputStream ):
    """XOutputStream implementation that forwards written bytes to stdout."""

    def __init__( self ):
        self.closed = 0

    def closeOutput(self):
        self.closed = 1

    def writeBytes( self, seq ):
        # On Python 3 sys.stdout is a text stream and rejects bytes; use the
        # underlying binary buffer when there is one (Python 2 has none).
        out = getattr(sys.stdout, "buffer", sys.stdout)
        out.write( seq.value )

    def flush( self ):
        pass


class OfficeConnection:
    """One connection to an soffice instance, optionally spawned by us."""

    def __init__(self, args):
        self.args = args
        self.soffice = None
        self.socket = None
        self.xContext = None
        self.pro = None     # subprocess.Popen of the spawned soffice, if any

    def setUp(self):
        """Spawn ("path:") or attach to ("connect:") soffice and connect.

        Raises Exception when --soffice is malformed or, for the "path"
        method, when --userdir is missing or not a file URL.
        """
        (method, sep, rest) = self.args.soffice.partition(":")
        if sep != ":":
            raise Exception("soffice parameter does not specify method")
        if method == "path":
            socket = "pipe,name=pytest" + str(uuid.uuid1())
            userdir = self.args.userdir
            if not userdir:
                raise Exception("'path' method requires --userdir")
            if not userdir.startswith("file://"):
                raise Exception("--userdir must be file URL")
            # NOTE(review): bootstrap() returns nothing, so self.soffice stays
            # None and tearDown() below does not terminate or wait for the
            # process; it is only stopped through kill(). Returning the Popen
            # would make tearDown() raise on a non-zero exit status during
            # crash recovery, so this is deliberately left unchanged.
            self.soffice = self.bootstrap(rest, userdir, socket)
        elif method == "connect":
            socket = rest
        else:
            raise Exception("unsupported connection method: " + method)
        self.xContext = self.connect(socket)

    def bootstrap(self, soffice, userdir, socket):
        """Start a headless soffice accepting on `socket`; keep it in self.pro."""
        argv = [ soffice, "--accept=" + socket + ";urp",
                "-env:UserInstallation=" + userdir,
                "--quickstart=no",
                "--norestore", "--nologo", "--headless" ]
        if self.args.valgrind:
            argv.append("--valgrind")
        os.putenv("SAL_LOG", "-INFO-WARN")
        os.putenv("LIBO_ONEWAY_STABLE_ODF_EXPORT", "YES")
        self.pro = subprocess.Popen(argv)

    def connect(self, socket):
        """Resolve and return the remote component context.

        Retries once per second for as long as the resolver raises
        NoConnectException; there is no upper bound on the number of retries.
        """
        xLocalContext = uno.getComponentContext()
        xUnoResolver = xLocalContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", xLocalContext)
        url = "uno:" + socket + ";urp;StarOffice.ComponentContext"
        while True:
            try:
                xContext = xUnoResolver.resolve(url)
                return xContext
            except pyuno.getClass("com.sun.star.connection.NoConnectException"):
                time.sleep(1)

    def tearDown(self):
        """Terminate soffice and wait for it, but only if self.soffice is set.

        Raises Exception when the awaited process exits with a non-zero status.
        """
        if self.soffice:
            if self.xContext:
                try:
                    xMgr = self.xContext.ServiceManager
                    xDesktop = xMgr.createInstanceWithContext("com.sun.star.frame.Desktop", self.xContext)
                    xDesktop.terminate()
                except pyuno.getClass("com.sun.star.beans.UnknownPropertyException"):
                    pass # ignore, also means disposed
                except pyuno.getClass("com.sun.star.lang.DisposedException"):
                    pass # ignore
            else:
                self.soffice.terminate()
            ret = self.soffice.wait()
            self.xContext = None
            self.socket = None
            self.soffice = None
            if ret != 0:
                raise Exception("Exit status indicates failure: " + str(ret))

    def kill(self):
        """Send the spawned soffice a termination request and log it.

        Does nothing when no process was spawned (the "connect" method).
        """
        if self.pro is None:
            return
        command = "kill " + str(self.pro.pid)
        with open("killFile.log", "a") as killFile:
            killFile.write(command + "\n")
        try:
            # Same default signal as the `kill` command, without a shell.
            self.pro.terminate()
        except OSError:
            pass # the process is already gone


class PersistentConnection:
    """Keeps one OfficeConnection alive across many tests."""

    def __init__(self, args):
        self.args = args
        self.connection = None

    def getContext(self):
        return self.connection.xContext

    def setUp(self):
        assert(not self.connection)
        conn = OfficeConnection(self.args)
        conn.setUp()
        self.connection = conn

    def preTest(self):
        assert(self.connection)

    def postTest(self):
        assert(self.connection)

    def tearDown(self):
        if self.connection:
            try:
                self.connection.tearDown()
            finally:
                self.connection = None

    def kill(self):
        if self.connection:
            self.connection.kill()


def simpleInvoke(connection, test):
    """Run one test between the connection's preTest() and postTest()."""
    try:
        connection.preTest()
        test.run(connection.getContext(), connection)
    finally:
        connection.postTest()


def runConnectionTests(connection, invoker, tests):
    """Set the connection up and run every test through `invoker`.

    The connection is not torn down here; runHandleFileTests() kills the
    office process itself once the run is over.
    """
    connection.setUp()
    for test in tests:
        invoker(connection, test)


class EventListener(XDocumentEventListener,unohelper.Base):
    """Document event listener that records when "OnLayoutFinished" arrives."""

    def __init__(self):
        self.layoutFinished = False

    def documentEventOccured(self, event):
        if event.EventName == "OnLayoutFinished":
            self.layoutFinished = True

    def disposing(self, event):
        # `self` was missing, so any call on an instance raised TypeError.
        pass


def mkPropertyValue(name, value):
    """Create a com.sun.star.beans.PropertyValue struct for name/value.

    The two remaining positional fields of the struct are set to 0.
    """
    return uno.createUnoStruct("com.sun.star.beans.PropertyValue",
            name, 0, value, 0)

### tests ###


def logTimeSpent(url, startTime):
    """Print "<file name><TAB><seconds since startTime>" for `url`."""
    print(os.path.basename(urlparse(url).path) + "\t" + str(time.time()-startTime))


def loadFromURL(xContext, url, t, component):
    """Load `url` hidden and read-only, and print how long it took.

    `t` is the watchdog guarding the load; only its cancel() method is used
    here, and it is cancelled as soon as the load call has returned.
    For "calc" the document is recalculated before the time is printed; for
    "writer" the function polls up to 30 times, one second apart, for the
    OnLayoutFinished event.

    Returns the document, or None when it could not be opened.
    UnknownPropertyException and DisposedException are re-raised so the
    caller can treat them as a crash.
    """
    xDesktop = xContext.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", xContext)
    props = [("Hidden", True), ("ReadOnly", True)] # FilterName?
    loadProps = tuple([mkPropertyValue(name, value) for (name, value) in props])
    xListener = None
    if component == "writer":
        xListener = EventListener()
        xGEB = xContext.getValueByName(
            "/singletons/com.sun.star.frame.theGlobalEventBroadcaster")
        xGEB.addDocumentEventListener(xListener)
    try:
        xDoc = None
        startTime = time.time()
        xDoc = xDesktop.loadComponentFromURL(url, "_blank", 0, loadProps)
        if xDoc is None:
            # Nothing was loaded: there is no layout to wait for and no
            # meaningful load time to report.
            t.cancel()
            return None
        if component == "calc":
            try:
                xDoc.calculateAll()
            except AttributeError:
                pass
            t.cancel()
            logTimeSpent(url, startTime)
            return xDoc
        elif component == "writer":
            time_ = 0
            t.cancel()
            while time_ < 30:
                if xListener.layoutFinished:
                    logTimeSpent(url, startTime)
                    return xDoc
                time_ += 1
                time.sleep(1)
        else:
            t.cancel()
            logTimeSpent(url, startTime)
            return xDoc
        with open("file.log", "a") as fh:
            fh.write("layout did not finish\n")
        return xDoc
    except pyuno.getClass("com.sun.star.beans.UnknownPropertyException"):
        xListener = None
        raise # means crashed, handle it later
    except pyuno.getClass("com.sun.star.lang.DisposedException"):
        xListener = None
        raise # means crashed, handle it later
    except pyuno.getClass("com.sun.star.lang.IllegalArgumentException"):
        pass # means could not open the file, ignore it
    except BaseException:
        # Anything else: do not leave the document open, then propagate.
        if xDoc:
            xDoc.close(True)
        raise
    finally:
        if xListener:
            xGEB.removeDocumentEventListener(xListener)


def exportToODF(xContext, xDoc, baseName, t, component):
    """Store `xDoc` as flat ODF below the global `outdir`.

    The target name is `baseName` with its extension replaced by the flat
    ODF suffix of `component`. Components without an entry in flatODFTypes
    (currently "reverse") are skipped instead of raising KeyError.
    `t` is accepted for interface compatibility and is not used.
    """
    if component not in flatODFTypes:
        print("exportToODF " + baseName + " => skipped, no flat ODF type for " + component)
        return
    (suffix, filterName) = flatODFTypes[component]
    exportFileName = outdir + "/" + os.path.splitext(baseName)[0] + suffix
    print("exportToODF " + baseName + " => " + exportFileName)
    props = [("FilterName", filterName),
             ("Overwrite", True)]
    storeProps = tuple([mkPropertyValue(name, value) for (name, value) in props])
    xDoc.storeToURL(exportFileName, tuple(storeProps))


def handleCrash(file, disposed):
    """Append a "Crash:<file>" entry to crashlog.txt.

    `disposed` == 1 marks a crash detected through DisposedException.
    """
    with open("crashlog.txt", "a") as crashLog:
        crashLog.write('Crash:' + file + ' ')
        if disposed == 1:
            crashLog.write('through disposed\n')
        else:
            # Terminate the entry so the next one starts on its own line.
            crashLog.write('\n')
# add here the remaining handling code for crashed files


def alarm_handler(args):
    """Timer callback: kill the office behind the connection `args`."""
    args.kill()


class _Watchdog:
    """Timer that kills the office connection and remembers that it did.

    A cancelled threading.Timer and an expired one both report
    is_alive() == False, so the `fired` flag is the only reliable way to
    tell a timeout from a crash.
    """

    def __init__(self, seconds, connection):
        self.fired = False
        self._timer = threading.Timer(seconds, self._expire, [connection])

    def _expire(self, connection):
        self.fired = True
        alarm_handler(connection)

    def start(self):
        self._timer.start()

    def cancel(self):
        self._timer.cancel()

    def is_alive(self):
        return self._timer.is_alive()


class HandleFileTest:
    """Load one file, export it as flat ODF and record the outcome."""

    def __init__(self, file, state, component):
        self.file = file
        self.state = state
        self.component = component

    def _recover(self, watchdog, connection, disposed, badFiles):
        """Record a timeout or crash for this file, then restart the office."""
        if watchdog.fired:
            self.state.timeoutFiles.append(self.file)
        else:
            watchdog.cancel()
            handleCrash(self.file, disposed)
            badFiles.append(self.file)
        connection.tearDown()
        connection.setUp()

    def _closeDocument(self, xDoc, connection):
        """Close `xDoc` under a 10 second watchdog; restart the office on a crash."""
        watchdog = _Watchdog(10, connection)
        watchdog.start()
        try:
            try:
                xDoc.close(True)
            finally:
                # Always disarm the timer, otherwise it would later kill the
                # office that is restarted below or used by the next test.
                watchdog.cancel()
        except pyuno.getClass("com.sun.star.beans.UnknownPropertyException"):
            print("caught UnknownPropertyException while closing")
            self.state.badPropertyFiles.append(self.file)
            connection.tearDown()
            connection.setUp()
        except pyuno.getClass("com.sun.star.lang.DisposedException"):
            print("caught DisposedException while closing")
            # As before, the file is only recorded when the watchdog expired.
            if watchdog.fired:
                self.state.badDisposedFiles.append(self.file)
            connection.tearDown()
            connection.setUp()

    def run(self, xContext, connection):
        """Load and export self.file, guarded by a 60 second watchdog.

        A file that loads is appended to state.goodFiles and exported. A
        crash or timeout is recorded in the matching state list and the
        office is restarted. A file that cannot be opened is noted in
        file.log and otherwise skipped.
        """
        xDoc = None
        watchdog = None
        try:
            url = "file://" + quote(self.file)
            with open("file.log", "a") as fh:
                fh.write(url + "\n")
            watchdog = _Watchdog(60, connection)
            watchdog.start()
            xDoc = loadFromURL(xContext, url, watchdog, self.component)
            if xDoc is None:
                # Not loaded: neither "good" nor exportable.
                with open("file.log", "a") as fh:
                    fh.write("could not load\n")
            else:
                self.state.goodFiles.append(self.file)
                exportToODF(xContext, xDoc, os.path.basename(urlparse(url).path), watchdog, self.component)
        except pyuno.getClass("com.sun.star.beans.UnknownPropertyException"):
            xDoc = None # belongs to the office that is being replaced
            self._recover(watchdog, connection, 0, self.state.badPropertyFiles)
        except pyuno.getClass("com.sun.star.lang.DisposedException"):
            xDoc = None # belongs to the office that is being replaced
            self._recover(watchdog, connection, 1, self.state.badDisposedFiles)
        finally:
            if watchdog is not None:
                watchdog.cancel()
            if xDoc is not None:
                self._closeDocument(xDoc, connection)


class State:
    """Lists of file names, one list per test outcome."""

    def __init__(self):
        self.goodFiles = []
        self.badDisposedFiles = []
        self.badPropertyFiles = []
        self.timeoutFiles = []


def write_state_report(files_list, start_time, report_filename, description):
    """Write a description, the start time and one file per line to a report."""
    with open(report_filename, "w") as fh:
        fh.write("%s:\n" % description)
        fh.write("Starttime: %s\n" % start_time.isoformat())
        for f in files_list:
            fh.write("%s\n" % f)


def writeReport(state, startTime):
    """Write one report file per outcome list of `state`."""
    write_state_report(state.goodFiles, startTime, "goodFiles.log",
                       "Files which loaded perfectly")
    write_state_report(state.badDisposedFiles, startTime, "badDisposedFiles.log",
                       "Files which crashed with DisposedException")
    write_state_report(state.badPropertyFiles, startTime, "badPropertyFiles.log",
                       "Files which crashed with UnknownPropertyException")
    write_state_report(state.timeoutFiles, startTime, "timeoutFiles.log",
                       "Files which timed out")


def runHandleFileTests(opts):
    """Benchmark every supported file found in opts.dirs and write the reports.

    Exports go to a time-stamped subdirectory of opts.outdir. The office
    process is killed and the reports are written even when the run is
    aborted by an exception.
    """
    global outdir
    startTime = datetime.datetime.now()
    connection = PersistentConnection(opts)
    outdir = os.path.join(opts.outdir, startTime.strftime('%Y%m%d.%H%M%S'))
    # Created before the try block so the report in `finally` can rely on it.
    state = State()
    try:
        tests = []
        for component, validExtension in validFileExtensions.items():
            files = []
            for suffix in validExtension:
                files.extend(getFiles(opts.dirs, suffix))
            files.sort()
            tests.extend(HandleFileTest(path, state, component) for path in files)
        runConnectionTests(connection, simpleInvoke, tests)
    finally:
        try:
            connection.kill()
        finally:
            writeReport(state, startTime)


def parseArgs(argv):
    """Parse the command line; argv[0] is the program name and is skipped."""
    epilog = "'location' is a pathname, not a URL. 'outdir' and 'userdir' are URLs.\n" \
             "The 'directory' parameters should be full absolute pathnames, not URLs."

    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter,
                                     epilog=epilog)
    parser.add_argument('--soffice', metavar='method:location', required=True,
                        help="specify soffice instance to connect to\n"
                             "supported methods: 'path', 'connect'")
    parser.add_argument('--outdir', metavar='URL', required=True,
                        help="specify the output directory for flat ODF exports")
    parser.add_argument('--userdir', metavar='URL',
                        help="specify user installation directory for 'path' method")
    parser.add_argument('--valgrind', action='store_true',
                        help="pass --valgrind to soffice for 'path' method")
    parser.add_argument('dirs', metavar='directory', nargs='+')

    args = parser.parse_args(argv[1:])

    return args


if __name__ == "__main__":
    opts = parseArgs(sys.argv)
    runHandleFileTests(opts)

# vim:set shiftwidth=4 softtabstop=4 expandtab: