#!/usr/bin/env python
"""Validate OpenDocument files against the RelaxNG schemas listed in `schemas`.

Each path given on the command line is validated and any error is printed.
When jing.jar is available (or can be downloaded) validation is delegated to
jing through `java -jar`; otherwise lxml's built-in RelaxNG validator is used.
"""

import hashlib
import os
import subprocess
import sys
import tempfile
import urllib
import zipfile

import lxml.etree

try:
    from urllib import urlretrieve  # Python 2
except ImportError:
    from urllib.request import urlretrieve  # Python 3

# Clark-notation name of the office:version attribute on the root element.
OFFICE_VERSION_ATTR = "{urn:oasis:names:tc:opendocument:xmlns:office:1.0}version"

# SHA-1 digest the extracted jing.jar must have to be accepted.
JING_JAR_SHA1 = "daa0cf7b1679264f8e68171f7f253255794773f7"


def getODFVersion(zip):
    """Return the office:version attribute of content.xml, or None if absent.

    `zip` is an open zipfile.ZipFile.  Raises KeyError when the archive has
    no content.xml and lxml.etree.XMLSyntaxError when it cannot be parsed.
    """
    # ZipFile.read() hands back the bytes directly, so no member handle is
    # left open the way zip.open() would leave one.
    root = lxml.etree.fromstring(zip.read("content.xml"))
    return root.get(OFFICE_VERSION_ATTR)


def getJing():
    """Return the path to jing.jar, downloading it on first use.

    The jar is looked up below sys.path[0].  If it is not there, the release
    zip is downloaded, the jar is extracted and its SHA-1 digest is checked.
    Returns None when the download, the extraction or the digest check fails,
    so that the caller can fall back to another validator.
    """
    jingjar = "jing-20091111/bin/jing.jar"
    path = os.path.join(sys.path[0], jingjar)
    if os.path.isfile(path):
        return path

    print("Downloading jing.jar")
    z = "jing-20091111.zip"
    # Download into a temporary file instead of the current directory, which
    # may not be writable, and always remove it afterwards.
    fd, zippath = tempfile.mkstemp(suffix="_" + z)
    os.close(fd)
    try:
        # NOTE(review): this download host may no longer be available --
        # confirm the URL still resolves.
        urlretrieve("http://jing-trang.googlecode.com/files/" + z, zippath)
        with zipfile.ZipFile(zippath, "r") as archive:
            archive.extract(jingjar, sys.path[0])
    except (IOError, OSError, zipfile.BadZipfile, KeyError) as e:
        print("Could not download jing.jar: " + str(e))
        return None
    finally:
        os.unlink(zippath)

    with open(path, "rb") as f:
        digest = hashlib.sha1(f.read()).hexdigest()
    if digest != JING_JAR_SHA1:
        print("Wrong hash code: wrong file.")
        os.unlink(path)
        return None
    return path


# ODF version -> [document schema, manifest schema]; the file names are
# resolved relative to sys.path[0].
schemas = {
    "1.0": ["OpenDocument-schema-v1.0-os.rng",
            "OpenDocument-manifest-schema-v1.0-os.rng"],
    "1.1": ["OpenDocument-schema-v1.1.rng",
            "OpenDocument-manifest-schema-v1.1.rng"],
    "1.2": ["OpenDocument-v1.2-cs01-schema-calligra.rng",
            "OpenDocument-v1.2-cs01-manifest-schema.rng"]
}


def getScriptPath():
    """Return the directory containing the script named by sys.argv[0]."""
    return os.path.dirname(os.path.realpath(sys.argv[0]))


def _schemaPath(name):
    """Return the location of schema file `name` below sys.path[0]."""
    return os.path.join(sys.path[0], name)


def _readVersion(archive):
    """Return (version, error) for the ODF package `archive`.

    Exactly one of the two items is None.  `error` is a message when
    content.xml is missing or malformed, or when its version is absent or
    has no entry in `schemas`.
    """
    try:
        version = getODFVersion(archive)
    except KeyError as e:
        return None, str(e)
    except lxml.etree.XMLSyntaxError as e:
        return None, "content.xml:" + str(e)
    if version is None:
        return None, "Document has no version number"
    if version not in schemas:
        return None, "Unsupported ODF version: " + version
    return version, None


class jingodfvalidator:
    """Validate ODF documents by running jing.jar through `java -jar`."""

    # (archive member, index into schemas[version]) in validation order.
    _MEMBERS = (
        ('content.xml', 0),
        ('styles.xml', 0),
        ('META-INF/manifest.xml', 1),
        ('meta.xml', 0),
        ('settings.xml', 0),
    )

    def __init__(self, jingjar):
        """Remember the jar location and load removeForeign.xsl."""
        self.jingjar = jingjar
        self.xmlparser = lxml.etree.XMLParser()
        xsltpath = os.path.join(getScriptPath(), "removeForeign.xsl")
        self.removeForeignXSLT = self.loadXSLT(xsltpath)

    def validate(self, odfpath):
        """Validate the document at `odfpath`.

        A file that is not a zip archive is validated as flat XML.
        Returns an error string on the first failure, None otherwise.
        """
        try:
            archive = zipfile.ZipFile(odfpath, 'r')
        except zipfile.BadZipfile:
            # Not a package: hand back the flat-XML verdict instead of
            # silently discarding it.
            return self.validateFlatXML(odfpath)
        with archive:
            odfversion, err = _readVersion(archive)
            if err:
                return err
            for member, index in self._MEMBERS:
                err = self.validateFile(archive, member,
                                        schemas[odfversion][index])
                if err:
                    return err
        return None

    def validateFlatXML(self, filepath):
        """Validate a flat XML document against the ODF 1.2 document schema.

        Returns an error string when the document is malformed or invalid,
        None otherwise.
        """
        schema = _schemaPath(schemas["1.2"][0])
        with open(filepath, "rb") as source:
            data = source.read()
        # validateXML() rewrites the file it is given, so work on a
        # temporary copy and leave the user's document untouched.
        suffix = "_" + os.path.basename(filepath)
        return self._validateData(data, suffix, schema, filepath)

    def validateFile(self, zip, filepath, schema):
        """Validate member `filepath` of the archive `zip` against `schema`.

        `schema` is a schema file name resolved relative to sys.path[0].
        Returns an error string when the member is missing, malformed or
        invalid, None otherwise.
        """
        schema = _schemaPath(schema)
        try:
            data = zip.read(filepath)
        except KeyError as e:
            return str(e)
        suffix = "_" + filepath.replace("/", "_")
        return self._validateData(data, suffix, schema, filepath)

    def _validateData(self, data, suffix, schema, label):
        """Write `data` to a temporary file and validate that file.

        `label` names the document in the returned error string.
        """
        with tempfile.NamedTemporaryFile(suffix=suffix) as tmp:
            tmp.write(data)
            tmp.flush()
            try:
                r = self.validateXML(schema, tmp.name)
            except lxml.etree.XMLSyntaxError as e:
                return label + ":" + str(e)
        if r:
            return label + " is not valid."
        return None

    def loadXML(self, filepath):
        """Parse the XML file at `filepath` with this validator's parser."""
        # Passing the path lets lxml open and close the file itself.
        return lxml.etree.parse(filepath, self.xmlparser)

    def loadXSLT(self, filepath):
        """Load a stylesheet with network reads and file writes disabled."""
        xsl = self.loadXML(filepath)
        ac = lxml.etree.XSLTAccessControl(read_network=False, write_file=False)
        return lxml.etree.XSLT(xsl, access_control=ac)

    def removeForeign(self, filepath):
        """Apply removeForeign.xsl to `filepath`, overwriting it in place."""
        xml = self.loadXML(filepath)
        xml = self.removeForeignXSLT(xml)
        xml.write(filepath)

    # Validate the XML and optionally remove the foreign elements and
    # attributes first. Calligra currently writes ODF 1.2 Extended, which is
    # allowed to contain foreign elements and attributes. If Calligra adds a
    # mode to save plain ODF 1.2, the validator should not remove them when
    # validating.
    def validateXML(self, schema, xmlpath, removeForeign = True):
        """Run jing on `xmlpath` and return the exit status of the process.

        When `removeForeign` is true the file at `xmlpath` is rewritten
        in place first, so callers should pass a scratch copy.
        """
        if removeForeign:
            self.removeForeign(xmlpath)

        args = ["java", "-jar", self.jingjar, "-i", schema, xmlpath]
        return subprocess.call(args)


def createValidator(name):
    """Return an lxml RelaxNG validator for schema file `name`."""
    return lxml.etree.RelaxNG(lxml.etree.parse(_schemaPath(name)))


class odfvalidator:
    """Validate ODF documents with lxml's RelaxNG implementation."""

    # (archive member, index into self.validators[version]).
    _MEMBERS = (
        ('content.xml', 0),
        ('styles.xml', 0),
        ('META-INF/manifest.xml', 1),
    )

    def __init__(self):
        """Compile the document and manifest schema of every known version."""
        self.validators = {}
        for key, names in schemas.items():
            self.validators[key] = [createValidator(name) for name in names]

    # returns error string on error, None otherwise
    def validate(self, odfpath):
        """Validate the ODF package at `odfpath`.

        Returns an error string on the first failure, None otherwise.
        """
        try:
            archive = zipfile.ZipFile(odfpath, 'r')
        except zipfile.BadZipfile as e:
            return odfpath + ':' + str(e)
        with archive:
            odfversion, err = _readVersion(archive)
            if err:
                return err
            for member, index in self._MEMBERS:
                err = self.validateFile(archive, member,
                                        self.validators[odfversion][index])
                if err:
                    return err
        return None

    def validateFile(self, zip, file, validator):
        """Validate member `file` of the archive `zip` with `validator`.

        Returns an error string when the member is missing, malformed or
        invalid, None otherwise.
        """
        try:
            xml = lxml.etree.XML(zip.read(file))
        except lxml.etree.XMLSyntaxError as e:
            return file + ':' + str(e)
        except KeyError as e:
            return str(e)
        if not validator.validate(xml):
            return file + ':' + str(validator.error_log.last_error)
        return None


if __name__ == '__main__':
    jingjar = getJing()
    if jingjar:
        validator = jingodfvalidator(jingjar)
    else:
        validator = odfvalidator()
    for f in sys.argv[1:]:
        if os.path.isfile(f):
            e = validator.validate(f)
            if e:
                print(str(e))