1#!/usr/bin/env python
2
3import sys, os, tempfile, subprocess, lxml.etree, zipfile, urllib, hashlib
4
def getODFVersion(zip):
	"""Return the office:version attribute of the package's content.xml.

	zip is an open zipfile.ZipFile. The value is read from the root element
	of content.xml; None is returned when the attribute is not present.
	A package without content.xml raises KeyError (from zip.open).
	"""
	stream = zip.open("content.xml", "r")
	try:
		content = lxml.etree.parse(stream)
	finally:
		# Release the archive member even when parsing fails.
		stream.close()
	return content.getroot().get(
		"{urn:oasis:names:tc:opendocument:xmlns:office:1.0}version")
9
def getJing():
	"""Return the path to jing.jar, downloading it when it is missing.

	The jar is looked up below sys.path[0]. When it is not there, the jing
	distribution zip is downloaded into the current directory, the jar is
	extracted from it and its SHA-1 digest is checked.

	Returns the path of the jar, or None when the jar could not be obtained
	(download/extraction failed or the digest does not match), so that the
	caller can fall back to another validator.
	"""
	jingjar = "jing-20091111/bin/jing.jar"
	path = os.path.join(sys.path[0], jingjar)
	if os.path.isfile(path):
		return path
	# urlretrieve lives in urllib.request on Python 3 and in urllib on
	# Python 2; import locally so the file-level imports stay untouched.
	try:
		from urllib.request import urlretrieve
	except ImportError:
		from urllib import urlretrieve
	print("Downloading jing.jar")
	z = "jing-20091111.zip"
	try:
		urlretrieve("http://jing-trang.googlecode.com/files/" + z, z)
		archive = zipfile.ZipFile(z, "r")
		try:
			archive.extract(jingjar, sys.path[0])
		finally:
			# Close the archive before the downloaded zip is removed.
			archive.close()
	except (EnvironmentError, zipfile.BadZipfile, KeyError) as e:
		# Network failure, a download that is not a zip, or a zip without
		# the expected member: report it and let the caller fall back.
		print("Could not fetch jing.jar: " + str(e))
		return None
	finally:
		if os.path.exists(z):
			os.unlink(z)
	with open(path, "rb") as f:
		digest = hashlib.sha1(f.read()).hexdigest()
	if digest != "daa0cf7b1679264f8e68171f7f253255794773f7":
		print("Wrong hash code: wrong file.")
		# Do not keep a jar that failed the integrity check.
		os.unlink(path)
		return None
	return path
31
# RELAX NG schema file names per ODF version. The keys are the values of the
# office:version attribute that the validators accept. Each entry is a list of
# [document schema, manifest schema]: slot 0 is used for the document streams
# (content.xml, styles.xml, ...), slot 1 for META-INF/manifest.xml. The names
# are joined with sys.path[0] where they are used.
schemas = {
	"1.0": ["OpenDocument-schema-v1.0-os.rng",
		"OpenDocument-manifest-schema-v1.0-os.rng"],
	"1.1": ["OpenDocument-schema-v1.1.rng",
		"OpenDocument-manifest-schema-v1.1.rng"],
	"1.2": ["OpenDocument-v1.2-cs01-schema-calligra.rng",
		"OpenDocument-v1.2-cs01-manifest-schema.rng"]
}
40
def getScriptPath():
	"""Return the directory of the running script (sys.argv[0]), with
	symbolic links resolved."""
	script = os.path.realpath(sys.argv[0])
	directory, _ = os.path.split(script)
	return directory
43
class jingodfvalidator:
	"""Validate ODF documents with the external jing RELAX NG validator.

	Every XML stream is written to a temporary file, stripped of foreign
	elements and attributes with removeForeign.xsl and then handed to
	"java -jar jing.jar". The document given by the user is never modified.
	"""
	def __init__(self, jingjar):
		"""Store the path to jing.jar and load removeForeign.xsl.

		jingjar is the jar passed to "java -jar". The stylesheet is loaded
		from the directory returned by getScriptPath().
		"""
		self.jingjar = jingjar
		self.xmlparser = lxml.etree.XMLParser()
		xsltpath = os.path.join(getScriptPath(), "removeForeign.xsl")
		self.removeForeignXSLT = self.loadXSLT(xsltpath)

	def validate(self, odfpath):
		"""Validate the document at odfpath.

		A zip package has content.xml, styles.xml, META-INF/manifest.xml,
		meta.xml and settings.xml validated, in that order, against the
		schemas of the version declared in content.xml. A file that cannot be
		opened as a zip is validated as a flat XML document instead.

		Returns an error string for the first problem found, None otherwise.
		"""
		try:
			package = zipfile.ZipFile(odfpath, 'r')
		except (zipfile.BadZipfile, IOError):
			# Not a zip package: treat it as flat XML and report the result.
			return self.validateFlatXML(odfpath)
		try:
			odfversion = getODFVersion(package)
			if odfversion not in schemas:
				return "Document has no version number"
			docschema, manifestschema = schemas[odfversion]
			members = (
				('content.xml', docschema),
				('styles.xml', docschema),
				('META-INF/manifest.xml', manifestschema),
				('meta.xml', docschema),
				('settings.xml', docschema),
			)
			for member, schema in members:
				err = self.validateFile(package, member, schema)
				if err:
					return err
			return None
		finally:
			package.close()

	def validateFlatXML(self, filepath):
		"""Validate a flat XML document against the ODF 1.2 document schema.

		The file is copied to a temporary file first, because validateXML
		rewrites the file it is given when it removes foreign content.

		Returns an error string when the document is not valid, None
		otherwise.
		"""
		schema = os.path.join(sys.path[0], schemas["1.2"][0])
		with open(filepath, "rb") as f:
			data = f.read()
		suffix = "_" + os.path.basename(filepath)
		if self._validateCopy(data, suffix, schema):
			return filepath + " is not valid."

	def validateFile(self, zip, filepath, schema):
		"""Validate member filepath of the open ZipFile zip.

		schema is a schema file name relative to sys.path[0].

		Returns an error string when the member is missing or not valid,
		None otherwise.
		"""
		schema = os.path.join(sys.path[0], schema)
		try:
			data = zip.read(filepath)
		except KeyError:
			# ZipFile raises KeyError for a name that is not in the archive.
			return filepath + " is missing."
		suffix = "_" + filepath.replace("/", "_")
		if self._validateCopy(data, suffix, schema):
			return filepath + " is not valid."

	def _validateCopy(self, data, suffix, schema):
		"""Write data to a temporary file and run validateXML on that copy.

		Returns the exit status of jing; the temporary file is removed
		afterwards, also when validation raises.
		"""
		tmp = tempfile.NamedTemporaryFile(suffix = suffix)
		try:
			tmp.write(data)
			# jing reads the file by name, so the data must be on disk.
			tmp.flush()
			return self.validateXML(schema, tmp.name)
		finally:
			tmp.close()

	def loadXML(self, filepath):
		"""Parse the XML file at filepath with the shared parser."""
		# Binary mode: the parser handles the encoding declaration itself.
		with open(filepath, 'rb') as f:
			return lxml.etree.parse(f, self.xmlparser)

	def loadXSLT(self, filepath):
		"""Load the stylesheet at filepath as an lxml XSLT transform that may
		neither read from the network nor write files."""
		xsl = self.loadXML(filepath)
		ac = lxml.etree.XSLTAccessControl(read_network=False, write_file=False)
		return lxml.etree.XSLT(xsl, access_control=ac)

	def removeForeign(self, filepath):
		"""Strip foreign elements and attributes from the file at filepath.

		The file is overwritten with the transformed document, so callers
		must pass a scratch copy rather than an original document.
		"""
		xml = self.loadXML(filepath)
		xml = self.removeForeignXSLT(xml)
		xml.write(filepath)

	# Validate the XML and optionally remove the foreign elements and
	# attributes first. Calligra currently writes ODF 1.2 Extended, which is
	# allowed to contain foreign elements and attributes. If Calligra adds a
	# mode to save ODF 1.2, the validator should not remove them when
	# validating.
	def validateXML(self, schema, xmlpath, removeForeign = True):
		"""Run jing on xmlpath with the RELAX NG schema file schema.

		When removeForeign is true, xmlpath is rewritten in place without
		foreign content before jing runs.

		Returns the exit status of the java process (non-zero is treated as
		"not valid" by the callers).
		"""
		if removeForeign:
			self.removeForeign(xmlpath)

		args = ["java", "-jar", self.jingjar, "-i", schema, xmlpath]
		return subprocess.call(args)
123
def createValidator(name):
	"""Load the RELAX NG schema file name from sys.path[0].

	Returns an lxml.etree.RelaxNG validator built from the parsed schema.
	"""
	# "with" closes the schema file once it has been parsed.
	with open(os.path.join(sys.path[0], name), "rb") as f:
		xml = lxml.etree.parse(f)
	return lxml.etree.RelaxNG(xml)
127
class odfvalidator:
	"""Validate ODF packages in-process with lxml's RELAX NG support."""
	def __init__(self):
		"""Build the document and manifest validators for every version
		listed in schemas."""
		self.validators = {}
		for version, names in schemas.items():
			self.validators[version] = [
				createValidator(names[0]),
				createValidator(names[1])
			]

	# returns error string on error, None otherwise
	def validate(self, odfpath):
		"""Validate content.xml, styles.xml and META-INF/manifest.xml of the
		package at odfpath, in that order, stopping at the first error.

		Returns an error string on error (including a file that is not a
		zip package), None otherwise.
		"""
		try:
			package = zipfile.ZipFile(odfpath, 'r')
		except zipfile.BadZipfile as e:
			# Report instead of aborting the whole run with a traceback.
			return odfpath + ':' + str(e)
		try:
			odfversion = getODFVersion(package)
			if odfversion not in schemas:
				return "Document has no version number"
			docvalidator, manifestvalidator = self.validators[odfversion]
			members = (
				('content.xml', docvalidator),
				('styles.xml', docvalidator),
				('META-INF/manifest.xml', manifestvalidator),
			)
			for member, validator in members:
				err = self.validateFile(package, member, validator)
				if err:
					return err
			return None
		finally:
			package.close()

	def validateFile(self, zip, file, validator):
		"""Validate member file of the open ZipFile zip with validator.

		Returns an error string when the member is missing, is not
		well-formed XML or does not validate; None otherwise.
		"""
		try:
			data = zip.read(file)
		except KeyError as e:
			# Missing member: return text like every other error, not the
			# exception object.
			return file + ':' + str(e)
		try:
			xml = lxml.etree.XML(data)
		except lxml.etree.XMLSyntaxError as e:
			return file + ':' + str(e)
		if not validator.validate(xml):
			return file + ':' + str(validator.error_log.last_error)
166
if __name__ == '__main__':
	# Use jing when jing.jar is available, otherwise fall back to the
	# lxml-based validator.
	jingjar = getJing()
	if jingjar:
		validator = jingodfvalidator(jingjar)
	else:
		validator = odfvalidator()
	for f in sys.argv[1:]:
		# Arguments that are not existing files are skipped.
		if os.path.isfile(f):
			e = validator.validate(f)
			if e:
				# Parenthesised form prints the same on Python 2 and 3.
				print(str(e))
178