1#!/usr/bin/python3
2# -*- coding: utf-8 -*-
3# Copyright (C) 2009 Søren Roug, European Environment Agency
4#
5# This is free software.  You may redistribute it under the terms
6# of the Apache license and the GNU General Public License Version
7# 2 or at your option any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public
15# License along with this program; if not, write to the Free Software
16# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
17#
18# Contributor(s):
19#
20import zipfile
21from xml.sax import make_parser,handler
22from xml.sax.xmlreader import InputSource
23import xml.sax.saxutils
24import sys
25from odf.opendocument import OpenDocument
26from odf import element, grammar
27from odf.namespaces import *
28from odf.attrconverters import attrconverters, cnv_string
29
30from io import BytesIO
31
# Python 3 has no separate ``unicode`` builtin; alias it to ``str`` so the
# rest of the script can call unicode() regardless of the major version.
if sys.version_info[0] == 3:
    unicode = str

# Attributes that are not part of the grammar but are looked up per product
# when an element's attributes are checked.
# Layout: product name -> {element (namespace, name) -> tuple of attribute
# (namespace, name) pairs}.
extension_attributes = {
    "OpenOffice.org": {
        (METANS, u'template'): (
            (XLINKNS, u'role'),
        ),
        (STYLENS, u'graphic-properties'): (
            (STYLENS, u'background-transparency'),
        ),
        (STYLENS, u'paragraph-properties'): (
            (TEXTNS, u'enable-numbering'),
            (STYLENS, u'join-border'),
        ),
        (STYLENS, u'table-cell-properties'): (
            (STYLENS, u'writing-mode'),
        ),
        (STYLENS, u'table-row-properties'): (
            (STYLENS, u'keep-together'),
        ),
    },
    "KOffice": {
        (STYLENS, u'graphic-properties'): (
            (KOFFICENS, u'frame-behavior-on-new-page'),
        ),
        (DRAWNS, u'page'): (
            (KOFFICENS, u'name'),
        ),
        (PRESENTATIONNS, u'show-shape'): (
            (KOFFICENS, u'order-id'),
        ),
        (PRESENTATIONNS, u'hide-shape'): (
            (KOFFICENS, u'order-id'),
        ),
        (CHARTNS, u'legend'): (
            (KOFFICENS, u'title'),
        ),
    },
}
71
# Messages already written to stdout, in the order they were first reported.
printed_errors = []


def print_error(str):
    """Print a diagnostic message, suppressing exact repeats.

    A message that has been printed before is silently dropped; a new
    message is recorded in ``printed_errors`` and written to stdout.
    """
    if str in printed_errors:
        return
    printed_errors.append(str)
    print(str)
78
def chop_arg(arg):
    """Shorten *arg* for display.

    Values longer than 20 characters are cut to their first 20 characters
    followed by "..."; anything shorter is returned unchanged.
    """
    limit = 20
    return arg if len(arg) <= limit else "%s..." % arg[0:limit]
83
def make_qname(tag):
    """Format a (namespace, localname) pair as "prefix:localname".

    The namespace is looked up in ``nsdict``; when it has no entry there,
    the namespace itself is used in place of the prefix.
    """
    namespace = tag[0]
    localname = tag[1]
    prefix = nsdict.get(namespace, namespace)
    return "%s:%s" % (prefix, localname)
86
def allowed_attributes(tag):
    """Look up *tag* in ``grammar.allowed_attributes`` and return the entry.

    The result is whatever ``.get`` yields for a missing key when the tag is
    not listed (None for a plain dict).
    """
    attribute_table = grammar.allowed_attributes
    return attribute_table.get(tag)
89
90
class ODFElementHandler(handler.ContentHandler):
    """SAX content handler that checks ODF XML against the odf grammar tables.

    The handler expects namespace-aware events (startElementNS/endElementNS).
    For each element it reports, through print_error():

    * attributes that are known product extensions (as warnings),
    * attributes the grammar does not allow on the element,
    * attribute values rejected by the attribute converters,
    * elements that their parent does not allow as a child,
    * required attributes that are missing,
    * text content in elements that the grammar says take no text.
    """

    def __init__(self, document):
        """Create a handler.

        :param document: stored as ``self.doc``; not otherwise used by the
            checks in this class.
        """
        handler.ContentHandler.__init__(self)
        self.doc = document
        self.tagstack = []    # open elements, outermost first
        self.data = []        # text chunks of the element currently open
        self.datastack = []   # parked text buffers of the enclosing elements
        self.currtag = None   # most recently closed element

    def characters(self, data):
        """Collect a chunk of character data for the element currently open."""
        self.data.append(data)

    def startElementNS(self, tag, qname, attrs):
        """Check an opening element.

        :param tag: (namespace, localname) pair of the element.
        :param qname: raw qualified name supplied by the parser (unused).
        :param attrs: namespace-aware attribute collection of the element.
        """
        self._check_attributes(tag, attrs)

        self.tagstack.append(tag)
        # Park the parent's text instead of discarding it, so that text which
        # precedes a child element is still checked when the parent closes.
        self.datastack.append(self.data)
        self.data = []

        self._check_parent(tag)
        self._check_required_attributes(tag, attrs)

    def endElementNS(self, tag, qname):
        """Check the collected text of a closing element.

        :param tag: (namespace, localname) pair of the element.
        :param qname: raw qualified name supplied by the parser (unused).
        """
        self.currtag = self.tagstack.pop()
        text = ''.join(self.data).strip()
        # Check that only elements that can take text have text
        # But only elements we know exist in grammar
        if tag in grammar.allowed_children:
            if text != '' and tag not in grammar.allows_text:
                print_error("Error: %s does not allow text data" % make_qname(tag))
        # Resume collecting text for the enclosing element.
        self.data = self.datastack.pop() if self.datastack else []

    def _is_root_tag(self, tag):
        """Return True for the elements that are exempt from the parent check."""
        return tag in (
            (OFFICENS, 'document'), (OFFICENS, 'document-content'),
            (OFFICENS, 'document-styles'), (OFFICENS, 'document-meta'),
            (OFFICENS, 'document-settings'), (MANIFESTNS, 'manifest'),
        )

    def _check_attributes(self, tag, attrs):
        """Report extension, disallowed and badly valued attributes of *tag*."""
        allowed_attrs = grammar.allowed_attributes.get(tag)
        for (att, value) in attrs.items():
            prefix = nsdict.get(att[0], att[0])
            # Check if it is a known extension
            is_extension = False
            for product, ext_attrs in extension_attributes.items():
                allowed_ext_attrs = ext_attrs.get(tag)
                if allowed_ext_attrs and att in allowed_ext_attrs:
                    print_error("Warning: Attribute %s in element <%s> is illegal - %s extension"  % ( make_qname(att), make_qname(tag), product))
                    is_extension = True
            # Check if it is an allowed attribute
            if not is_extension and allowed_attrs and att not in allowed_attrs:
                print_error("Error: Attribute %s:%s is not allowed in element <%s>" % ( prefix, att[1], make_qname(tag)))
            # Check the value; attributes without a dedicated converter are
            # run through cnv_string.
            try:
                convert = attrconverters.get(att, cnv_string)
                convert(att, value, tag)
            except ValueError as res:
                print_error("Error: Bad value '%s' for attribute %s:%s in  tag: <%s> - %s" %
                    (chop_arg(value), prefix, att[1], make_qname(tag), res))

    def _check_parent(self, tag):
        """Report *tag* if the enclosing element does not allow it as a child.

        Must be called after *tag* has been pushed onto ``self.tagstack``.
        """
        if self._is_root_tag(tag):
            return
        if len(self.tagstack) < 2:
            # No enclosing element: this is the root, and it is not one of
            # the accepted root elements.
            print_error("Error: This document starts with the wrong tag: <%s>" % make_qname(tag))
            return
        parent = self.tagstack[-2]
        allowed_children = grammar.allowed_children.get(parent)
        if allowed_children and tag not in allowed_children:
            print_error("Error: Element %s is not allowed in element %s" % ( make_qname(tag), make_qname(parent)))

    def _check_required_attributes(self, tag, attrs):
        """Report every attribute the grammar requires on *tag* but *attrs* lacks."""
        required = grammar.required_attributes.get(tag)
        if not required:
            return
        for r in required:
            if attrs.get(r) is None:
                print_error("Error: Required attribute missing: %s in <%s>" % (make_qname(r), make_qname(tag)))
158
class ODFDTDHandler(handler.DTDHandler):
    """DTD handler that warns whenever a notation declaration is reported."""

    def notationDecl(self, name, public_id, system_id):
        """Emit a warning; the declaration itself is ignored."""
        warning = "Warning: ODF doesn't use DOCTYPEs"
        print_error(warning)
163
def exitwithusage(exitcode=2):
    """Write the command-line synopsis to stderr and exit with *exitcode*."""
    usage_lines = (
        "Usage: %s inputfile\n" % sys.argv[0],
        "\tInputfile must be OpenDocument format\n",
    )
    for line in usage_lines:
        sys.stderr.write(line)
    sys.exit(exitcode)
169
def lint(odffile):
    """Check an OpenDocument package and report every problem found.

    The archive layout of the ``mimetype`` member is checked first, then
    each of the well-known XML members (manifest, content, meta, styles,
    settings) is parsed with a namespace-aware SAX parser whose events are
    validated by ODFElementHandler.  Findings are reported through
    print_error(); nothing is returned.

    :param odffile: path of the file to check.
    :raises: whatever the SAX parser raises for XML that is not well-formed
        (the stock ``handler.ErrorHandler`` is installed).
    """
    if not zipfile.is_zipfile(odffile):
        print_error("Error: This is not a zipped file")
        return

    xml_members = ('META-INF/manifest.xml', 'content.xml', 'meta.xml',
                   'styles.xml', 'settings.xml')

    # The context manager guarantees the archive is closed, also when a
    # member turns out not to be well-formed and the parser raises.
    with zipfile.ZipFile(odffile) as zfd:
        try:
            # ZipFile.read() returns bytes; decode explicitly, because
            # str(bytes) on Python 3 would yield the repr "b'...'".
            mimetype = zfd.read('mimetype').decode('utf-8', 'replace')
        except (KeyError, zipfile.BadZipfile):
            # Missing or unreadable member: carry on with an empty type.
            mimetype = u''
        d = OpenDocument(mimetype)

        for position, zi in enumerate(zfd.infolist()):
            if position == 0:
                if zi.filename == 'mimetype':
                    if zi.compress_type != zipfile.ZIP_STORED:
                        print_error("Error: The 'mimetype' member must be stored - not deflated")
                    # Truthiness works for both bytes (Python 3) and str
                    # (Python 2) comments; comparing bytes with "" is always
                    # unequal and flagged every file.
                    # NOTE(review): the message talks about header info while
                    # the member comment is tested - confirm whether zi.extra
                    # should be checked as well.
                    if zi.comment:
                        print_error("Error: The 'mimetype' member must not have extra header info")
                else:
                    print_error("Warning: The first member in the archive should be the mimetype")

            if zi.filename not in xml_members:
                continue

            content = zfd.read(zi.filename)
            parser = make_parser()
            parser.setFeature(handler.feature_namespaces, True)
            # Never fetch external general entities referenced by the input.
            parser.setFeature(handler.feature_external_ges, False)
            parser.setContentHandler(ODFElementHandler(d))
            parser.setDTDHandler(ODFDTDHandler())
            parser.setErrorHandler(handler.ErrorHandler())

            inpsrc = InputSource()
            inpsrc.setByteStream(BytesIO(content))
            parser.parse(inpsrc)
206
207
# Only act as a command-line tool when executed directly, so that importing
# this module does not inspect sys.argv or terminate the interpreter.
if __name__ == "__main__":
    if len(sys.argv) != 2:
        exitwithusage()
    lint(unicode(sys.argv[1]))
211
212
213
214# Local Variables: ***
215# mode: python     ***
216# End:             ***
217