#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (C) 2009 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
# 2 or at your option any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
import zipfile
from xml.sax import make_parser, handler
from xml.sax.xmlreader import InputSource
import xml.sax.saxutils
import sys
from odf.opendocument import OpenDocument
from odf import element, grammar
from odf.namespaces import *
from odf.attrconverters import attrconverters, cnv_string

from io import BytesIO

# On Python 3 there is no separate ``unicode`` type; alias it to ``str`` so
# the rest of the script can call ``unicode(...)`` unconditionally.
if sys.version_info[0] == 3:
    unicode = str

# Attributes that specific products are known to write even though the
# grammar does not list them.  Layout:
#
#     product name -> {element tag -> tuple of attribute names}
#
# where both tags and attribute names are (namespace, local-name) pairs.
# The attribute check reports a match as a "Warning ... <product> extension"
# instead of a plain "not allowed" error.
extension_attributes = {
    "OpenOffice.org": {
        (METANS, u'template'): (
            (XLINKNS, u'role'),
        ),
        (STYLENS, u'graphic-properties'): (
            (STYLENS, u'background-transparency'),
        ),
        (STYLENS, u'paragraph-properties'): (
            (TEXTNS, u'enable-numbering'),
            (STYLENS, u'join-border'),
        ),
        (STYLENS, u'table-cell-properties'): (
            (STYLENS, u'writing-mode'),
        ),
        (STYLENS, u'table-row-properties'): (
            (STYLENS, u'keep-together'),
        ),
    },
    "KOffice": {
        (STYLENS, u'graphic-properties'): (
            (KOFFICENS, u'frame-behavior-on-new-page'),
        ),
        (DRAWNS, u'page'): (
            (KOFFICENS, u'name'),
        ),
        (PRESENTATIONNS, u'show-shape'): (
            (KOFFICENS, u'order-id'),
        ),
        (PRESENTATIONNS, u'hide-shape'): (
            (KOFFICENS, u'order-id'),
        ),
        (CHARTNS, u'legend'): (
            (KOFFICENS, u'title'),
        ),
    },
}
71 72printed_errors = [] 73 74def print_error(str): 75 if str not in printed_errors: 76 printed_errors.append(str) 77 print (str) 78 79def chop_arg(arg): 80 if len(arg) > 20: 81 return "%s..." % arg[0:20] 82 return arg 83 84def make_qname(tag): 85 return "%s:%s" % (nsdict.get(tag[0],tag[0]), tag[1]) 86 87def allowed_attributes(tag): 88 return grammar.allowed_attributes.get(tag) 89 90 91class ODFElementHandler(handler.ContentHandler): 92 """ Extract headings from content.xml of an ODT file """ 93 def __init__(self, document): 94 self.doc = document 95 self.tagstack = [] 96 self.data = [] 97 self.currtag = None 98 99 def characters(self, data): 100 self.data.append(data) 101 102 def startElementNS(self, tag, qname, attrs): 103 """ Pseudo-create an element 104 """ 105 allowed_attrs = grammar.allowed_attributes.get(tag) 106 attrdict = {} 107 for (att,value) in attrs.items(): 108 prefix = nsdict.get(att[0],att[0]) 109 # Check if it is a known extension 110 notan_extension = True 111 for product, ext_attrs in extension_attributes.items(): 112 allowed_ext_attrs = ext_attrs.get(tag) 113 if allowed_ext_attrs and att in allowed_ext_attrs: 114 print_error("Warning: Attribute %s in element <%s> is illegal - %s extension" % ( make_qname(att), make_qname(tag), product)) 115 notan_extension = False 116 # Check if it is an allowed attribute 117 if notan_extension and allowed_attrs and att not in allowed_attrs: 118 print_error("Error: Attribute %s:%s is not allowed in element <%s>" % ( prefix, att[1], make_qname(tag))) 119 # Check the value 120 try: 121 convert = attrconverters.get(att, cnv_string) 122 convert(att, value, tag) 123 except ValueError as res: 124 print_error("Error: Bad value '%s' for attribute %s:%s in tag: <%s> - %s" % 125 (chop_arg(value), prefix, att[1], make_qname(tag), res)) 126 127 self.tagstack.append(tag) 128 self.data = [] 129 # Check that the parent allows this child element 130 if tag not in ( (OFFICENS, 'document'), (OFFICENS, 'document-content'), 
(OFFICENS, 'document-styles'), 131 (OFFICENS, 'document-meta'), (OFFICENS, 'document-settings'), 132 (MANIFESTNS,'manifest')): 133 try: 134 parent = self.tagstack[-2] 135 allowed_children = grammar.allowed_children.get(parent) 136 except: 137 print_error("Error: This document starts with the wrong tag: <%s>" % make_qname(tag)) 138 allowed_children = None 139 if allowed_children and tag not in allowed_children: 140 print_error("Error: Element %s is not allowed in element %s" % ( make_qname(tag), make_qname(parent))) 141 # Test that all mandatory attributes have been added. 142 required = grammar.required_attributes.get(tag) 143 if required: 144 for r in required: 145 if attrs.get(r) is None: 146 print_error("Error: Required attribute missing: %s in <%s>" % (make_qname(r), make_qname(tag))) 147 148 149 def endElementNS(self, tag, qname): 150 self.currtag = self.tagstack.pop() 151 str = ''.join(self.data).strip() 152 # Check that only elements that can take text have text 153 # But only elements we know exist in grammar 154 if tag in grammar.allowed_children: 155 if str != '' and tag not in grammar.allows_text: 156 print_error("Error: %s does not allow text data" % make_qname(tag)) 157 self.data = [] 158 159class ODFDTDHandler(handler.DTDHandler): 160 def notationDecl(self, name, public_id, system_id): 161 """ Ignore DTDs """ 162 print_error("Warning: ODF doesn't use DOCTYPEs") 163 164def exitwithusage(exitcode=2): 165 """ print out usage information """ 166 sys.stderr.write("Usage: %s inputfile\n" % sys.argv[0]) 167 sys.stderr.write("\tInputfile must be OpenDocument format\n") 168 sys.exit(exitcode) 169 170def lint(odffile): 171 if not zipfile.is_zipfile(odffile): 172 print_error("Error: This is not a zipped file") 173 return 174 zfd = zipfile.ZipFile(odffile) 175 try: 176 mimetype = zfd.read('mimetype') 177 except: 178 mimetype='' 179 d = OpenDocument(unicode(mimetype)) 180 first = True 181 for zi in zfd.infolist(): 182 if first: 183 if zi.filename == 'mimetype': 
184 if zi.compress_type != zipfile.ZIP_STORED: 185 print_error("Error: The 'mimetype' member must be stored - not deflated") 186 if zi.comment != "": 187 print_error("Error: The 'mimetype' member must not have extra header info") 188 else: 189 print_error("Warning: The first member in the archive should be the mimetype") 190 first = False 191 if zi.filename in ('META-INF/manifest.xml', 'content.xml', 'meta.xml', 'styles.xml', 'settings.xml'): 192 content = zfd.read(zi.filename) 193 parser = make_parser() 194 parser.setFeature(handler.feature_namespaces, True) 195 parser.setFeature(handler.feature_external_ges, False) 196 parser.setContentHandler(ODFElementHandler(d)) 197 dtdh = ODFDTDHandler() 198 parser.setDTDHandler(dtdh) 199 parser.setErrorHandler(handler.ErrorHandler()) 200 201 inpsrc = InputSource() 202 if not isinstance(content, str): 203 content=content 204 inpsrc.setByteStream(BytesIO(content)) 205 parser.parse(inpsrc) 206 207 208if len(sys.argv) != 2: 209 exitwithusage() 210lint(unicode(sys.argv[1])) 211 212 213 214# Local Variables: *** 215# mode: python *** 216# End: *** 217