1##############################################################################
2#
3# Copyright (c) 2001, 2002 Zope Foundation and Contributors.
4# All Rights Reserved.
5#
6# This software is subject to the provisions of the Zope Public License,
7# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
8# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
9# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
10# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
11# FOR A PARTICULAR PURPOSE.
12#
13##############################################################################
14"""Generic Expat-based XML parser base class.
15
16This creates a parser with namespace processing enabled.
17"""
18import logging
19
20
class XMLParser(object):
    """Base class wrapping an Expat parser with namespace processing.

    Subclasses define methods named after the Expat handlers listed in
    ``handler_names``.  ``__init__`` installs every such method found on
    the instance as the corresponding callback of the underlying parser,
    which is kept in ``self.parser``.
    """

    # Set to a true value in a subclass to request Expat's
    # ``ordered_attributes`` mode on the underlying parser.
    ordered_attributes = 0

    # Names of the Expat callbacks that __init__ looks up on the instance.
    handler_names = [
        "StartElementHandler",
        "EndElementHandler",
        "ProcessingInstructionHandler",
        "CharacterDataHandler",
        "UnparsedEntityDeclHandler",
        "NotationDeclHandler",
        "StartNamespaceDeclHandler",
        "EndNamespaceDeclHandler",
        "CommentHandler",
        "StartCdataSectionHandler",
        "EndCdataSectionHandler",
        "DefaultHandler",
        "DefaultHandlerExpand",
        "NotStandaloneHandler",
        "ExternalEntityRefHandler",
        "XmlDeclHandler",
        "StartDoctypeDeclHandler",
        "EndDoctypeDeclHandler",
        "ElementDeclHandler",
        "AttlistDeclHandler",
        ]

    # Text (as opposed to byte) string type: ``unicode`` on Python 2 and
    # ``str`` on Python 3, where the name ``unicode`` does not exist.
    try:
        _text_type = unicode  # noqa: F821
    except NameError:
        _text_type = str

    def __init__(self, encoding=None):
        """Create the Expat parser and hook up the handler methods.

        ``encoding`` is passed unchanged to ``createParser``.
        """
        self.parser = parser = self.createParser(encoding)
        if self.ordered_attributes:
            try:
                parser.ordered_attributes = self.ordered_attributes
            except AttributeError:
                logging.warning("TAL.XMLParser: Can't set ordered_attributes")
                # Record on the instance that the mode is not in effect.
                self.ordered_attributes = 0
        for name in self.handler_names:
            method = getattr(self, name, None)
            if method is None:
                continue
            try:
                setattr(parser, name, method)
            except AttributeError:
                logging.error("TAL.XMLParser: Can't set expat handler %s",
                              name)

    def createParser(self, encoding=None):
        """Return a new Expat parser using ' ' as namespace separator.

        As a side effect, the module-level name ``XMLParseError`` is bound
        to ``expat.ExpatError``.
        """
        global XMLParseError
        from xml.parsers import expat
        XMLParseError = expat.ExpatError
        return expat.ParserCreate(encoding, ' ')

    def parseFile(self, filename):
        """Parse the XML document stored in the file named ``filename``."""
        # Binary mode hands Expat the raw bytes so that it can apply the
        # document's own encoding; the ``with`` block guarantees the file
        # is closed even when parsing raises.
        with open(filename, 'rb') as stream:
            self.parseStream(stream)

    def parseString(self, s):
        """Parse ``s`` as one complete XML document."""
        if isinstance(s, self._text_type):
            # Expat cannot deal with unicode strings, only with
            # encoded ones.  Also, its range of encodings is rather
            # limited, UTF-8 is the safest bet here.
            s = s.encode('utf-8')
        self.parser.Parse(s, 1)

    def parseURL(self, url):
        """Open ``url`` and parse the XML document read from it."""
        try:
            from urllib.request import urlopen  # Python 3
        except ImportError:
            from urllib import urlopen  # Python 2
        stream = urlopen(url)
        try:
            self.parseStream(stream)
        finally:
            # Release the connection even when parsing raises.
            stream.close()

    def parseStream(self, stream):
        """Parse a document by handing ``stream`` to Expat's ParseFile."""
        self.parser.ParseFile(stream)

    def parseFragment(self, s, end=0):
        """Feed the chunk ``s`` to the parser.

        ``end`` is passed to Expat as the "is final" flag; pass a true
        value with the last chunk of the document.
        """
        self.parser.Parse(s, end)

    def getpos(self):
        """Return the parser position as a ``(line, column)`` tuple."""
        # Apparently ErrorLineNumber and ErrorColumnNumber contain the current
        # position even when there was no error.  This contradicts the official
        # documentation[1], but expat.h[2] contains the following definition:
        #
        #   /* For backwards compatibility with previous versions. */
        #   #define XML_GetErrorLineNumber   XML_GetCurrentLineNumber
        #
        # [1] http://python.org/doc/current/lib/xmlparser-objects.html
        # [2] http://cvs.sourceforge.net/viewcvs.py/expat/expat/lib/expat.h
        return (self.parser.ErrorLineNumber, self.parser.ErrorColumnNumber)
103
104