1# -*- coding: utf-8 -*-
2
3"""
4Test cases related to SAX I/O
5"""
6
7from __future__ import absolute_import
8
9import unittest
10from xml.dom import pulldom
11from xml.sax.handler import ContentHandler
12
13from .common_imports import HelperTestCase, make_doctest, BytesIO, _bytes
14from lxml import sax
15
16
class ETreeSaxTestCase(HelperTestCase):
    """Round-trip tests for ``lxml.sax``.

    Parsed trees are replayed as SAX events and either rebuilt with
    ``sax.ElementTreeContentHandler`` or fed into ``pulldom.SAX2DOM``;
    further tests drive ``ElementTreeContentHandler`` by hand.
    """

    def test_etree_sax_simple(self):
        # Input and expected serialisation are the same markup.
        markup = '<a>ab<b/>ba</a>'
        serialized = self._saxify_serialize(self.parse(markup))
        self.assertEqual(_bytes(markup), serialized)

    def test_etree_sax_double(self):
        markup = '<a>ab<b>bb</b>ba</a>'
        serialized = self._saxify_serialize(self.parse(markup))
        self.assertEqual(_bytes(markup), serialized)

    def test_etree_sax_comment(self):
        # The comment is expected to be absent from the rebuilt tree.
        source = self.parse('<a>ab<!-- TEST -->ba</a>')
        serialized = self._saxify_serialize(source)
        self.assertEqual(_bytes('<a>abba</a>'), serialized)

    def test_etree_sax_pi(self):
        # The processing instruction is expected to be kept.
        markup = '<a>ab<?this and that?>ba</a>'
        serialized = self._saxify_serialize(self.parse(markup))
        self.assertEqual(_bytes(markup), serialized)

    def test_etree_sax_comment_root(self):
        # Same as the comment test, with the comment before the root.
        source = self.parse('<!-- TEST --><a>ab</a>')
        serialized = self._saxify_serialize(source)
        self.assertEqual(_bytes('<a>ab</a>'), serialized)

    def test_etree_sax_pi_root(self):
        # Same as the PI test, with the PI before the root element.
        markup = '<?this and that?><a>ab</a>'
        serialized = self._saxify_serialize(self.parse(markup))
        self.assertEqual(_bytes(markup), serialized)

    def test_etree_sax_attributes(self):
        markup = '<a aa="5">ab<b b="5"/>ba</a>'
        serialized = self._saxify_serialize(self.parse(markup))
        self.assertEqual(_bytes(markup), serialized)

    def test_etree_sax_ns1(self):
        # One default namespace shared by parent and child.
        source = self.parse('<a xmlns="bla">ab<b>bb</b>ba</a>')
        rebuilt = self._saxify_unsaxify(source)
        top = rebuilt.getroot()
        self.assertEqual('{bla}a', top.tag)
        self.assertEqual('{bla}b', top[0].tag)

    def test_etree_sax_ns2(self):
        # Default namespace on the parent, prefixed namespace on the child.
        source = self.parse(
            '<a xmlns="blaA">ab<b:b xmlns:b="blaB">bb</b:b>ba</a>')
        rebuilt = self._saxify_unsaxify(source)
        top = rebuilt.getroot()
        self.assertEqual('{blaA}a', top.tag)
        self.assertEqual('{blaB}b', top[0].tag)

    def test_sax_to_pulldom(self):
        source = self.parse(
            '<a xmlns="blaA">ab<b:b xmlns:b="blaB">bb</b:b>ba</a>')
        dom_builder = pulldom.SAX2DOM()
        sax.saxify(source, dom_builder)
        top = dom_builder.document.firstChild

        self.assertEqual('a', top.localName)
        self.assertEqual('blaA', top.namespaceURI)
        self.assertEqual(None, top.prefix)

        kids = top.childNodes
        self.assertEqual('ab', kids[0].nodeValue)
        self.assertEqual('blaB', kids[1].namespaceURI)
        self.assertEqual('ba', kids[2].nodeValue)

    def test_sax_to_pulldom_multiple_namespaces(self):
        # With multiple prefix definitions, the node should keep the one
        # that was actually used, even if the others also are valid.
        cases = [
            ('<a xmlns="blaA" xmlns:a="blaA"></a>', None),
            ('<a:a xmlns="blaA" xmlns:a="blaA"></a:a>', 'a'),
        ]
        for markup, used_prefix in cases:
            source = self.parse(markup)
            dom_builder = pulldom.SAX2DOM()
            sax.saxify(source, dom_builder)
            top = dom_builder.document.firstChild

            self.assertEqual('a', top.localName)
            self.assertEqual('blaA', top.namespaceURI)
            self.assertEqual(used_prefix, top.prefix)

    def test_element_sax(self):
        # Elements (not only whole trees) can be used as the SAX source.
        source = self.parse('<a><b/></a>')
        parent = source.getroot()
        child = parent[0]

        serialized = self._saxify_serialize(parent)
        self.assertEqual(_bytes('<a><b/></a>'), serialized)

        serialized = self._saxify_serialize(child)
        self.assertEqual(_bytes('<b/>'), serialized)

    def test_element_sax_ns(self):
        source = self.parse('<a:a xmlns:a="blaA"><b/></a:a>')
        parent = source.getroot()
        child = parent[0]

        # Starting from the namespaced parent element.
        top = self._saxify_unsaxify(parent).getroot()
        self.assertEqual('{blaA}a', top.tag)
        self.assertEqual('b', top[0].tag)

        # Starting from the un-namespaced child element.
        top = self._saxify_unsaxify(child).getroot()
        self.assertEqual('b', top.tag)
        self.assertEqual(0, len(top))

    def test_etree_sax_handler_default_ns(self):
        # Explicitly namespaced elements under nested default-namespace
        # (prefix None) mappings.
        outer_ns, inner_ns = 'blaA', 'blaB'
        builder = sax.ElementTreeContentHandler()
        builder.startDocument()
        builder.startPrefixMapping(None, outer_ns)
        builder.startElementNS((outer_ns, 'a'), 'a', {})
        builder.startPrefixMapping(None, inner_ns)
        builder.startElementNS((inner_ns, 'b'), 'b', {})
        builder.endElementNS((inner_ns, 'b'), 'b')
        builder.endPrefixMapping(None)
        builder.startElementNS((outer_ns, 'c'), 'c', {})
        builder.endElementNS((outer_ns, 'c'), 'c')
        builder.endElementNS((outer_ns, 'a'), 'a')
        builder.endPrefixMapping(None)
        builder.endDocument()

        top = builder.etree.getroot()
        self.assertEqual('{blaA}a', top.tag)
        self.assertEqual('{blaB}b', top[0].tag)
        self.assertEqual('{blaA}c', top[1].tag)

    def test_etree_sax_handler_default_ns_None(self):
        # Element names are passed with a None namespace; the expected
        # tags carry the namespace of the default mapping active then.
        outer_ns, inner_ns = 'blaA', 'blaB'
        builder = sax.ElementTreeContentHandler()
        builder.startDocument()
        builder.startPrefixMapping(None, outer_ns)
        builder.startElementNS((None, 'a'), 'a', {})
        builder.startPrefixMapping(None, inner_ns)
        builder.startElementNS((None, 'b'), 'b', {})
        builder.endElementNS((None, 'b'), 'b')
        builder.endPrefixMapping(None)
        builder.startElementNS((None, 'c'), 'c', {})
        builder.endElementNS((None, 'c'), 'c')
        builder.endElementNS((None, 'a'), 'a')
        builder.endPrefixMapping(None)
        builder.endDocument()

        top = builder.etree.getroot()
        self.assertEqual('{blaA}a', top.tag)
        self.assertEqual('{blaB}b', top[0].tag)
        self.assertEqual('{blaA}c', top[1].tag)

    def test_etree_sax_redefine_ns(self):
        # The same prefix is mapped to a second URI inside the first
        # mapping and then ended again.
        prefix = 'ns'
        outer_ns, inner_ns = 'blaA', 'blaB'
        builder = sax.ElementTreeContentHandler()
        builder.startDocument()
        builder.startPrefixMapping(prefix, outer_ns)
        builder.startElementNS((outer_ns, 'a'), 'ns:a', {})
        builder.startPrefixMapping(prefix, inner_ns)
        builder.startElementNS((inner_ns, 'b'), 'ns:b', {})
        builder.endElementNS((inner_ns, 'b'), 'ns:b')
        builder.endPrefixMapping(prefix)
        builder.startElementNS((outer_ns, 'c'), 'ns:c', {})
        builder.endElementNS((outer_ns, 'c'), 'ns:c')
        builder.endElementNS((outer_ns, 'a'), 'ns:a')
        builder.endPrefixMapping(prefix)
        builder.endDocument()

        top = builder.etree.getroot()
        self.assertEqual('{blaA}a', top.tag)
        self.assertEqual('{blaB}b', top[0].tag)
        self.assertEqual('{blaA}c', top[1].tag)

    def test_etree_sax_no_ns(self):
        builder = sax.ElementTreeContentHandler()
        builder.startDocument()
        builder.startElement('a', {})
        builder.startElement('b', {})
        builder.endElement('b')
        builder.startElement('c')  # attributes argument left out entirely
        builder.endElement('c')
        builder.endElement('a')
        builder.endDocument()

        top = builder.etree.getroot()
        self.assertEqual('a', top.tag)
        self.assertEqual('b', top[0].tag)
        self.assertEqual('c', top[1].tag)

    def test_etree_sax_no_ns_attributes(self):
        builder = sax.ElementTreeContentHandler()
        builder.startDocument()
        builder.startElement('a', {"attr_a1": "a1"})
        builder.startElement('b', {"attr_b1": "b1"})
        builder.endElement('b')
        builder.endElement('a')
        builder.endDocument()

        top = builder.etree.getroot()
        self.assertEqual('a', top.tag)
        self.assertEqual('b', top[0].tag)
        self.assertEqual('a1', top.attrib["attr_a1"])
        self.assertEqual('b1', top[0].attrib["attr_b1"])

    def test_etree_sax_ns_attributes(self):
        builder = sax.ElementTreeContentHandler()
        builder.startDocument()

        # A colon-prefixed attribute name given to the non-NS
        # startElement() is expected to raise ValueError.
        prefixed_attrs = {"blaA:attr_a1": "a1"}
        self.assertRaises(
            ValueError, builder.startElement, 'a', prefixed_attrs)

    def test_etree_sax_error(self):
        # Closing an element other than the open one is an error.
        builder = sax.ElementTreeContentHandler()
        builder.startDocument()
        builder.startElement('a')
        self.assertRaises(sax.SaxError, builder.endElement, 'b')

    def test_etree_sax_error2(self):
        # Closing the parent while its child is still open is an error.
        builder = sax.ElementTreeContentHandler()
        builder.startDocument()
        builder.startElement('a')
        builder.startElement('b')
        self.assertRaises(sax.SaxError, builder.endElement, 'a')

    def _saxify_unsaxify(self, saxifiable):
        """Replay *saxifiable* as SAX events and return the rebuilt tree."""
        builder = sax.ElementTreeContentHandler()
        producer = sax.ElementTreeProducer(saxifiable, builder)
        producer.saxify()
        return builder.etree

    def _saxify_serialize(self, tree):
        """Round-trip *tree* through SAX; return its bytes minus newlines."""
        rebuilt = self._saxify_unsaxify(tree)
        sink = BytesIO()
        rebuilt.write(sink)
        written = sink.getvalue()
        return written.replace(_bytes('\n'), _bytes(''))
296
297
class SimpleContentHandler(ContentHandler, object):
    """A SAX content handler that just stores the events.

    Every callback appends one tuple to ``sax_events``: the callback name
    followed by the arguments it received.  Attribute arguments are stored
    as plain ``dict`` snapshots so that the recorded events can be compared
    against literal values.
    """

    def __init__(self):
        # Chronological list of recorded event tuples.
        self.sax_events = []
        super(SimpleContentHandler, self).__init__()

    def startDocument(self):
        self.sax_events.append(('startDocument',))

    def endDocument(self):
        self.sax_events.append(('endDocument',))

    def startPrefixMapping(self, prefix, uri):
        self.sax_events.append(('startPrefixMapping', prefix, uri))

    def endPrefixMapping(self, prefix):
        self.sax_events.append(('endPrefixMapping', prefix))

    def startElement(self, name, attrs):
        # Copy the attributes so the record is a plain, independent dict.
        self.sax_events.append(('startElement', name, dict(attrs)))

    def endElement(self, name):
        self.sax_events.append(('endElement', name))

    def startElementNS(self, name, qname, attrs):
        """Record the event with a ``{(uri, localname): qname}`` mapping.

        The mapping is built through the public ``getNames()`` /
        ``getQNameByName()`` attribute interface rather than the private
        ``_qnames`` dict, and it is a fresh dict, so the recorded event
        does not alias the caller's attributes object.
        """
        qnames = dict(
            (attr_name, attrs.getQNameByName(attr_name))
            for attr_name in attrs.getNames())
        self.sax_events.append(('startElementNS', name, qname, qnames))

    def endElementNS(self, name, qname):
        self.sax_events.append(('endElementNS', name, qname))

    def characters(self, content):
        self.sax_events.append(('characters', content))

    def ignorableWhitespace(self, whitespace):
        self.sax_events.append(('ignorableWhitespace', whitespace))

    def processingInstruction(self, target, data):
        self.sax_events.append(('processingInstruction', target, data))

    def skippedEntity(self, name):
        self.sax_events.append(('skippedEntity', name))
340
341
class NSPrefixSaxTestCase(HelperTestCase):
    """Checks the prefixes and qnames in SAX events from namespaced trees"""

    def _saxify(self, tree):
        """Replay *tree* into a recording handler; return its event list."""
        recorder = SimpleContentHandler()
        producer = sax.ElementTreeProducer(tree, recorder)
        producer.saxify()
        return recorder.sax_events

    def test_element_sax_ns_prefix(self):
        # A prefix bound to a unique URI should be reported unchanged.
        source = self.parse(
            '<a:a xmlns:a="blaA" xmlns:c="blaC">'
            '<d a:attr="value" c:attr="value" /></a:a>')
        top = source.getroot()

        element_events = [
            ('startElementNS', ('blaA', 'a'), 'a:a', {}),
            ('startElementNS', (None, 'd'), 'd',
             {('blaA', 'attr'): 'a:attr', ('blaC', 'attr'): 'c:attr'}),
            ('endElementNS', (None, 'd'), 'd'),
            ('endElementNS', ('blaA', 'a'), 'a:a'),
        ]
        # Only the element events (positions 3..6) are compared.
        self.assertEqual(element_events, self._saxify(top)[3:7])

    def test_element_sax_default_ns_prefix(self):
        # A default namespace should be reported with prefix None
        # rather than with a generated prefix.
        source = self.parse('<a xmlns="blaA"><b attr="value" /></a>')
        top = source.getroot()

        all_events = [
            ('startDocument',),
            # prefix of the mapping is None
            ('startPrefixMapping', None, 'blaA'),
            ('startElementNS', ('blaA', 'a'), 'a', {}),
            # the un-prefixed attribute has a None namespace
            ('startElementNS', ('blaA', 'b'), 'b', {(None, 'attr'): 'attr'}),
            ('endElementNS', ('blaA', 'b'), 'b'),
            ('endElementNS', ('blaA', 'a'), 'a'),
            # and None again when the mapping ends
            ('endPrefixMapping', None),
            ('endDocument',),
        ]
        self.assertEqual(all_events, self._saxify(top))

        # Attributes are the exception: when the same URI is bound both
        # as the default namespace and to a named prefix, the attribute
        # qname uses the named prefix.
        source = self.parse(
            '<a xmlns="bla" xmlns:a="bla">'
            '<b a:attr="value" /></a>')
        top = source.getroot()

        child_start = (
            'startElementNS', ('bla', 'b'), 'b', {('bla', 'attr'): 'a:attr'})
        self.assertEqual(child_start, self._saxify(top)[4])

    def test_element_sax_twin_ns_prefix(self):
        # One URI registered under two different prefixes.
        source = self.parse(
            '<a xmlns:b="bla" xmlns:c="bla">'
            '<d c:attr="attr" /></a>')
        top = source.getroot()

        # The attribute qname is expected to use the "b" prefix here.
        child_start = (
            'startElementNS', (None, 'd'), 'd', {('bla', 'attr'): 'b:attr'})
        self.assertEqual(child_start, self._saxify(top)[4])
404
405
def test_suite():
    """Return a suite with both SAX test cases and the ``sax.txt`` doctests.

    Test cases are collected with the default ``TestLoader`` because
    ``unittest.makeSuite()`` was deprecated in Python 3.11 and removed in
    Python 3.13.
    """
    collect = unittest.defaultTestLoader.loadTestsFromTestCase
    suite = unittest.TestSuite()
    suite.addTests([collect(ETreeSaxTestCase)])
    suite.addTests([collect(NSPrefixSaxTestCase)])
    suite.addTests(
        [make_doctest('../../../doc/sax.txt')])
    return suite
413
414
if __name__ == '__main__':
    # Running this module directly only prints a hint to use test.py.
    print('to test use test.py %s' % (__file__,))
417