1import io
2import unittest
3import xml.sax
4
5from xml.sax.xmlreader import AttributesImpl
6from xml.sax.handler import feature_external_ges
7from xml.dom import pulldom
8
9from test.support import findfile
10
11
# Path of the sample XML document that test_parse() reads, once by
# filename and once through an already-open binary file object:
tstfile = findfile("test.xml", subdir="xmltestdata")
13
# Compact XML fixture shared by the tests below.  It contains namespace
# declarations, a comment, a self-closing tag, and an element carrying both
# a prefixed and an unprefixed attribute.  One literal per XML line; the
# lines are joined with "\n" and there is no trailing newline:
SMALL_SAMPLE = "\n".join([
    '<?xml version="1.0"?>',
    '<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">',
    '<!-- A comment -->',
    '<title>Introduction to XSL</title>',
    '<hr/>',
    '<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>',
    '</html>',
])
23
24
class PullDOMTestCase(unittest.TestCase):
    """Tests of the (event, node) stream pulldom produces for real XML."""

    def _next_event(self, items, expected_evt, tag_name=None):
        """Pull the next (event, node) pair from *items* and check it.

        Asserts that the event type equals *expected_evt* and, when
        *tag_name* is given, that the node's tagName equals it.  Returns
        the node so the caller can make further assertions on it.
        """
        evt, node = next(items)
        self.assertEqual(expected_evt, evt)
        if tag_name is not None:
            self.assertEqual(tag_name, node.tagName)
        return node

    def test_parse(self):
        """Minimal test of DOMEventStream.parse()"""

        # This just tests that parsing from a stream works. Actual parser
        # semantics are tested using parseString with a more focused XML
        # fragment.

        # Test with a filename:
        handler = pulldom.parse(tstfile)
        # Close the stream held by the handler once the test is over:
        self.addCleanup(handler.stream.close)
        list(handler)

        # Test with a file object:
        with open(tstfile, "rb") as fin:
            list(pulldom.parse(fin))

    def test_parse_semantics(self):
        """Test DOMEventStream parsing semantics."""
        items = pulldom.parseString(SMALL_SAMPLE)

        node = self._next_event(items, pulldom.START_DOCUMENT)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))

        node = self._next_event(items, pulldom.START_ELEMENT, "html")
        self.assertEqual(2, len(node.attributes))
        self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value,
                         "http://www.xml.com/books")
        self._next_event(items, pulldom.CHARACTERS)  # Line break
        # XXX - A comment should be reported here, but it is swallowed
        # (see test_comment); only the line break following it shows up:
        self._next_event(items, pulldom.CHARACTERS)

        title_node = self._next_event(items, pulldom.START_ELEMENT, "title")
        node = self._next_event(items, pulldom.CHARACTERS)
        self.assertEqual("Introduction to XSL", node.data)
        node = self._next_event(items, pulldom.END_ELEMENT, "title")
        # The end event must hand back the very same node object that the
        # start event delivered:
        self.assertIs(title_node, node)
        self._next_event(items, pulldom.CHARACTERS)

        self._next_event(items, pulldom.START_ELEMENT, "hr")
        self._next_event(items, pulldom.END_ELEMENT, "hr")
        self._next_event(items, pulldom.CHARACTERS)

        self._next_event(items, pulldom.START_ELEMENT, "p")
        self._next_event(items, pulldom.START_ELEMENT, "xdc:author")
        self._next_event(items, pulldom.CHARACTERS)
        self._next_event(items, pulldom.END_ELEMENT, "xdc:author")
        self._next_event(items, pulldom.END_ELEMENT)
        self._next_event(items, pulldom.CHARACTERS)
        self._next_event(items, pulldom.END_ELEMENT)
        # XXX No END_DOCUMENT item is ever obtained here; see
        # test_end_document.

    def test_expandItem(self):
        """Ensure expandNode works as expected."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Loop through the nodes until we get to a "title" start tag:
        for evt, item in items:
            if evt == pulldom.START_ELEMENT and item.tagName == "title":
                items.expandNode(item)
                self.assertEqual(1, len(item.childNodes))
                break
        else:
            self.fail("No \"title\" element detected in SMALL_SAMPLE!")
        # Loop until we get to the next start-element:
        for evt, node in items:
            if evt == pulldom.START_ELEMENT:
                break
        self.assertEqual("hr", node.tagName,
            "expandNode did not leave DOMEventStream in the correct state.")
        # Attempt to expand a standalone element:
        items.expandNode(node)
        self.assertEqual(next(items)[0], pulldom.CHARACTERS)
        evt, node = next(items)
        self.assertEqual(node.tagName, "p")
        items.expandNode(node)
        next(items)  # Skip character data
        evt, node = next(items)
        self.assertEqual(node.tagName, "html")
        # Nothing may follow the event for the root element:
        with self.assertRaises(StopIteration):
            next(items)
        # clear() is expected to drop the parser and stream references:
        items.clear()
        self.assertIsNone(items.parser)
        self.assertIsNone(items.stream)

    @unittest.expectedFailure
    def test_comment(self):
        """PullDOM does not receive "comment" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        for evt, _ in items:
            if evt == pulldom.COMMENT:
                break
        else:
            self.fail("No comment was encountered")

    @unittest.expectedFailure
    def test_end_document(self):
        """PullDOM does not receive "end-document" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Read all of the nodes up to and including </html>:
        for evt, node in items:
            if evt == pulldom.END_ELEMENT and node.tagName == "html":
                break
        # Only the next() call can raise StopIteration, so keep the
        # assertion on the event type outside the try block:
        try:
            evt, node = next(items)
        except StopIteration:
            self.fail(
                "Ran out of events, but should have received END_DOCUMENT")
        # Assert that the node after </html> is END_DOCUMENT:
        self.assertEqual(pulldom.END_DOCUMENT, evt)

    def test_external_ges_default(self):
        """The underlying SAX parser reports external general entities off."""
        parser = pulldom.parseString(SMALL_SAMPLE)
        saxparser = parser.parser
        ges = saxparser.getFeature(feature_external_ges)
        # Compared by equality (not identity) on purpose: the parser may
        # report the disabled feature as a falsy integer.
        self.assertEqual(ges, False)
168
169
class ThoroughTestCase(unittest.TestCase):
    """Cover pulldom code paths that are hard to reach with real XML input."""

    def test_thorough_parse(self):
        """Run the scripted event sequence through pulldom.parse()."""
        stream = pulldom.parse(None, parser=SAXExerciser())
        self._test_thorough(stream)

    @unittest.expectedFailure
    def test_sax2dom_fail(self):
        """SAX2DOM can't handle a PI before the root element."""
        stream = SAX2DOMTestHelper(None, SAXExerciser(), 12)
        self._test_thorough(stream)

    def test_thorough_sax2dom(self):
        """Run the root-first scripted event sequence through SAX2DOM."""
        stream = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12)
        self._test_thorough(stream, False)

    def _expect_comment_then_pi(self, pd):
        """Expect a comment event followed by a processing instruction."""
        kind, node = next(pd)
        self.assertEqual(pulldom.COMMENT, kind)
        self.assertEqual("a comment", node.data)
        kind, node = next(pd)
        self.assertEqual(pulldom.PROCESSING_INSTRUCTION, kind)
        self.assertEqual("target", node.target)
        self.assertEqual("data", node.data)

    def _expect_tag(self, pd, expected_kind, tag):
        """Expect the next event to be *expected_kind* for element *tag*."""
        kind, node = next(pd)
        self.assertEqual(expected_kind, kind)
        self.assertEqual(tag, node.tagName)

    def _test_thorough(self, pd, before_root=True):
        """Check the events that a mock parser produces on the stream *pd*.

        When *before_root* is true, a comment and a processing instruction
        are additionally expected ahead of the root element.
        """
        kind, node = next(pd)
        self.assertEqual(pulldom.START_DOCUMENT, kind)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))

        if before_root:
            self._expect_comment_then_pi(pd)

        self._expect_tag(pd, pulldom.START_ELEMENT, "html")
        self._expect_comment_then_pi(pd)
        self._expect_tag(pd, pulldom.START_ELEMENT, "p")

        kind, node = next(pd)
        self.assertEqual(pulldom.CHARACTERS, kind)
        self.assertEqual("text", node.data)

        self._expect_tag(pd, pulldom.END_ELEMENT, "p")
        self._expect_tag(pd, pulldom.END_ELEMENT, "html")

        kind, node = next(pd)
        self.assertEqual(pulldom.END_DOCUMENT, kind)
233
234
class SAXExerciser:
    """Scripted stand-in for a SAX parser.

    Nothing is actually parsed: parse() replays a fixed series of callbacks
    on the registered content handler, including a comment and a processing
    instruction that arrive before the root element.
    """

    def setContentHandler(self, handler):
        """Remember *handler* as the receiver of the replayed callbacks."""
        self._handler = handler

    def _emit_comment_and_pi(self, handler):
        """Send one comment and one processing instruction to *handler*."""
        handler.comment("a comment")
        handler.processingInstruction("target", "data")

    def parse(self, _):
        """Replay the canned callback sequence; the argument is ignored."""
        handler = self._handler
        handler.startDocument()

        # Sent ahead of the first start-element, so that the handler has
        # to store these items and emit them properly later on:
        self._emit_comment_and_pi(handler)

        handler.startElement("html", AttributesImpl({}))

        # The same pair again, this time inside the root element:
        self._emit_comment_and_pi(handler)

        paragraph_attrs = AttributesImpl({"class": "paraclass"})
        handler.startElement("p", paragraph_attrs)
        handler.characters("text")
        handler.endElement("p")
        handler.endElement("html")
        handler.endDocument()

    def stub(self, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None

    # Configuration calls made on the parser are accepted and ignored:
    setProperty = setFeature = stub
267
268
class SAX2DOMExerciser(SAXExerciser):
    """Variant of SAXExerciser whose script starts at the root element.

    No comment or processing instruction is sent before the root element,
    because SAX2DOM can't handle them there.
    """

    def parse(self, _):
        """Replay the root-first callback sequence; the argument is ignored."""
        handler = self._handler
        # (callback name, positional arguments), in the order they are sent:
        script = (
            ("startDocument", ()),
            ("startElement", ("html", AttributesImpl({}))),
            ("comment", ("a comment",)),
            ("processingInstruction", ("target", "data")),
            ("startElement", ("p", AttributesImpl({"class": "paraclass"}))),
            ("characters", ("text",)),
            ("endElement", ("p",)),
            ("endElement", ("html",)),
            ("endDocument", ()),
        )
        for callback, arguments in script:
            getattr(handler, callback)(*arguments)
284
285
class SAX2DOMTestHelper(pulldom.DOMEventStream):
    """DOMEventStream variant that uses SAX2DOM as its content handler."""

    def reset(self):
        """Install a fresh SAX2DOM handler on the underlying parser."""
        sax_parser = self.parser
        dom_builder = pulldom.SAX2DOM()
        self.pulldom = dom_builder
        # SAX2DOM relies on namespace support, so switch it on before the
        # handler is attached:
        sax_parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        sax_parser.setContentHandler(dom_builder)
294
295
class SAX2DOMTestCase(unittest.TestCase):
    """Tests for the SAX2DOM content handler."""

    def confirm(self, test, testname="Test"):
        """Assert that *test* is true, using *testname* as the message."""
        self.assertTrue(test, testname)

    def test_basic(self):
        """Ensure SAX2DOM can parse from a stream."""
        bufsize = len(SMALL_SAMPLE)
        with io.StringIO(SMALL_SAMPLE) as source:
            events = SAX2DOMTestHelper(source, xml.sax.make_parser(), bufsize)
            for kind, node in events:
                if kind != pulldom.START_ELEMENT:
                    continue
                if node.tagName == "html":
                    break
            # The buffer is as long as the whole XML text, so every node
            # should already have been parsed and attached:
            self.assertGreater(len(node.childNodes), 0)

    def testSAX2DOM(self):
        """Ensure SAX2DOM expands nodes as expected."""
        builder = pulldom.SAX2DOM()
        # Hand-feed the events for <doc>text<subelm>text</subelm>text</doc>:
        builder.startDocument()
        builder.startElement("doc", {})
        builder.characters("text")
        builder.startElement("subelm", {})
        builder.characters("text")
        builder.endElement("subelm")
        builder.characters("text")
        builder.endElement("doc")
        builder.endDocument()

        doc = builder.document
        root = doc.documentElement
        leading, child, trailing = root.childNodes
        inner = child.childNodes[0]

        # (node, expected previousSibling, expected nextSibling)
        sibling_links = (
            (leading, None, child),
            (child, leading, trailing),
            (trailing, child, None),
            (inner, None, None),
        )
        for node, before, after in sibling_links:
            self.assertIs(node.previousSibling, before)
            self.assertIs(node.nextSibling, after)

        # (node, expected parentNode)
        parent_links = (
            (root, doc),
            (leading, root),
            (child, root),
            (trailing, root),
            (inner, child),
        )
        for node, parent in parent_links:
            self.assertIs(node.parentNode, parent)
        doc.unlink()
346
347
# Run the tests in this module when the file is executed as a script:
if __name__ == "__main__":
    unittest.main()
350