1import io 2import unittest 3import xml.sax 4 5from xml.sax.xmlreader import AttributesImpl 6from xml.sax.handler import feature_external_ges 7from xml.dom import pulldom 8 9from test.support import findfile 10 11 12tstfile = findfile("test.xml", subdir="xmltestdata") 13 14# A handy XML snippet, containing attributes, a namespace prefix, and a 15# self-closing tag: 16SMALL_SAMPLE = """<?xml version="1.0"?> 17<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books"> 18<!-- A comment --> 19<title>Introduction to XSL</title> 20<hr/> 21<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p> 22</html>""" 23 24 25class PullDOMTestCase(unittest.TestCase): 26 27 def test_parse(self): 28 """Minimal test of DOMEventStream.parse()""" 29 30 # This just tests that parsing from a stream works. Actual parser 31 # semantics are tested using parseString with a more focused XML 32 # fragment. 33 34 # Test with a filename: 35 handler = pulldom.parse(tstfile) 36 self.addCleanup(handler.stream.close) 37 list(handler) 38 39 # Test with a file object: 40 with open(tstfile, "rb") as fin: 41 list(pulldom.parse(fin)) 42 43 def test_parse_semantics(self): 44 """Test DOMEventStream parsing semantics.""" 45 46 items = pulldom.parseString(SMALL_SAMPLE) 47 evt, node = next(items) 48 # Just check the node is a Document: 49 self.assertTrue(hasattr(node, "createElement")) 50 self.assertEqual(pulldom.START_DOCUMENT, evt) 51 evt, node = next(items) 52 self.assertEqual(pulldom.START_ELEMENT, evt) 53 self.assertEqual("html", node.tagName) 54 self.assertEqual(2, len(node.attributes)) 55 self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value, 56 "http://www.xml.com/books") 57 evt, node = next(items) 58 self.assertEqual(pulldom.CHARACTERS, evt) # Line break 59 evt, node = next(items) 60 # XXX - A comment should be reported here! 61 # self.assertEqual(pulldom.COMMENT, evt) 62 # Line break after swallowed comment: 63 self.assertEqual(pulldom.CHARACTERS, evt) 64 evt, node = next(items) 65 self.assertEqual("title", node.tagName) 66 title_node = node 67 evt, node = next(items) 68 self.assertEqual(pulldom.CHARACTERS, evt) 69 self.assertEqual("Introduction to XSL", node.data) 70 evt, node = next(items) 71 self.assertEqual(pulldom.END_ELEMENT, evt) 72 self.assertEqual("title", node.tagName) 73 self.assertTrue(title_node is node) 74 evt, node = next(items) 75 self.assertEqual(pulldom.CHARACTERS, evt) 76 evt, node = next(items) 77 self.assertEqual(pulldom.START_ELEMENT, evt) 78 self.assertEqual("hr", node.tagName) 79 evt, node = next(items) 80 self.assertEqual(pulldom.END_ELEMENT, evt) 81 self.assertEqual("hr", node.tagName) 82 evt, node = next(items) 83 self.assertEqual(pulldom.CHARACTERS, evt) 84 evt, node = next(items) 85 self.assertEqual(pulldom.START_ELEMENT, evt) 86 self.assertEqual("p", node.tagName) 87 evt, node = next(items) 88 self.assertEqual(pulldom.START_ELEMENT, evt) 89 self.assertEqual("xdc:author", node.tagName) 90 evt, node = next(items) 91 self.assertEqual(pulldom.CHARACTERS, evt) 92 evt, node = next(items) 93 self.assertEqual(pulldom.END_ELEMENT, evt) 94 self.assertEqual("xdc:author", node.tagName) 95 evt, node = next(items) 96 self.assertEqual(pulldom.END_ELEMENT, evt) 97 evt, node = next(items) 98 self.assertEqual(pulldom.CHARACTERS, evt) 99 evt, node = next(items) 100 self.assertEqual(pulldom.END_ELEMENT, evt) 101 # XXX No END_DOCUMENT item is ever obtained: 102 #evt, node = next(items) 103 #self.assertEqual(pulldom.END_DOCUMENT, evt) 104 105 def test_expandItem(self): 106 """Ensure expandItem works as expected.""" 107 items = pulldom.parseString(SMALL_SAMPLE) 108 # Loop through the nodes until we get to a "title" start tag: 109 for evt, item in items: 110 if evt == pulldom.START_ELEMENT and item.tagName == "title": 111 items.expandNode(item) 112 self.assertEqual(1, len(item.childNodes)) 113 break 114 else: 115 self.fail("No \"title\" element detected in SMALL_SAMPLE!") 116 # Loop until we get to the next start-element: 117 for evt, node in items: 118 if evt == pulldom.START_ELEMENT: 119 break 120 self.assertEqual("hr", node.tagName, 121 "expandNode did not leave DOMEventStream in the correct state.") 122 # Attempt to expand a standalone element: 123 items.expandNode(node) 124 self.assertEqual(next(items)[0], pulldom.CHARACTERS) 125 evt, node = next(items) 126 self.assertEqual(node.tagName, "p") 127 items.expandNode(node) 128 next(items) # Skip character data 129 evt, node = next(items) 130 self.assertEqual(node.tagName, "html") 131 with self.assertRaises(StopIteration): 132 next(items) 133 items.clear() 134 self.assertIsNone(items.parser) 135 self.assertIsNone(items.stream) 136 137 @unittest.expectedFailure 138 def test_comment(self): 139 """PullDOM does not receive "comment" events.""" 140 items = pulldom.parseString(SMALL_SAMPLE) 141 for evt, _ in items: 142 if evt == pulldom.COMMENT: 143 break 144 else: 145 self.fail("No comment was encountered") 146 147 @unittest.expectedFailure 148 def test_end_document(self): 149 """PullDOM does not receive "end-document" events.""" 150 items = pulldom.parseString(SMALL_SAMPLE) 151 # Read all of the nodes up to and including </html>: 152 for evt, node in items: 153 if evt == pulldom.END_ELEMENT and node.tagName == "html": 154 break 155 try: 156 # Assert that the next node is END_DOCUMENT: 157 evt, node = next(items) 158 self.assertEqual(pulldom.END_DOCUMENT, evt) 159 except StopIteration: 160 self.fail( 161 "Ran out of events, but should have received END_DOCUMENT") 162 163 def test_external_ges_default(self): 164 parser = pulldom.parseString(SMALL_SAMPLE) 165 saxparser = parser.parser 166 ges = saxparser.getFeature(feature_external_ges) 167 self.assertEqual(ges, False) 168 169 170class ThoroughTestCase(unittest.TestCase): 171 """Test the hard-to-reach parts of pulldom.""" 172 173 def test_thorough_parse(self): 174 """Test some of the hard-to-reach parts of PullDOM.""" 175 self._test_thorough(pulldom.parse(None, parser=SAXExerciser())) 176 177 @unittest.expectedFailure 178 def test_sax2dom_fail(self): 179 """SAX2DOM can"t handle a PI before the root element.""" 180 pd = SAX2DOMTestHelper(None, SAXExerciser(), 12) 181 self._test_thorough(pd) 182 183 def test_thorough_sax2dom(self): 184 """Test some of the hard-to-reach parts of SAX2DOM.""" 185 pd = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12) 186 self._test_thorough(pd, False) 187 188 def _test_thorough(self, pd, before_root=True): 189 """Test some of the hard-to-reach parts of the parser, using a mock 190 parser.""" 191 192 evt, node = next(pd) 193 self.assertEqual(pulldom.START_DOCUMENT, evt) 194 # Just check the node is a Document: 195 self.assertTrue(hasattr(node, "createElement")) 196 197 if before_root: 198 evt, node = next(pd) 199 self.assertEqual(pulldom.COMMENT, evt) 200 self.assertEqual("a comment", node.data) 201 evt, node = next(pd) 202 self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt) 203 self.assertEqual("target", node.target) 204 self.assertEqual("data", node.data) 205 206 evt, node = next(pd) 207 self.assertEqual(pulldom.START_ELEMENT, evt) 208 self.assertEqual("html", node.tagName) 209 210 evt, node = next(pd) 211 self.assertEqual(pulldom.COMMENT, evt) 212 self.assertEqual("a comment", node.data) 213 evt, node = next(pd) 214 self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt) 215 self.assertEqual("target", node.target) 216 self.assertEqual("data", node.data) 217 218 evt, node = next(pd) 219 self.assertEqual(pulldom.START_ELEMENT, evt) 220 self.assertEqual("p", node.tagName) 221 222 evt, node = next(pd) 223 self.assertEqual(pulldom.CHARACTERS, evt) 224 self.assertEqual("text", node.data) 225 evt, node = next(pd) 226 self.assertEqual(pulldom.END_ELEMENT, evt) 227 self.assertEqual("p", node.tagName) 228 evt, node = next(pd) 229 self.assertEqual(pulldom.END_ELEMENT, evt) 230 self.assertEqual("html", node.tagName) 231 evt, node = next(pd) 232 self.assertEqual(pulldom.END_DOCUMENT, evt) 233 234 235class SAXExerciser(object): 236 """A fake sax parser that calls some of the harder-to-reach sax methods to 237 ensure it emits the correct events""" 238 239 def setContentHandler(self, handler): 240 self._handler = handler 241 242 def parse(self, _): 243 h = self._handler 244 h.startDocument() 245 246 # The next two items ensure that items preceding the first 247 # start_element are properly stored and emitted: 248 h.comment("a comment") 249 h.processingInstruction("target", "data") 250 251 h.startElement("html", AttributesImpl({})) 252 253 h.comment("a comment") 254 h.processingInstruction("target", "data") 255 256 h.startElement("p", AttributesImpl({"class": "paraclass"})) 257 h.characters("text") 258 h.endElement("p") 259 h.endElement("html") 260 h.endDocument() 261 262 def stub(self, *args, **kwargs): 263 """Stub method. Does nothing.""" 264 pass 265 setProperty = stub 266 setFeature = stub 267 268 269class SAX2DOMExerciser(SAXExerciser): 270 """The same as SAXExerciser, but without the processing instruction and 271 comment before the root element, because S2D can"t handle it""" 272 273 def parse(self, _): 274 h = self._handler 275 h.startDocument() 276 h.startElement("html", AttributesImpl({})) 277 h.comment("a comment") 278 h.processingInstruction("target", "data") 279 h.startElement("p", AttributesImpl({"class": "paraclass"})) 280 h.characters("text") 281 h.endElement("p") 282 h.endElement("html") 283 h.endDocument() 284 285 286class SAX2DOMTestHelper(pulldom.DOMEventStream): 287 """Allows us to drive SAX2DOM from a DOMEventStream.""" 288 289 def reset(self): 290 self.pulldom = pulldom.SAX2DOM() 291 # This content handler relies on namespace support 292 self.parser.setFeature(xml.sax.handler.feature_namespaces, 1) 293 self.parser.setContentHandler(self.pulldom) 294 295 296class SAX2DOMTestCase(unittest.TestCase): 297 298 def confirm(self, test, testname="Test"): 299 self.assertTrue(test, testname) 300 301 def test_basic(self): 302 """Ensure SAX2DOM can parse from a stream.""" 303 with io.StringIO(SMALL_SAMPLE) as fin: 304 sd = SAX2DOMTestHelper(fin, xml.sax.make_parser(), 305 len(SMALL_SAMPLE)) 306 for evt, node in sd: 307 if evt == pulldom.START_ELEMENT and node.tagName == "html": 308 break 309 # Because the buffer is the same length as the XML, all the 310 # nodes should have been parsed and added: 311 self.assertGreater(len(node.childNodes), 0) 312 313 def testSAX2DOM(self): 314 """Ensure SAX2DOM expands nodes as expected.""" 315 sax2dom = pulldom.SAX2DOM() 316 sax2dom.startDocument() 317 sax2dom.startElement("doc", {}) 318 sax2dom.characters("text") 319 sax2dom.startElement("subelm", {}) 320 sax2dom.characters("text") 321 sax2dom.endElement("subelm") 322 sax2dom.characters("text") 323 sax2dom.endElement("doc") 324 sax2dom.endDocument() 325 326 doc = sax2dom.document 327 root = doc.documentElement 328 (text1, elm1, text2) = root.childNodes 329 text3 = elm1.childNodes[0] 330 331 self.assertIsNone(text1.previousSibling) 332 self.assertIs(text1.nextSibling, elm1) 333 self.assertIs(elm1.previousSibling, text1) 334 self.assertIs(elm1.nextSibling, text2) 335 self.assertIs(text2.previousSibling, elm1) 336 self.assertIsNone(text2.nextSibling) 337 self.assertIsNone(text3.previousSibling) 338 self.assertIsNone(text3.nextSibling) 339 340 self.assertIs(root.parentNode, doc) 341 self.assertIs(text1.parentNode, root) 342 self.assertIs(elm1.parentNode, root) 343 self.assertIs(text2.parentNode, root) 344 self.assertIs(text3.parentNode, elm1) 345 doc.unlink() 346 347 348if __name__ == "__main__": 349 unittest.main() 350