1# regression test for SAX 2.0
2# $Id$
3
4from xml.sax import make_parser, ContentHandler, \
5                    SAXException, SAXReaderNotAvailable, SAXParseException
6import unittest
7from unittest import mock
8try:
9    make_parser()
10except SAXReaderNotAvailable:
11    # don't try to test this module if we cannot create a parser
12    raise unittest.SkipTest("no XML parsers available")
13from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \
14                             XMLFilterBase, prepare_input_source
15from xml.sax.expatreader import create_parser
16from xml.sax.handler import (feature_namespaces, feature_external_ges,
17                             LexicalHandler)
18from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl
19from io import BytesIO, StringIO
20import codecs
21import os.path
22import shutil
23import sys
24from urllib.error import URLError
25import urllib.request
26from test.support import os_helper
27from test.support import findfile
28from test.support.os_helper import FakePath, TESTFN
29
30
# Paths of the XML fixture and of its expected output, both looked up in
# the "xmltestdata" subdirectory.
TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata")
TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata")
try:
    # Skip the whole module when either fixture path cannot be encoded
    # as UTF-8.
    TEST_XMLFILE.encode("utf-8")
    TEST_XMLFILE_OUT.encode("utf-8")
except UnicodeEncodeError:
    raise unittest.SkipTest("filename is not encodable to utf8")

# Non-ASCII file names are assumed to work unless the platform does not
# advertise Unicode file name support and the sample name below cannot be
# encoded with the file system encoding.
supports_nonascii_filenames = True
if not os.path.supports_unicode_filenames:
    try:
        os_helper.TESTFN_UNICODE.encode(sys.getfilesystemencoding())
    except (UnicodeError, TypeError):
        # Either the file system encoding is None, or the file name
        # cannot be encoded in the file system encoding.
        supports_nonascii_filenames = False
# Decorator that skips a test when supports_nonascii_filenames is false.
requires_nonascii_filenames = unittest.skipUnless(
        supports_nonascii_filenames,
        'Requires non-ascii filenames support')

# Namespace URI shared by the namespace-aware checks in this module.
ns_uri = "http://www.python.org/xml-ns/saxtest/"
52
class XmlTestBase(unittest.TestCase):
    """Shared assertions about SAX attribute collections.

    Each helper exercises both the SAX accessor methods (getValue,
    getNames, getQNames, ...) and the mapping-style interface (len, ``in``,
    keys, get, items, values) of the object it is given.
    """

    def verify_empty_attrs(self, attrs):
        """Assert that *attrs* is an empty non-namespace attribute collection.

        Every lookup of the name "attr" must raise KeyError (or fall back
        to the supplied default for ``get``), and every listing must be
        empty.
        """
        self.assertRaises(KeyError, attrs.getValue, "attr")
        self.assertRaises(KeyError, attrs.getValueByQName, "attr")
        self.assertRaises(KeyError, attrs.getNameByQName, "attr")
        self.assertRaises(KeyError, attrs.getQNameByName, "attr")
        self.assertRaises(KeyError, attrs.__getitem__, "attr")
        self.assertEqual(attrs.getLength(), 0)
        self.assertEqual(attrs.getNames(), [])
        self.assertEqual(attrs.getQNames(), [])
        self.assertEqual(len(attrs), 0)
        self.assertNotIn("attr", attrs)
        self.assertEqual(list(attrs.keys()), [])
        # Probe the same key as every other check in this helper; the
        # previous spelling "attrs" never tested get() for "attr" itself.
        self.assertIsNone(attrs.get("attr"))
        self.assertEqual(attrs.get("attr", 25), 25)
        self.assertEqual(list(attrs.items()), [])
        self.assertEqual(list(attrs.values()), [])

    def verify_empty_nsattrs(self, attrs):
        """Assert that *attrs* is an empty namespace-aware attribute collection.

        Names are ``(ns_uri, "attr")`` pairs and qualified names use the
        "ns:attr" spelling.
        """
        self.assertRaises(KeyError, attrs.getValue, (ns_uri, "attr"))
        self.assertRaises(KeyError, attrs.getValueByQName, "ns:attr")
        self.assertRaises(KeyError, attrs.getNameByQName, "ns:attr")
        self.assertRaises(KeyError, attrs.getQNameByName, (ns_uri, "attr"))
        self.assertRaises(KeyError, attrs.__getitem__, (ns_uri, "attr"))
        self.assertEqual(attrs.getLength(), 0)
        self.assertEqual(attrs.getNames(), [])
        self.assertEqual(attrs.getQNames(), [])
        self.assertEqual(len(attrs), 0)
        self.assertNotIn((ns_uri, "attr"), attrs)
        self.assertEqual(list(attrs.keys()), [])
        self.assertIsNone(attrs.get((ns_uri, "attr")))
        self.assertEqual(attrs.get((ns_uri, "attr"), 25), 25)
        self.assertEqual(list(attrs.items()), [])
        self.assertEqual(list(attrs.values()), [])

    def verify_attrs_wattr(self, attrs):
        """Assert that *attrs* holds exactly one attribute, "attr" = "val"."""
        self.assertEqual(attrs.getLength(), 1)
        self.assertEqual(attrs.getNames(), ["attr"])
        self.assertEqual(attrs.getQNames(), ["attr"])
        self.assertEqual(len(attrs), 1)
        self.assertIn("attr", attrs)
        self.assertEqual(list(attrs.keys()), ["attr"])
        self.assertEqual(attrs.get("attr"), "val")
        self.assertEqual(attrs.get("attr", 25), "val")
        self.assertEqual(list(attrs.items()), [("attr", "val")])
        self.assertEqual(list(attrs.values()), ["val"])
        self.assertEqual(attrs.getValue("attr"), "val")
        self.assertEqual(attrs.getValueByQName("attr"), "val")
        self.assertEqual(attrs.getNameByQName("attr"), "attr")
        self.assertEqual(attrs["attr"], "val")
        self.assertEqual(attrs.getQNameByName("attr"), "attr")
104
105
def xml_str(doc, encoding=None):
    """Return *doc*, preceded by an XML declaration when *encoding* is given.

    With ``encoding=None`` the document is returned unchanged.
    """
    if encoding is not None:
        template = '<?xml version="1.0" encoding="%s"?>\n%s'
        return template % (encoding, doc)
    return doc
110
def xml_bytes(doc, encoding, decl_encoding=...):
    """Return *doc* encoded with *encoding*, including an XML declaration.

    *decl_encoding* is the encoding named in the declaration; when omitted
    it is the same as *encoding*, and ``None`` leaves the declaration out.
    Characters the codec cannot represent become character references.
    """
    declared = encoding if decl_encoding is ... else decl_encoding
    text = xml_str(doc, declared)
    return text.encode(encoding, 'xmlcharrefreplace')
115
def make_xml_file(doc, encoding, decl_encoding=...):
    """Write *doc* to TESTFN using *encoding*, including an XML declaration.

    *decl_encoding* is the encoding named in the declaration; when omitted
    it is the same as *encoding*, and ``None`` leaves the declaration out.
    Characters the codec cannot represent become character references.
    """
    declared = encoding if decl_encoding is ... else decl_encoding
    text = xml_str(doc, declared)
    with open(TESTFN, 'w', encoding=encoding,
              errors='xmlcharrefreplace') as target:
        target.write(text)
121
122
class ParseTest(unittest.TestCase):
    """Tests for xml.sax.parse() and xml.sax.parseString().

    Every successful parse is fed to an XMLGenerator writing UTF-8, and the
    regenerated text must equal ``data`` preceded by a UTF-8 declaration.
    """

    # Sample document mixing ASCII, Latin-1, BMP and non-BMP characters.
    data = '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>'

    def tearDown(self):
        # Remove the scratch file that make_xml_file() may have created.
        os_helper.unlink(TESTFN)

    def check_parse(self, f):
        """Parse source *f* and compare the regenerated document."""
        from xml.sax import parse
        sink = StringIO()
        handler = XMLGenerator(sink, 'utf-8')
        parse(f, handler)
        self.assertEqual(sink.getvalue(), xml_str(self.data, 'utf-8'))

    def _check_byte_sources(self, *encodings):
        """Run check_parse() on a BytesIO, the TESTFN path and a binary file.

        *encodings* is forwarded unchanged to xml_bytes() and
        make_xml_file().
        """
        self.check_parse(BytesIO(xml_bytes(self.data, *encodings)))
        make_xml_file(self.data, *encodings)
        self.check_parse(TESTFN)
        with open(TESTFN, 'rb') as stream:
            self.check_parse(stream)

    def test_parse_text(self):
        # Character sources (StringIO and text-mode files) are expected to
        # parse whether or not the text carries an encoding declaration.
        for codec in ('us-ascii', 'iso-8859-1', 'utf-8',
                      'utf-16', 'utf-16le', 'utf-16be'):
            # with a declaration
            self.check_parse(StringIO(xml_str(self.data, codec)))
            make_xml_file(self.data, codec)
            with open(TESTFN, 'r', encoding=codec) as stream:
                self.check_parse(stream)
            # without a declaration
            self.check_parse(StringIO(self.data))
            make_xml_file(self.data, codec, None)
            with open(TESTFN, 'r', encoding=codec) as stream:
                self.check_parse(stream)

    def test_parse_bytes(self):
        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
        # UTF-16 is autodetected
        for codec in ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be'):
            self._check_byte_sources(codec)
            self._check_byte_sources(codec, None)
        # accept UTF-8 with BOM
        self._check_byte_sources('utf-8-sig', 'utf-8')
        self._check_byte_sources('utf-8-sig', None)
        # accept data with declared encoding
        self._check_byte_sources('iso-8859-1')
        # fail on non-UTF-8 incompatible data without declared encoding
        with self.assertRaises(SAXException):
            self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1', None)))
        make_xml_file(self.data, 'iso-8859-1', None)
        with self.assertRaises(SAXException):
            self.check_parse(TESTFN)
        with open(TESTFN, 'rb') as stream, self.assertRaises(SAXException):
            self.check_parse(stream)

    def test_parse_path_object(self):
        # A path-like object is accepted as the source.
        make_xml_file(self.data, 'utf-8', None)
        path = FakePath(TESTFN)
        self.check_parse(path)

    def test_parse_InputSource(self):
        # accept data without declared but with explicitly specified encoding
        make_xml_file(self.data, 'iso-8859-1', None)
        with open(TESTFN, 'rb') as stream:
            source = InputSource()
            source.setByteStream(stream)
            source.setEncoding('iso-8859-1')
            self.check_parse(source)

    def test_parse_close_source(self):
        # The file opened for a path source must end up closed even though
        # parsing fails.
        real_open = open
        opened = None

        def recording_open(*args):
            # Remember the file object handed out so it can be inspected.
            nonlocal opened
            opened = real_open(*args)
            return opened

        with mock.patch('xml.sax.saxutils.open', side_effect=recording_open):
            make_xml_file(self.data, 'iso-8859-1', None)
            with self.assertRaises(SAXException):
                self.check_parse(TESTFN)
            self.assertTrue(opened.closed)

    def check_parseString(self, s):
        """Parse string or bytes *s* and compare the regenerated document."""
        from xml.sax import parseString
        sink = StringIO()
        handler = XMLGenerator(sink, 'utf-8')
        parseString(s, handler)
        self.assertEqual(sink.getvalue(), xml_str(self.data, 'utf-8'))

    def test_parseString_text(self):
        # Text input is expected to parse with any declared encoding, and
        # with no declaration at all.
        for codec in ('us-ascii', 'iso-8859-1', 'utf-8',
                      'utf-16', 'utf-16le', 'utf-16be'):
            document = xml_str(self.data, codec)
            self.check_parseString(document)
        self.check_parseString(self.data)

    def test_parseString_bytes(self):
        # UTF-8 is default encoding, US-ASCII is compatible with UTF-8,
        # UTF-16 is autodetected
        for codec in ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be'):
            for declaration in ((), (None,)):
                self.check_parseString(
                    xml_bytes(self.data, codec, *declaration))
        # accept UTF-8 with BOM
        for declared in ('utf-8', None):
            self.check_parseString(xml_bytes(self.data, 'utf-8-sig', declared))
        # accept data with declared encoding
        self.check_parseString(xml_bytes(self.data, 'iso-8859-1'))
        # fail on non-UTF-8 incompatible data without declared encoding
        with self.assertRaises(SAXException):
            self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None))
246
class MakeParserTest(unittest.TestCase):
    """Tests for the parser-list handling of xml.sax.make_parser()."""

    def test_make_parser2(self):
        # Creating parsers several times in a row should succeed.
        # Testing this because there have been failures of this kind
        # before.
        for _ in range(6):
            from xml.sax import make_parser
            parser = make_parser()

    def test_make_parser3(self):
        # Testing that make_parser can handle different types of
        # iterables.
        candidates = (
            ['module'],
            ('module', ),
            {'module'},
            frozenset({'module'}),
            {'module': None},
            iter(['module']),
        )
        for parser_list in candidates:
            make_parser(parser_list)

    def test_make_parser4(self):
        # Testing that make_parser can handle empty iterables.
        candidates = (
            [],
            tuple(),
            set(),
            frozenset(),
            {},
            iter([]),
        )
        for parser_list in candidates:
            make_parser(parser_list)

    def test_make_parser5(self):
        # Testing that make_parser can handle iterables with more than
        # one item.
        candidates = (
            ['module1', 'module2'],
            ('module1', 'module2'),
            {'module1', 'module2'},
            frozenset({'module1', 'module2'}),
            {'module1': None, 'module2': None},
            iter(['module1', 'module2']),
        )
        for parser_list in candidates:
            make_parser(parser_list)
293
294# ===========================================================================
295#
296#   saxutils tests
297#
298# ===========================================================================
299
class SaxutilsTest(unittest.TestCase):
    """Tests for escape(), unescape() and quoteattr(), plus make_parser()."""

    # ===== escape
    def test_escape_basic(self):
        escaped = escape("Donald Duck & Co")
        self.assertEqual(escaped, "Donald Duck &amp; Co")

    def test_escape_all(self):
        escaped = escape("<Donald Duck & Co>")
        self.assertEqual(escaped, "&lt;Donald Duck &amp; Co&gt;")

    def test_escape_extra(self):
        # Additional replacements are supplied through the entities mapping.
        extra = {"å" : "&aring;"}
        self.assertEqual(escape("Hei på deg", extra), "Hei p&aring; deg")

    # ===== unescape
    def test_unescape_basic(self):
        plain = unescape("Donald Duck &amp; Co")
        self.assertEqual(plain, "Donald Duck & Co")

    def test_unescape_all(self):
        plain = unescape("&lt;Donald Duck &amp; Co&gt;")
        self.assertEqual(plain, "<Donald Duck & Co>")

    def test_unescape_extra(self):
        # Additional replacements are supplied through the entities mapping.
        extra = {"å" : "&aring;"}
        self.assertEqual(unescape("Hei på deg", extra), "Hei p&aring; deg")

    def test_unescape_amp_extra(self):
        # The text produced by unescaping "&amp;" must not be matched
        # against the extra entities afterwards.
        extra = {"&foo;": "splat"}
        self.assertEqual(unescape("&amp;foo;", extra), "&foo;")

    # ===== quoteattr
    def test_quoteattr_basic(self):
        quoted = quoteattr("Donald Duck & Co")
        self.assertEqual(quoted, '"Donald Duck &amp; Co"')

    def test_single_quoteattr(self):
        quoted = quoteattr('Includes "double" quotes')
        self.assertEqual(quoted, '\'Includes "double" quotes\'')

    def test_double_quoteattr(self):
        quoted = quoteattr("Includes 'single' quotes")
        self.assertEqual(quoted, "\"Includes 'single' quotes\"")

    def test_single_double_quoteattr(self):
        quoted = quoteattr("Includes 'single' and \"double\" quotes")
        self.assertEqual(quoted,
                         "\"Includes 'single' and &quot;double&quot; quotes\"")

    # ===== make_parser
    def test_make_parser(self):
        # Creating a parser should succeed - it should fall back
        # to the expatreader
        unavailable = ['xml.parsers.no_such_parser']
        parser = make_parser(unavailable)
350
351
class PrepareInputSourceTest(unittest.TestCase):
    """Tests for how prepare_input_source() picks the stream to read.

    Three sources with distinct contents are available: a file on disk, an
    in-memory byte stream and an in-memory character stream.
    """

    def setUp(self):
        self.file = os_helper.TESTFN
        with open(self.file, "w") as fileobj:
            fileobj.write("This was read from a file.")

    def tearDown(self):
        os_helper.unlink(self.file)

    def make_byte_stream(self):
        """Return a new in-memory byte stream with known content."""
        return BytesIO(b"This is a byte stream.")

    def make_character_stream(self):
        """Return a new in-memory character stream with known content."""
        return StringIO("This is a character stream.")

    def checkContent(self, stream, content):
        """Assert *stream* exists and yields *content*, then close it."""
        self.assertIsNotNone(stream)
        self.assertEqual(stream.read(), content)
        stream.close()

    def _check_bytes_only(self, prepared, content):
        """Assert *prepared* has no character stream and bytes *content*."""
        self.assertIsNone(prepared.getCharacterStream())
        self.checkContent(prepared.getByteStream(), content)

    def _check_text_only(self, prepared, content):
        """Assert *prepared* has no byte stream and text *content*."""
        self.assertIsNone(prepared.getByteStream())
        self.checkContent(prepared.getCharacterStream(), content)

    def test_character_stream(self):
        # If the source is an InputSource with a character stream, use it.
        source = InputSource(self.file)
        source.setCharacterStream(self.make_character_stream())
        self._check_text_only(prepare_input_source(source),
                              "This is a character stream.")

    def test_byte_stream(self):
        # If the source is an InputSource that does not have a character
        # stream but does have a byte stream, use the byte stream.
        source = InputSource(self.file)
        source.setByteStream(self.make_byte_stream())
        self._check_bytes_only(prepare_input_source(source),
                               b"This is a byte stream.")

    def test_system_id(self):
        # If the source is an InputSource that has neither a character
        # stream nor a byte stream, open the system ID.
        source = InputSource(self.file)
        self._check_bytes_only(prepare_input_source(source),
                               b"This was read from a file.")

    def test_string(self):
        # If the source is a string, use it as a system ID and open it.
        self._check_bytes_only(prepare_input_source(self.file),
                               b"This was read from a file.")

    def test_path_objects(self):
        # If the source is a Path object, use it as a system ID and open it.
        self._check_bytes_only(prepare_input_source(FakePath(self.file)),
                               b"This was read from a file.")

    def test_binary_file(self):
        # If the source is a binary file-like object, use it as a byte
        # stream.
        self._check_bytes_only(prepare_input_source(self.make_byte_stream()),
                               b"This is a byte stream.")

    def test_text_file(self):
        # If the source is a text file-like object, use it as a character
        # stream.
        self._check_text_only(
            prepare_input_source(self.make_character_stream()),
            "This is a character stream.")
431
432
433# ===== XMLGenerator
434
class XmlgenTest:
    """Mixin with XMLGenerator tests, parameterised by the output object.

    Concrete subclasses supply ``ioclass`` (called without arguments to get
    the object XMLGenerator writes to; it must offer ``getvalue()``) and
    ``xml(doc, encoding=...)`` (which renders the expected output).
    """

    def _make_generator(self, **options):
        """Return ``(out, generator)`` built on a fresh ``ioclass()`` object.

        *options* are passed to XMLGenerator as keyword arguments.
        """
        out = self.ioclass()
        return out, XMLGenerator(out, **options)

    def _assert_output(self, out, doc, **xml_options):
        """Assert that *out* holds exactly ``self.xml(doc, **xml_options)``."""
        self.assertEqual(out.getvalue(), self.xml(doc, **xml_options))

    def test_xmlgen_basic(self):
        out, writer = self._make_generator()
        writer.startDocument()
        writer.startElement("doc", {})
        writer.endElement("doc")
        writer.endDocument()

        self._assert_output(out, "<doc></doc>")

    def test_xmlgen_basic_empty(self):
        out, writer = self._make_generator(short_empty_elements=True)
        writer.startDocument()
        writer.startElement("doc", {})
        writer.endElement("doc")
        writer.endDocument()

        self._assert_output(out, "<doc/>")

    def test_xmlgen_content(self):
        out, writer = self._make_generator()

        writer.startDocument()
        writer.startElement("doc", {})
        writer.characters("huhei")
        writer.endElement("doc")
        writer.endDocument()

        self._assert_output(out, "<doc>huhei</doc>")

    def test_xmlgen_content_empty(self):
        # An element with content keeps its explicit end tag even when
        # short empty elements are enabled.
        out, writer = self._make_generator(short_empty_elements=True)

        writer.startDocument()
        writer.startElement("doc", {})
        writer.characters("huhei")
        writer.endElement("doc")
        writer.endDocument()

        self._assert_output(out, "<doc>huhei</doc>")

    def test_xmlgen_pi(self):
        out, writer = self._make_generator()

        writer.startDocument()
        writer.processingInstruction("test", "data")
        writer.startElement("doc", {})
        writer.endElement("doc")
        writer.endDocument()

        self._assert_output(out, "<?test data?><doc></doc>")

    def test_xmlgen_content_escape(self):
        out, writer = self._make_generator()

        writer.startDocument()
        writer.startElement("doc", {})
        writer.characters("<huhei&")
        writer.endElement("doc")
        writer.endDocument()

        self._assert_output(out, "<doc>&lt;huhei&amp;</doc>")

    def test_xmlgen_attr_escape(self):
        # Attribute values containing quotes and whitespace control
        # characters.
        out, writer = self._make_generator()

        writer.startDocument()
        writer.startElement("doc", {"a": '"'})
        for value in ("'", "'\"", "\n\r\t"):
            writer.startElement("e", {"a": value})
            writer.endElement("e")
        writer.endElement("doc")
        writer.endDocument()

        self._assert_output(
            out,
            "<doc a='\"'><e a=\"'\"></e>"
            "<e a=\"'&quot;\"></e>"
            "<e a=\"&#10;&#13;&#9;\"></e></doc>")

    def test_xmlgen_encoding(self):
        for codec in ('iso-8859-15', 'utf-8', 'utf-8-sig',
                      'utf-16', 'utf-16be', 'utf-16le',
                      'utf-32', 'utf-32be', 'utf-32le'):
            out, writer = self._make_generator(encoding=codec)

            writer.startDocument()
            writer.startElement("doc", {"a": '\u20ac'})
            writer.characters("\u20ac")
            writer.endElement("doc")
            writer.endDocument()

            self._assert_output(out, '<doc a="\u20ac">\u20ac</doc>',
                                encoding=codec)

    def test_xmlgen_unencodable(self):
        # Characters outside the output encoding are expected as numeric
        # character references.
        out, writer = self._make_generator(encoding='ascii')

        writer.startDocument()
        writer.startElement("doc", {"a": '\u20ac'})
        writer.characters("\u20ac")
        writer.endElement("doc")
        writer.endDocument()

        self._assert_output(out, '<doc a="&#8364;">&#8364;</doc>',
                            encoding='ascii')

    def test_xmlgen_ignorable(self):
        out, writer = self._make_generator()

        writer.startDocument()
        writer.startElement("doc", {})
        writer.ignorableWhitespace(" ")
        writer.endElement("doc")
        writer.endDocument()

        self._assert_output(out, "<doc> </doc>")

    def test_xmlgen_ignorable_empty(self):
        # Ignorable whitespace counts as content: no short empty tag.
        out, writer = self._make_generator(short_empty_elements=True)

        writer.startDocument()
        writer.startElement("doc", {})
        writer.ignorableWhitespace(" ")
        writer.endElement("doc")
        writer.endDocument()

        self._assert_output(out, "<doc> </doc>")

    def test_xmlgen_encoding_bytes(self):
        # characters() and ignorableWhitespace() are given bytes already
        # encoded in the generator's encoding.
        for codec in ('iso-8859-15', 'utf-8', 'utf-8-sig',
                      'utf-16', 'utf-16be', 'utf-16le',
                      'utf-32', 'utf-32be', 'utf-32le'):
            out, writer = self._make_generator(encoding=codec)

            writer.startDocument()
            writer.startElement("doc", {"a": '\u20ac'})
            writer.characters("\u20ac".encode(codec))
            writer.ignorableWhitespace(" ".encode(codec))
            writer.endElement("doc")
            writer.endDocument()

            self._assert_output(out, '<doc a="\u20ac">\u20ac </doc>',
                                encoding=codec)

    def test_xmlgen_ns(self):
        out, writer = self._make_generator()

        writer.startDocument()
        writer.startPrefixMapping("ns1", ns_uri)
        writer.startElementNS((ns_uri, "doc"), "ns1:doc", {})
        # add an unqualified name
        writer.startElementNS((None, "udoc"), None, {})
        writer.endElementNS((None, "udoc"), None)
        writer.endElementNS((ns_uri, "doc"), "ns1:doc")
        writer.endPrefixMapping("ns1")
        writer.endDocument()

        self._assert_output(
            out,
            '<ns1:doc xmlns:ns1="%s"><udoc></udoc></ns1:doc>' % ns_uri)

    def test_xmlgen_ns_empty(self):
        out, writer = self._make_generator(short_empty_elements=True)

        writer.startDocument()
        writer.startPrefixMapping("ns1", ns_uri)
        writer.startElementNS((ns_uri, "doc"), "ns1:doc", {})
        # add an unqualified name
        writer.startElementNS((None, "udoc"), None, {})
        writer.endElementNS((None, "udoc"), None)
        writer.endElementNS((ns_uri, "doc"), "ns1:doc")
        writer.endPrefixMapping("ns1")
        writer.endDocument()

        self._assert_output(
            out,
            '<ns1:doc xmlns:ns1="%s"><udoc/></ns1:doc>' % ns_uri)

    def test_1463026_1(self):
        # Element and attribute that are in no namespace.
        out, writer = self._make_generator()

        writer.startDocument()
        writer.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'})
        writer.endElementNS((None, 'a'), 'a')
        writer.endDocument()

        self._assert_output(out, '<a b="c"></a>')

    def test_1463026_1_empty(self):
        out, writer = self._make_generator(short_empty_elements=True)

        writer.startDocument()
        writer.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'})
        writer.endElementNS((None, 'a'), 'a')
        writer.endDocument()

        self._assert_output(out, '<a b="c"/>')

    def test_1463026_2(self):
        # Element in a namespace mapped with prefix None.
        out, writer = self._make_generator()

        writer.startDocument()
        writer.startPrefixMapping(None, 'qux')
        writer.startElementNS(('qux', 'a'), 'a', {})
        writer.endElementNS(('qux', 'a'), 'a')
        writer.endPrefixMapping(None)
        writer.endDocument()

        self._assert_output(out, '<a xmlns="qux"></a>')

    def test_1463026_2_empty(self):
        out, writer = self._make_generator(short_empty_elements=True)

        writer.startDocument()
        writer.startPrefixMapping(None, 'qux')
        writer.startElementNS(('qux', 'a'), 'a', {})
        writer.endElementNS(('qux', 'a'), 'a')
        writer.endPrefixMapping(None)
        writer.endDocument()

        self._assert_output(out, '<a xmlns="qux"/>')

    def test_1463026_3(self):
        # Prefixed element carrying an attribute that is in no namespace.
        out, writer = self._make_generator()

        writer.startDocument()
        writer.startPrefixMapping('my', 'qux')
        writer.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'})
        writer.endElementNS(('qux', 'a'), 'a')
        writer.endPrefixMapping('my')
        writer.endDocument()

        self._assert_output(out, '<my:a xmlns:my="qux" b="c"></my:a>')

    def test_1463026_3_empty(self):
        out, writer = self._make_generator(short_empty_elements=True)

        writer.startDocument()
        writer.startPrefixMapping('my', 'qux')
        writer.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'})
        writer.endElementNS(('qux', 'a'), 'a')
        writer.endPrefixMapping('my')
        writer.endDocument()

        self._assert_output(out, '<my:a xmlns:my="qux" b="c"/>')

    def test_5027_1(self):
        # The xml prefix (as in xml:lang below) is reserved and bound by
        # definition to http://www.w3.org/XML/1998/namespace.  XMLGenerator had
        # a bug whereby a KeyError is raised because this namespace is missing
        # from a dictionary.
        #
        # This test demonstrates the bug by parsing a document.
        document = StringIO(
            '<?xml version="1.0"?>'
            '<a:g1 xmlns:a="http://example.com/ns">'
             '<a:g2 xml:lang="en">Hello</a:g2>'
            '</a:g1>')

        reader = make_parser()
        reader.setFeature(feature_namespaces, True)
        out, writer = self._make_generator()
        reader.setContentHandler(writer)
        reader.parse(document)

        self._assert_output(
            out,
            '<a:g1 xmlns:a="http://example.com/ns">'
             '<a:g2 xml:lang="en">Hello</a:g2>'
            '</a:g1>')

    def test_5027_2(self):
        # The xml prefix (as in xml:lang below) is reserved and bound by
        # definition to http://www.w3.org/XML/1998/namespace.  XMLGenerator had
        # a bug whereby a KeyError is raised because this namespace is missing
        # from a dictionary.
        #
        # This test demonstrates the bug by direct manipulation of the
        # XMLGenerator.
        out, writer = self._make_generator()
        outer = ('http://example.com/ns', 'g1')
        inner = ('http://example.com/ns', 'g2')

        writer.startDocument()
        writer.startPrefixMapping('a', 'http://example.com/ns')
        writer.startElementNS(outer, 'g1', {})
        lang_attr = {('http://www.w3.org/XML/1998/namespace', 'lang'): 'en'}
        writer.startElementNS(inner, 'g2', lang_attr)
        writer.characters('Hello')
        writer.endElementNS(inner, 'g2')
        writer.endElementNS(outer, 'g1')
        writer.endPrefixMapping('a')
        writer.endDocument()

        self._assert_output(
            out,
            '<a:g1 xmlns:a="http://example.com/ns">'
             '<a:g2 xml:lang="en">Hello</a:g2>'
            '</a:g1>')

    def test_no_close_file(self):
        # The output object must stay open after the generator that wrote
        # to it is no longer referenced.
        out = self.ioclass()

        def emit_and_drop(target):
            # The generator is local, so nothing refers to it once this
            # function returns.
            writer = XMLGenerator(target)
            writer.startDocument()
            writer.startElement("doc", {})

        emit_and_drop(out)
        self.assertFalse(out.closed)

    def test_xmlgen_fragment(self):
        out, writer = self._make_generator()

        # Don't call gen.startDocument()
        writer.startElement("foo", {"a": "1.0"})
        writer.characters("Hello")
        writer.endElement("foo")
        writer.startElement("bar", {"b": "2.0"})
        writer.endElement("bar")
        # Don't call gen.endDocument()

        produced = out.getvalue()
        with_declaration = self.xml(
            '<foo a="1.0">Hello</foo><bar b="2.0"></bar>')
        # No declaration is expected, so drop the prefix xml() adds.
        declaration_length = len(self.xml(''))
        self.assertEqual(produced, with_declaration[declaration_length:])
787
class StringXmlgenTest(XmlgenTest, unittest.TestCase):
    """Run the XmlgenTest suite with XMLGenerator writing to a StringIO."""

    ioclass = StringIO

    # Setting the name to None disables the inherited test for this class.
    test_xmlgen_unencodable = None

    def xml(self, doc, encoding='iso-8859-1'):
        """Return the expected text: an XML declaration followed by *doc*."""
        template = '<?xml version="1.0" encoding="%s"?>\n%s'
        return template % (encoding, doc)
795
class BytesXmlgenTest(XmlgenTest, unittest.TestCase):
    """Run the XmlgenTest suite with XMLGenerator writing to a BytesIO."""

    ioclass = BytesIO

    def xml(self, doc, encoding='iso-8859-1'):
        """Return the expected bytes: declaration plus *doc* in *encoding*.

        Characters the codec cannot represent become character references.
        """
        template = '<?xml version="1.0" encoding="%s"?>\n%s'
        text = template % (encoding, doc)
        return text.encode(encoding, 'xmlcharrefreplace')
802
class WriterXmlgenTest(BytesXmlgenTest):
    """Run the bytes-output cases against a minimal list-backed writer."""

    class ioclass(list):
        # Each chunk passed to write() is appended to the list itself;
        # getvalue() joins the collected chunks back into one bytes object.
        closed = False
        write = list.append

        def getvalue(self):
            return b''.join(self)

        def tell(self):
            # 0 before anything has been written, non-zero afterwards
            return len(self)

        def seekable(self):
            return True
817
class StreamWriterXmlgenTest(XmlgenTest, unittest.TestCase):
    """Run the XmlgenTest cases against a codecs StreamWriter."""

    def ioclass(self):
        # Build an ASCII StreamWriter over a BytesIO and expose the buffer's
        # getvalue() on the writer so the tests can read the bytes back.
        backing = BytesIO()
        make_ascii_writer = codecs.getwriter('ascii')
        stream = make_ascii_writer(backing, 'xmlcharrefreplace')
        stream.getvalue = backing.getvalue
        return stream

    def xml(self, doc, encoding='iso-8859-1'):
        # The declaration names *encoding*, but the bytes are always ASCII
        # with character references, mirroring the writer built above.
        template = '<?xml version="1.0" encoding="%s"?>\n%s'
        text = template % (encoding, doc)
        return text.encode('ascii', 'xmlcharrefreplace')
828
class StreamReaderWriterXmlgenTest(XmlgenTest, unittest.TestCase):
    """Run the XmlgenTest cases against a file opened with codecs.open()."""

    # Scratch file that backs the writer.
    fname = os_helper.TESTFN + '-codecs'

    def ioclass(self):
        # Open the scratch file as an unbuffered ASCII writer, register its
        # removal, and attach a getvalue() that reads the file back.
        stream = codecs.open(self.fname, 'w', encoding='ascii',
                             errors='xmlcharrefreplace', buffering=0)

        def close_and_remove():
            stream.close()
            os_helper.unlink(self.fname)

        def read_back():
            # Windows will not let us reopen the file without closing it first
            stream.close()
            with open(stream.name, 'rb') as written:
                return written.read()

        self.addCleanup(close_and_remove)
        stream.getvalue = read_back
        return stream

    def xml(self, doc, encoding='iso-8859-1'):
        # The declaration names *encoding*, but the bytes are always ASCII
        # with character references, mirroring the writer built above.
        template = '<?xml version="1.0" encoding="%s"?>\n%s'
        text = template % (encoding, doc)
        return text.encode('ascii', 'xmlcharrefreplace')
850
# XML declaration expected at the beginning of output written by an
# XMLGenerator with default settings; the tests below prepend it to the
# expected document bytes.
start = b'<?xml version="1.0" encoding="iso-8859-1"?>\n'
852
853
class XMLFilterBaseTest(unittest.TestCase):
    """Checks that XMLFilterBase passes content events to its handler."""

    def test_filter_basic(self):
        # Events fired on the filter must come out of the XMLGenerator that
        # is installed as the filter's content handler.
        sink = BytesIO()
        xml_filter = XMLFilterBase()
        xml_filter.setContentHandler(XMLGenerator(sink))

        xml_filter.startDocument()
        xml_filter.startElement("doc", {})
        xml_filter.characters("content")
        xml_filter.ignorableWhitespace(" ")
        xml_filter.endElement("doc")
        xml_filter.endDocument()

        expected = start + b"<doc>content </doc>"
        self.assertEqual(sink.getvalue(), expected)
869
870# ===========================================================================
871#
872#   expatreader tests
873#
874# ===========================================================================
875
# Contents of TEST_XMLFILE_OUT as bytes, read once at import time; the tests
# below compare XMLGenerator output for TEST_XMLFILE against it.
with open(TEST_XMLFILE_OUT, 'rb') as f:
    xml_test_out = f.read()
878
class ExpatReaderTest(XmlTestBase):
    """Tests for the SAX parser returned by expatreader.create_parser().

    Most tests route the parser's events into an XMLGenerator writing to a
    BytesIO and compare the bytes with xml_test_out or with a short literal
    document prefixed by the module-level ``start`` declaration.
    """

    # ===== XMLReader support

    def test_expat_binary_file(self):
        # parse() accepts a file object opened in binary mode.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        with open(TEST_XMLFILE, 'rb') as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_text_file(self):
        # parse() accepts a file object opened in text mode.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    @requires_nonascii_filenames
    def test_expat_binary_file_nonascii(self):
        # Same as test_expat_binary_file, but the file has a non-ASCII name.
        fname = os_helper.TESTFN_UNICODE
        shutil.copyfile(TEST_XMLFILE, fname)
        self.addCleanup(os_helper.unlink, fname)

        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        # Use a context manager so the copy is closed even if parse() raises;
        # otherwise the unlink cleanup registered above could run while the
        # file is still open.
        with open(fname, 'rb') as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_binary_file_bytes_name(self):
        # The binary file is opened through an os.fsencode()'d (bytes) path.
        fname = os.fsencode(TEST_XMLFILE)
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        with open(fname, 'rb') as f:
            parser.parse(f)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_binary_file_int_name(self):
        # The file object handed to parse() is opened from a file descriptor
        # instead of a path.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        with open(TEST_XMLFILE, 'rb') as f:
            # closefd=False: the descriptor stays owned by the outer file.
            with open(f.fileno(), 'rb', closefd=False) as f2:
                parser.parse(f2)

        self.assertEqual(result.getvalue(), xml_test_out)

    # ===== DTDHandler support

    class TestDTDHandler:
        # Records every notation and unparsed-entity declaration it receives.

        def __init__(self):
            self._notations = []
            self._entities  = []

        def notationDecl(self, name, publicId, systemId):
            self._notations.append((name, publicId, systemId))

        def unparsedEntityDecl(self, name, publicId, systemId, ndata):
            self._entities.append((name, publicId, systemId, ndata))


    class TestEntityRecorder:
        # Entity resolver that logs each (publicId, systemId) request and
        # answers with an InputSource carrying those same identifiers.

        def __init__(self):
            self.entities = []

        def resolveEntity(self, publicId, systemId):
            self.entities.append((publicId, systemId))
            source = InputSource()
            source.setPublicId(publicId)
            source.setSystemId(systemId)
            return source

    def test_expat_dtdhandler(self):
        # NOTATION and unparsed ENTITY declarations in the internal subset
        # must be reported to the DTD handler.
        parser = create_parser()
        handler = self.TestDTDHandler()
        parser.setDTDHandler(handler)

        parser.feed('<!DOCTYPE doc [\n')
        parser.feed('  <!ENTITY img SYSTEM "expat.gif" NDATA GIF>\n')
        parser.feed('  <!NOTATION GIF PUBLIC "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN">\n')
        parser.feed(']>\n')
        parser.feed('<doc></doc>')
        parser.close()

        self.assertEqual(handler._notations,
            [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)])
        self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")])

    def test_expat_external_dtd_enabled(self):
        # With feature_external_ges switched on, the external DTD reference
        # reaches the entity resolver, and feeding the DOCTYPE is expected to
        # raise URLError.

        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        parser = create_parser()
        parser.setFeature(feature_external_ges, True)
        resolver = self.TestEntityRecorder()
        parser.setEntityResolver(resolver)

        with self.assertRaises(URLError):
            parser.feed(
                '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
            )
        self.assertEqual(
            resolver.entities, [(None, 'unsupported://non-existing')]
        )

    def test_expat_external_dtd_default(self):
        # Without enabling feature_external_ges the resolver is never asked
        # about the external DTD and the document parses normally.
        parser = create_parser()
        resolver = self.TestEntityRecorder()
        parser.setEntityResolver(resolver)

        parser.feed(
            '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n'
        )
        parser.feed('<doc />')
        parser.close()
        self.assertEqual(resolver.entities, [])

    # ===== EntityResolver support

    class TestEntityResolver:
        # Resolves every entity to a byte stream containing b"<entity/>".

        def resolveEntity(self, publicId, systemId):
            inpsrc = InputSource()
            inpsrc.setByteStream(BytesIO(b"<entity/>"))
            return inpsrc

    def test_expat_entityresolver_enabled(self):
        # With feature_external_ges on, &test; is replaced by the content
        # supplied by the resolver.
        parser = create_parser()
        parser.setFeature(feature_external_ges, True)
        parser.setEntityResolver(self.TestEntityResolver())
        result = BytesIO()
        parser.setContentHandler(XMLGenerator(result))

        parser.feed('<!DOCTYPE doc [\n')
        parser.feed('  <!ENTITY test SYSTEM "whatever">\n')
        parser.feed(']>\n')
        parser.feed('<doc>&test;</doc>')
        parser.close()

        self.assertEqual(result.getvalue(), start +
                         b"<doc><entity></entity></doc>")

    def test_expat_entityresolver_default(self):
        # feature_external_ges is off by default, so the external entity
        # reference produces no content.
        parser = create_parser()
        self.assertEqual(parser.getFeature(feature_external_ges), False)
        parser.setEntityResolver(self.TestEntityResolver())
        result = BytesIO()
        parser.setContentHandler(XMLGenerator(result))

        parser.feed('<!DOCTYPE doc [\n')
        parser.feed('  <!ENTITY test SYSTEM "whatever">\n')
        parser.feed(']>\n')
        parser.feed('<doc>&test;</doc>')
        parser.close()

        self.assertEqual(result.getvalue(), start +
                         b"<doc></doc>")

    # ===== Attributes support

    class AttrGatherer(ContentHandler):
        # Keeps the attributes object of the most recent start-element event.

        def startElement(self, name, attrs):
            self._attrs = attrs

        def startElementNS(self, name, qname, attrs):
            self._attrs = attrs

    def test_expat_attrs_empty(self):
        # An element without attributes; the checks live in the
        # verify_empty_attrs helper (inherited, not shown in this class).
        parser = create_parser()
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc/>")
        parser.close()

        self.verify_empty_attrs(gather._attrs)

    def test_expat_attrs_wattr(self):
        # An element with a single attribute attr='val'.
        parser = create_parser()
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc attr='val'/>")
        parser.close()

        self.verify_attrs_wattr(gather._attrs)

    def test_expat_nsattrs_empty(self):
        # NOTE(review): create_parser(1) presumably turns on namespace
        # handling -- confirm against expatreader.create_parser.
        parser = create_parser(1)
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc/>")
        parser.close()

        self.verify_empty_nsattrs(gather._attrs)

    def test_expat_nsattrs_wattr(self):
        # A namespaced attribute must be exposed under its (uri, localname)
        # pair through every accessor of the attributes object.
        parser = create_parser(1)
        gather = self.AttrGatherer()
        parser.setContentHandler(gather)

        parser.feed("<doc xmlns:ns='%s' ns:attr='val'/>" % ns_uri)
        parser.close()

        attrs = gather._attrs

        self.assertEqual(attrs.getLength(), 1)
        self.assertEqual(attrs.getNames(), [(ns_uri, "attr")])
        # Either no qualified names at all or exactly the prefixed name is
        # accepted here.
        self.assertIn(attrs.getQNames(), ([], ["ns:attr"]))
        self.assertEqual(len(attrs), 1)
        self.assertIn((ns_uri, "attr"), attrs)
        self.assertEqual(attrs.get((ns_uri, "attr")), "val")
        self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val")
        self.assertEqual(list(attrs.items()), [((ns_uri, "attr"), "val")])
        self.assertEqual(list(attrs.values()), ["val"])
        self.assertEqual(attrs.getValue((ns_uri, "attr")), "val")
        self.assertEqual(attrs[(ns_uri, "attr")], "val")

    # ===== InputSource support

    def test_expat_inpsource_filename(self):
        # parse() accepts a plain file name.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        parser.parse(TEST_XMLFILE)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_inpsource_sysid(self):
        # parse() accepts an InputSource built from the file name.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        parser.parse(InputSource(TEST_XMLFILE))

        self.assertEqual(result.getvalue(), xml_test_out)

    @requires_nonascii_filenames
    def test_expat_inpsource_sysid_nonascii(self):
        # Same as test_expat_inpsource_sysid, with a non-ASCII file name.
        fname = os_helper.TESTFN_UNICODE
        shutil.copyfile(TEST_XMLFILE, fname)
        self.addCleanup(os_helper.unlink, fname)

        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        parser.parse(InputSource(fname))

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_inpsource_byte_stream(self):
        # The InputSource carries only a byte stream.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        inpsrc = InputSource()
        with open(TEST_XMLFILE, 'rb') as f:
            inpsrc.setByteStream(f)
            parser.parse(inpsrc)

        self.assertEqual(result.getvalue(), xml_test_out)

    def test_expat_inpsource_character_stream(self):
        # The InputSource carries only a character stream.
        parser = create_parser()
        result = BytesIO()
        xmlgen = XMLGenerator(result)

        parser.setContentHandler(xmlgen)
        inpsrc = InputSource()
        with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f:
            inpsrc.setCharacterStream(f)
            parser.parse(inpsrc)

        self.assertEqual(result.getvalue(), xml_test_out)

    # ===== IncrementalParser support

    def test_expat_incremental(self):
        # A document supplied in two feed() calls is parsed as one document.
        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)

        parser.feed("<doc>")
        parser.feed("</doc>")
        parser.close()

        self.assertEqual(result.getvalue(), start + b"<doc></doc>")

    def test_expat_incremental_reset(self):
        # After reset() the parser starts a fresh document; the unfinished
        # one fed before the reset must not show up in the new output.
        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)

        parser.feed("<doc>")
        parser.feed("text")

        # Switch to a new output buffer and handler before resetting.
        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser.setContentHandler(xmlgen)
        parser.reset()

        parser.feed("<doc>")
        parser.feed("text")
        parser.feed("</doc>")
        parser.close()

        self.assertEqual(result.getvalue(), start + b"<doc>text</doc>")

    # ===== Locator support

    def test_expat_locator_noinfo(self):
        # Data supplied through feed() has no system or public id.
        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)

        parser.feed("<doc>")
        parser.feed("</doc>")
        parser.close()

        self.assertEqual(parser.getSystemId(), None)
        self.assertEqual(parser.getPublicId(), None)
        self.assertEqual(parser.getLineNumber(), 1)

    def test_expat_locator_withinfo(self):
        # Parsing by file name makes that name the reported system id.
        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)
        parser.parse(TEST_XMLFILE)

        self.assertEqual(parser.getSystemId(), TEST_XMLFILE)
        self.assertEqual(parser.getPublicId(), None)

    @requires_nonascii_filenames
    def test_expat_locator_withinfo_nonascii(self):
        # Same as test_expat_locator_withinfo, with a non-ASCII file name.
        fname = os_helper.TESTFN_UNICODE
        shutil.copyfile(TEST_XMLFILE, fname)
        self.addCleanup(os_helper.unlink, fname)

        result = BytesIO()
        xmlgen = XMLGenerator(result)
        parser = create_parser()
        parser.setContentHandler(xmlgen)
        parser.parse(fname)

        self.assertEqual(parser.getSystemId(), fname)
        self.assertEqual(parser.getPublicId(), None)
1257
1258
1259# ===========================================================================
1260#
1261#   error reporting
1262#
1263# ===========================================================================
1264
class ErrorReportingTest(unittest.TestCase):
    """Tests for the location information attached to SAX parse errors."""

    def test_expat_inpsource_location(self):
        # The system id set on the InputSource must be reported by the
        # exception raised for an ill-formed document.
        parser = create_parser()
        parser.setContentHandler(ContentHandler()) # do nothing
        source = InputSource()
        source.setByteStream(BytesIO(b"<foo bar foobar>"))   #ill-formed
        name = "a file name"
        source.setSystemId(name)
        # assertRaises fails the test by itself when nothing is raised and
        # hands back the exception for the follow-up check.
        with self.assertRaises(SAXException) as cm:
            parser.parse(source)
        self.assertEqual(cm.exception.getSystemId(), name)

    def test_expat_incomplete(self):
        # A truncated document raises SAXParseException, and the parser then
        # reports line 1, column 5 as its position.
        parser = create_parser()
        parser.setContentHandler(ContentHandler()) # do nothing
        self.assertRaises(SAXParseException, parser.parse, StringIO("<foo>"))
        self.assertEqual(parser.getColumnNumber(), 5)
        self.assertEqual(parser.getLineNumber(), 1)

    def test_sax_parse_exception_str(self):
        # pass various values from a locator to the SAXParseException to
        # make sure that the __str__() doesn't fall apart when None is
        # passed instead of an integer line and column number
        locations = (
            (1, 1),        # "normal" values for the locator
            (None, 1),     # None for the line number
            (1, None),     # None for the column number
            (None, None),  # None for both
        )
        for lineno, colno in locations:
            with self.subTest(lineno=lineno, colno=colno):
                str(SAXParseException("message", None,
                                      self.DummyLocator(lineno, colno)))

    class DummyLocator:
        # Minimal locator: fixed public/system ids plus the line and column
        # numbers given to the constructor.

        def __init__(self, lineno, colno):
            self._lineno = lineno
            self._colno = colno

        def getPublicId(self):
            return "pubid"

        def getSystemId(self):
            return "sysid"

        def getLineNumber(self):
            return self._lineno

        def getColumnNumber(self):
            return self._colno
1320
1321# ===========================================================================
1322#
1323#   xmlreader tests
1324#
1325# ===========================================================================
1326
class XmlReaderTest(XmlTestBase):
    """Tests for the AttributesImpl and AttributesNSImpl containers."""

    # ===== AttributesImpl
    def test_attrs_empty(self):
        empty = AttributesImpl({})
        self.verify_empty_attrs(empty)

    def test_attrs_wattr(self):
        single = AttributesImpl({"attr" : "val"})
        self.verify_attrs_wattr(single)

    def test_nsattrs_empty(self):
        empty = AttributesNSImpl({}, {})
        self.verify_empty_nsattrs(empty)

    def test_nsattrs_wattr(self):
        # One namespaced attribute, checked through every accessor.
        name = (ns_uri, "attr")
        qname = "ns:attr"
        attrs = AttributesNSImpl({name : "val"}, {name : qname})

        self.assertEqual(attrs.getLength(), 1)
        self.assertEqual(attrs.getNames(), [name])
        self.assertEqual(attrs.getQNames(), [qname])
        self.assertEqual(len(attrs), 1)
        self.assertIn(name, attrs)
        self.assertEqual(list(attrs.keys()), [name])
        self.assertEqual(attrs.get(name), "val")
        self.assertEqual(attrs.get(name, 25), "val")
        self.assertEqual(list(attrs.items()), [(name, "val")])
        self.assertEqual(list(attrs.values()), ["val"])
        self.assertEqual(attrs.getValue(name), "val")
        self.assertEqual(attrs.getValueByQName(qname), "val")
        self.assertEqual(attrs.getNameByQName(qname), name)
        self.assertEqual(attrs[name], "val")
        self.assertEqual(attrs.getQNameByName(name), qname)
1358
1359
1360class LexicalHandlerTest(unittest.TestCase):
1361    def setUp(self):
1362        self.parser = None
1363
1364        self.specified_version = '1.0'
1365        self.specified_encoding = 'UTF-8'
1366        self.specified_doctype = 'wish'
1367        self.specified_entity_names = ('nbsp', 'source', 'target')
1368        self.specified_comment = ('Comment in a DTD',
1369                                  'Really! You think so?')
1370        self.test_data = StringIO()
1371        self.test_data.write('<?xml version="{}" encoding="{}"?>\n'.
1372                             format(self.specified_version,
1373                                    self.specified_encoding))
1374        self.test_data.write('<!DOCTYPE {} [\n'.
1375                             format(self.specified_doctype))
1376        self.test_data.write('<!-- {} -->\n'.
1377                             format(self.specified_comment[0]))
1378        self.test_data.write('<!ELEMENT {} (to,from,heading,body,footer)>\n'.
1379                             format(self.specified_doctype))
1380        self.test_data.write('<!ELEMENT to (#PCDATA)>\n')
1381        self.test_data.write('<!ELEMENT from (#PCDATA)>\n')
1382        self.test_data.write('<!ELEMENT heading (#PCDATA)>\n')
1383        self.test_data.write('<!ELEMENT body (#PCDATA)>\n')
1384        self.test_data.write('<!ELEMENT footer (#PCDATA)>\n')
1385        self.test_data.write('<!ENTITY {} "&#xA0;">\n'.
1386                             format(self.specified_entity_names[0]))
1387        self.test_data.write('<!ENTITY {} "Written by: Alexander.">\n'.
1388                             format(self.specified_entity_names[1]))
1389        self.test_data.write('<!ENTITY {} "Hope it gets to: Aristotle.">\n'.
1390                             format(self.specified_entity_names[2]))
1391        self.test_data.write(']>\n')
1392        self.test_data.write('<{}>'.format(self.specified_doctype))
1393        self.test_data.write('<to>Aristotle</to>\n')
1394        self.test_data.write('<from>Alexander</from>\n')
1395        self.test_data.write('<heading>Supplication</heading>\n')
1396        self.test_data.write('<body>Teach me patience!</body>\n')
1397        self.test_data.write('<footer>&{};&{};&{};</footer>\n'.
1398                             format(self.specified_entity_names[1],
1399                                    self.specified_entity_names[0],
1400                                    self.specified_entity_names[2]))
1401        self.test_data.write('<!-- {} -->\n'.format(self.specified_comment[1]))
1402        self.test_data.write('</{}>\n'.format(self.specified_doctype))
1403        self.test_data.seek(0)
1404
1405        # Data received from handlers - to be validated
1406        self.version = None
1407        self.encoding = None
1408        self.standalone = None
1409        self.doctype = None
1410        self.publicID = None
1411        self.systemID = None
1412        self.end_of_dtd = False
1413        self.comments = []
1414
1415    def test_handlers(self):
1416        class TestLexicalHandler(LexicalHandler):
1417            def __init__(self, test_harness, *args, **kwargs):
1418                super().__init__(*args, **kwargs)
1419                self.test_harness = test_harness
1420
1421            def startDTD(self, doctype, publicID, systemID):
1422                self.test_harness.doctype = doctype
1423                self.test_harness.publicID = publicID
1424                self.test_harness.systemID = systemID
1425
1426            def endDTD(self):
1427                self.test_harness.end_of_dtd = True
1428
1429            def comment(self, text):
1430                self.test_harness.comments.append(text)
1431
1432        self.parser = create_parser()
1433        self.parser.setContentHandler(ContentHandler())
1434        self.parser.setProperty(
1435            'http://xml.org/sax/properties/lexical-handler',
1436            TestLexicalHandler(self))
1437        source = InputSource()
1438        source.setCharacterStream(self.test_data)
1439        self.parser.parse(source)
1440        self.assertEqual(self.doctype, self.specified_doctype)
1441        self.assertIsNone(self.publicID)
1442        self.assertIsNone(self.systemID)
1443        self.assertTrue(self.end_of_dtd)
1444        self.assertEqual(len(self.comments),
1445                         len(self.specified_comment))
1446        self.assertEqual(f' {self.specified_comment[0]} ', self.comments[0])
1447
1448
1449class CDATAHandlerTest(unittest.TestCase):
1450    def setUp(self):
1451        self.parser = None
1452        self.specified_chars = []
1453        self.specified_chars.append(('Parseable character data', False))
1454        self.specified_chars.append(('<> &% - assorted other XML junk.', True))
1455        self.char_index = 0  # Used to index specified results within handlers
1456        self.test_data = StringIO()
1457        self.test_data.write('<root_doc>\n')
1458        self.test_data.write('<some_pcdata>\n')
1459        self.test_data.write(f'{self.specified_chars[0][0]}\n')
1460        self.test_data.write('</some_pcdata>\n')
1461        self.test_data.write('<some_cdata>\n')
1462        self.test_data.write(f'<![CDATA[{self.specified_chars[1][0]}]]>\n')
1463        self.test_data.write('</some_cdata>\n')
1464        self.test_data.write('</root_doc>\n')
1465        self.test_data.seek(0)
1466
1467        # Data received from handlers - to be validated
1468        self.chardata = []
1469        self.in_cdata = False
1470
1471    def test_handlers(self):
1472        class TestLexicalHandler(LexicalHandler):
1473            def __init__(self, test_harness, *args, **kwargs):
1474                super().__init__(*args, **kwargs)
1475                self.test_harness = test_harness
1476
1477            def startCDATA(self):
1478                self.test_harness.in_cdata = True
1479
1480            def endCDATA(self):
1481                self.test_harness.in_cdata = False
1482
1483        class TestCharHandler(ContentHandler):
1484            def __init__(self, test_harness, *args, **kwargs):
1485                super().__init__(*args, **kwargs)
1486                self.test_harness = test_harness
1487
1488            def characters(self, content):
1489                if content != '\n':
1490                    h = self.test_harness
1491                    t = h.specified_chars[h.char_index]
1492                    h.assertEqual(t[0], content)
1493                    h.assertEqual(t[1], h.in_cdata)
1494                    h.char_index += 1
1495
1496        self.parser = create_parser()
1497        self.parser.setContentHandler(TestCharHandler(self))
1498        self.parser.setProperty(
1499            'http://xml.org/sax/properties/lexical-handler',
1500            TestLexicalHandler(self))
1501        source = InputSource()
1502        source.setCharacterStream(self.test_data)
1503        self.parser.parse(source)
1504
1505        self.assertFalse(self.in_cdata)
1506        self.assertEqual(self.char_index, 2)
1507
1508
# Run every test in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
1511