1"""Regression tests for what was in Python 2's "urllib" module"""
2
3import urllib.parse
4import urllib.request
5import urllib.error
6import http.client
7import email.message
8import io
9import unittest
10from unittest.mock import patch
11from test import support
12from test.support import os_helper
13from test.support import warnings_helper
14import os
15try:
16    import ssl
17except ImportError:
18    ssl = None
19import sys
20import tempfile
21from nturl2path import url2pathname, pathname2url
22
23from base64 import b64encode
24import collections
25
26
def hexescape(char):
    """Return *char* percent-escaped as RFC 2396 specifies (e.g. ' ' -> '%20')."""
    # %02X yields zero-padded uppercase hex, exactly matching the manual
    # hex()/upper()/left-pad sequence this replaces (and, like it, emits
    # more than two digits for code points >= 0x100).
    return "%%%02X" % ord(char)
33
# Module-level cache for the FancyURLopener instance shared by the urlopen()
# shortcut below; created lazily on first use (unless a proxies mapping is
# passed, in which case a fresh, uncached opener is built instead).
_urlopener = None
36
37
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object"""
    global _urlopener
    if proxies is not None:
        # An explicit proxy mapping always gets a fresh, uncached opener.
        opener = urllib.request.FancyURLopener(proxies=proxies)
    elif _urlopener:
        # Reuse the cached opener from a previous call.
        opener = _urlopener
    else:
        # First call without proxies: build the opener and cache it.
        _urlopener = opener = FancyURLopener()
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
52
53
def FancyURLopener():
    """Instantiate urllib.request.FancyURLopener while swallowing the
    DeprecationWarning its constructor emits, so tests stay quiet."""
    expected = ('FancyURLopener style of invoking requests is deprecated.',
                DeprecationWarning)
    with warnings_helper.check_warnings(expected):
        return urllib.request.FancyURLopener()
59
60
def fakehttp(fakedata, mock_close=False):
    """Return an HTTPConnection subclass whose socket replays *fakedata*.

    The returned class records the last request sent in its ``buf`` class
    attribute so tests can inspect what urlopen() put on the wire.  With
    *mock_close* true, the connection's close() is a no-op (see the
    bpo-36918 note below).
    """
    class FakeSocket(io.BytesIO):
        # Reference count for makefile()-produced handles; the underlying
        # BytesIO is really closed only when the count drops back to zero.
        io_refs = 1

        def sendall(self, data):
            # Capture outgoing request bytes for later verification.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            # http.client wraps the socket in a file object; hand back self
            # and bump the refcount instead of creating a new stream.
            self.io_refs += 1
            return self

        def read(self, amt=None):
            # A closed fake socket reads as end-of-stream, not an error.
            if self.closed:
                return b""
            return io.BytesIO.read(self, amt)

        def readline(self, length=None):
            if self.closed:
                return b""
            return io.BytesIO.readline(self, length)

        def close(self):
            self.io_refs -= 1
            if self.io_refs == 0:
                io.BytesIO.close(self)

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            # "Connect" by attaching a fresh fake socket preloaded with the
            # canned response; expose it on the class for inspection too.
            self.sock = FakeSocket(self.fakedata)
            type(self).fakesock = self.sock

        if mock_close:
            # bpo-36918: HTTPConnection destructor calls close() which calls
            # flush(). Problem: flush() calls self.fp.flush() which raises
            # "ValueError: I/O operation on closed file" which is logged as an
            # "Exception ignored in". Override close() to silence this error.
            def close(self):
                pass
    FakeHTTPConnection.fakedata = fakedata

    return FakeHTTPConnection
106
107
class FakeHTTPMixin(object):
    """Mixin that swaps http.client.HTTPConnection for a canned-response fake."""

    def fakehttp(self, fakedata, mock_close=False):
        # Remember the real class first, then install the fake built by the
        # module-level fakehttp() factory.  (This method shadows that name,
        # but the unqualified call below still resolves to the module global.)
        self._connection_class = http.client.HTTPConnection
        http.client.HTTPConnection = fakehttp(fakedata, mock_close=mock_close)

    def unfakehttp(self):
        # Undo fakehttp(): put the saved real class back.
        http.client.HTTPConnection = self._connection_class
116
117
class FakeFTPMixin(object):
    """Mixin that replaces urllib.request.ftpwrapper with an inert stand-in."""

    def fakeftp(self):
        # Stash the real wrapper class so unfakeftp() can restore it.
        self._ftpwrapper_class = urllib.request.ftpwrapper

        class _StubFtpWrapper(object):
            """Accepts the real ftpwrapper signature but performs no I/O."""

            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                # Empty payload with zero length -- enough for cache tests.
                return io.BytesIO(), 0

            def close(self):
                pass

        urllib.request.ftpwrapper = _StubFtpWrapper

    def unfakeftp(self):
        # Put the genuine ftpwrapper class back.
        urllib.request.ftpwrapper = self._ftpwrapper_class
136
137
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        f = open(os_helper.TESTFN, 'wb')
        try:
            f.write(self.text)
        finally:
            f.close()
        self.pathname = os_helper.TESTFN
        self.quoted_pathname = urllib.parse.quote(self.pathname)
        # Open through the module-level urlopen() shortcut (FancyURLopener
        # codepath), not urllib.request.urlopen().
        self.returned_obj = urlopen("file:%s" % self.quoted_pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(os_helper.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                         "object returned by urlopen() lacks %s attribute" %
                         attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        # A second readline() past EOF must give an empty bytes object.
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        # The file: handler should expose a real OS-level descriptor.
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_headers(self):
        self.assertIsInstance(self.returned_obj.headers, email.message.Message)

    def test_url(self):
        self.assertEqual(self.returned_obj.url, self.quoted_pathname)

    def test_status(self):
        # file: URLs have no HTTP status.
        self.assertIsNone(self.returned_obj.status)

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.quoted_pathname)

    def test_getcode(self):
        # Like status, getcode() is None for local files.
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A relative path is not a valid URL for urlopen().
        self.assertRaises(ValueError,urllib.request.urlopen,'./' + self.pathname)
229
230
class ProxyTests(unittest.TestCase):
    """Tests for proxy discovery/bypass driven by environment variables."""

    def setUp(self):
        # Records changes to env vars
        self.env = os_helper.EnvironmentVarGuard()
        # Delete all proxy related env vars
        for k in list(os.environ):
            if 'proxy' in k.lower():
                self.env.unset(k)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com'))
        self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com:8888'))
        self.assertTrue(urllib.request.proxy_bypass_environment('newdomain.com:1234'))

    def test_proxy_cgi_ignore(self):
        # HTTP_PROXY must be ignored when running under CGI (REQUEST_METHOD
        # set), since a client could inject it via the Proxy: header.
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            proxies = urllib.request.getproxies_environment()
            self.assertEqual('http://somewhere:3128', proxies['http'])
            self.env.set('REQUEST_METHOD', 'GET')
            proxies = urllib.request.getproxies_environment()
            self.assertNotIn('http', proxies)
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('LocalHost'))                 # MixedCase
        self.assertTrue(bypass('LOCALHOST'))                 # UPPERCASE
        self.assertTrue(bypass('.localhost'))
        self.assertTrue(bypass('newdomain.com:1234'))
        self.assertTrue(bypass('.newdomain.com:1234'))
        self.assertTrue(bypass('foo.d.o.t'))                 # issue 29142
        self.assertTrue(bypass('d.o.t'))
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('.anotherdomain.com:8888'))
        self.assertTrue(bypass('www.newdomain.com:1234'))
        self.assertFalse(bypass('prelocalhost'))
        self.assertFalse(bypass('newdomain.com'))            # no port
        self.assertFalse(bypass('newdomain.com:1235'))       # wrong port

    def test_proxy_bypass_environment_always_match(self):
        # A lone '*' bypasses everything; '*' mixed into a list does not.
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY', '*')
        self.assertTrue(bypass('newdomain.com'))
        self.assertTrue(bypass('newdomain.com:1234'))
        self.env.set('NO_PROXY', '*, anotherdomain.com')
        self.assertTrue(bypass('anotherdomain.com'))
        self.assertFalse(bypass('newdomain.com'))
        self.assertFalse(bypass('newdomain.com:1234'))

    def test_proxy_bypass_environment_newline(self):
        # Trailing newlines must not be stripped before matching.
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertFalse(bypass('localhost\n'))
        self.assertFalse(bypass('anotherdomain.com:8888\n'))
        self.assertFalse(bypass('newdomain.com:1234\n'))
305
306
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Proxy-variable tests where the *order* of environment entries matters."""

    def setUp(self):
        # Swap in an empty ordered mapping so each test fully controls both
        # the contents and the insertion order of the "environment".
        self._saved_env = os.environ
        os.environ = collections.OrderedDict()

    def tearDown(self):
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        # Lowercase entries win even when a later mixed-case entry disagrees;
        # an empty lowercase value removes the setting entirely.
        os.environ['no_proxy'] = ''
        os.environ['No_Proxy'] = 'localhost'
        self.assertFalse(urllib.request.proxy_bypass_environment('localhost'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        os.environ['http_proxy'] = ''
        os.environ['HTTP_PROXY'] = 'http://somewhere:3128'
        self.assertEqual({}, urllib.request.getproxies_environment())
        # Lowercase preference also applies to bypass matching, ports included.
        os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        os.environ['No_Proxy'] = 'xyz.com'
        self.assertTrue(urllib.request.proxy_bypass_environment('localhost'))
        self.assertTrue(urllib.request.proxy_bypass_environment('noproxy.com:5678'))
        self.assertTrue(urllib.request.proxy_bypass_environment('my.proxy:1234'))
        self.assertFalse(urllib.request.proxy_bypass_environment('my.proxy'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        # A non-empty lowercase value replaces the mixed-case one.
        os.environ['http_proxy'] = 'http://somewhere:3128'
        os.environ['Http_Proxy'] = 'http://somewhereelse:3128'
        self.assertEqual('http://somewhere:3128',
                         urllib.request.getproxies_environment()['http'])
341
342
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        # Serve a canned response with the given HTTP version and verify the
        # body, URL and status code all round-trip through urlopen().
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        # The opener requests a non-persistent connection.
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_path_with_control_char_rejected(self):
        # Every ASCII control character (and DEL) in the path must be
        # rejected by the http.client layer to prevent request smuggling.
        for char_no in list(range(0, 0x21)) + [0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost:7777/test{char}/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                # We explicitly test urllib.request.urlopen() instead of the top
                # level 'def urlopen()' function defined in this... (quite ugly)
                # test suite.  They use different url opening codepaths.  Plain
                # urlopen uses FancyURLOpener which goes via a codepath that
                # calls urllib.parse.quote() on the URL which makes all of the
                # above attempts at injection within the url _path_ safe.
                escaped_char_repr = repr(char).replace('\\', r'\\')
                InvalidURL = http.client.InvalidURL
                with self.assertRaisesRegex(
                    InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(
                    InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urllib.request.urlopen(f"https:{schemeless_url}")
                # This code path quotes the URL so there is no injection.
                resp = urlopen(f"http:{schemeless_url}")
                self.assertNotIn(char, resp.geturl())
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_path_with_newline_header_injection_rejected(self):
        # CRLF sequences embedded in the path must not become extra headers.
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            # We explicitly test urllib.request.urlopen() instead of the top
            # level 'def urlopen()' function defined in this... (quite ugly)
            # test suite.  They use different url opening codepaths.  Plain
            # urlopen uses FancyURLOpener which goes via a codepath that
            # calls urllib.parse.quote() on the URL which makes all of the
            # above attempts at injection within the url _path_ safe.
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                InvalidURL, r"contain control.*\\r.*(found at least . .)"):
                urllib.request.urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urllib.request.urlopen(f"https:{schemeless_url}")
            # This code path quotes the URL so there is no injection.
            resp = urlopen(f"http:{schemeless_url}")
            self.assertNotIn(' ', resp.geturl())
            self.assertNotIn('\r', resp.geturl())
            self.assertNotIn('\n', resp.geturl())
        finally:
            self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_host_with_control_char_rejected(self):
        # Control characters in the *host* portion are rejected on both the
        # FancyURLopener and the urllib.request.urlopen codepaths.
        for char_no in list(range(0, 0x21)) + [0x7f]:
            char = chr(char_no)
            schemeless_url = f"//localhost{char}/test/"
            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
            try:
                escaped_char_repr = repr(char).replace('\\', r'\\')
                InvalidURL = http.client.InvalidURL
                with self.assertRaisesRegex(
                    InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urlopen(f"http:{schemeless_url}")
                with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"):
                    urlopen(f"https:{schemeless_url}")
            finally:
                self.unfakehttp()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_url_host_with_newline_header_injection_rejected(self):
        # CRLF in the host must not allow header injection either.
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
        host = "localhost\r\nX-injected: header\r\n"
        schemeless_url = "//" + host + ":8080/test/?test=a"
        try:
            InvalidURL = http.client.InvalidURL
            with self.assertRaisesRegex(
                InvalidURL, r"contain control.*\\r"):
                urlopen(f"http:{schemeless_url}")
            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                urlopen(f"https:{schemeless_url}")
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
        try:
            # Redirecting http -> file must be refused.
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        for i in range(FancyURLopener().maxtries):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''', mock_close=True)
            try:
                self.assertRaises(urllib.error.HTTPError, urlopen,
                    "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_file_notexists(self):
        # A file: URL works while the file exists and raises once it is gone.
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen(test_ftp_url)
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        # With MAXFTPCACHE forced to 0, the cached entry must be pruned.
        self.fakeftp()
        try:
            urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            fakehttp_wrapper = http.client.HTTPConnection
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode(userpass.encode("ASCII")).decode("ASCII"))
            fp = urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with warnings_helper.check_warnings(('',DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        # Passing both cafile and context is ambiguous and must fail.
        context = ssl.create_default_context()
        with warnings_helper.check_warnings(('', DeprecationWarning)):
            with self.assertRaises(ValueError):
                urllib.request.urlopen(
                    "https://localhost", cafile="/nonexistent/path", context=context
                )
608
609
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # clear _opener global variable
        self.addCleanup(urllib.request.urlcleanup)

        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        # Percent-encoded form of self.text.
        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        # Same text, base64-encoded with a different charset.
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.image_url_resp = urllib.request.urlopen(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                         "object returned by urlopen() lacks %s attribute" %
                         attr)

    def test_info(self):
        # info() exposes the media type / charset parsed from the URL; a bare
        # "data:," defaults to text/plain;charset=US-ASCII per RFC 2397.
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
            [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
            str(len(self.image)))
        self.assertEqual(urllib.request.urlopen("data:,").info().get_params(),
            [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
            self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        # Decoding with the charset declared in the URL recovers self.text.
        self.assertEqual(self.text_url_resp.read().decode(
            dict(self.text_url_resp.info().get_params())['charset']), self.text)

    def test_read_text_base64(self):
        self.assertEqual(self.text_url_base64_resp.read().decode(
            dict(self.text_url_base64_resp.info().get_params())['charset']),
            self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError,urllib.request.urlopen,'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=')
685
686
687class urlretrieve_FileTests(unittest.TestCase):
688    """Test urllib.urlretrieve() on local files"""
689
690    def setUp(self):
691        # clear _opener global variable
692        self.addCleanup(urllib.request.urlcleanup)
693
694        # Create a list of temporary files. Each item in the list is a file
695        # name (absolute path or relative to the current working directory).
696        # All files in this list will be deleted in the tearDown method. Note,
697        # this only helps to makes sure temporary files get deleted, but it
698        # does nothing about trying to close files that may still be open. It
699        # is the responsibility of the developer to properly close files even
700        # when exceptional conditions occur.
701        self.tempFiles = []
702
703        # Create a temporary file.
704        self.registerFileForCleanUp(os_helper.TESTFN)
705        self.text = b'testing urllib.urlretrieve'
706        try:
707            FILE = open(os_helper.TESTFN, 'wb')
708            FILE.write(self.text)
709            FILE.close()
710        finally:
711            try: FILE.close()
712            except: pass
713
714    def tearDown(self):
715        # Delete the temporary files.
716        for each in self.tempFiles:
717            try: os.remove(each)
718            except: pass
719
720    def constructLocalFileUrl(self, filePath):
721        filePath = os.path.abspath(filePath)
722        try:
723            filePath.encode("utf-8")
724        except UnicodeEncodeError:
725            raise unittest.SkipTest("filePath is not encodable to utf8")
726        return "file://%s" % urllib.request.pathname2url(filePath)
727
728    def createNewTempFile(self, data=b""):
729        """Creates a new temporary file containing the specified data,
730        registers the file for deletion during the test fixture tear down, and
731        returns the absolute path of the file."""
732
733        newFd, newFilePath = tempfile.mkstemp()
734        try:
735            self.registerFileForCleanUp(newFilePath)
736            newFile = os.fdopen(newFd, "wb")
737            newFile.write(data)
738            newFile.close()
739        finally:
740            try: newFile.close()
741            except: pass
742        return newFilePath
743
744    def registerFileForCleanUp(self, fileName):
745        self.tempFiles.append(fileName)
746
747    def test_basic(self):
748        # Make sure that a local file just gets its own location returned and
749        # a headers value is returned.
750        result = urllib.request.urlretrieve("file:%s" % os_helper.TESTFN)
751        self.assertEqual(result[0], os_helper.TESTFN)
752        self.assertIsInstance(result[1], email.message.Message,
753                              "did not get an email.message.Message instance "
754                              "as second returned value")
755
756    def test_copy(self):
757        # Test that setting the filename argument works.
758        second_temp = "%s.2" % os_helper.TESTFN
759        self.registerFileForCleanUp(second_temp)
760        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
761            os_helper.TESTFN), second_temp)
762        self.assertEqual(second_temp, result[0])
763        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
764                                                  "made")
765        FILE = open(second_temp, 'rb')
766        try:
767            text = FILE.read()
768            FILE.close()
769        finally:
770            try: FILE.close()
771            except: pass
772        self.assertEqual(self.text, text)
773
774    def test_reporthook(self):
775        # Make sure that the reporthook works.
776        def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
777            self.assertIsInstance(block_count, int)
778            self.assertIsInstance(block_read_size, int)
779            self.assertIsInstance(file_size, int)
780            self.assertEqual(block_count, count_holder[0])
781            count_holder[0] = count_holder[0] + 1
782        second_temp = "%s.2" % os_helper.TESTFN
783        self.registerFileForCleanUp(second_temp)
784        urllib.request.urlretrieve(
785            self.constructLocalFileUrl(os_helper.TESTFN),
786            second_temp, hooktester)
787
788    def test_reporthook_0_bytes(self):
789        # Test on zero length file. Should call reporthook only 1 time.
790        report = []
791        def hooktester(block_count, block_read_size, file_size, _report=report):
792            _report.append((block_count, block_read_size, file_size))
793        srcFileName = self.createNewTempFile()
794        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
795            os_helper.TESTFN, hooktester)
796        self.assertEqual(len(report), 1)
797        self.assertEqual(report[0][2], 0)
798
799    def test_reporthook_5_bytes(self):
800        # Test on 5 byte file. Should call reporthook only 2 times (once when
801        # the "network connection" is established and once when the block is
802        # read).
803        report = []
804        def hooktester(block_count, block_read_size, file_size, _report=report):
805            _report.append((block_count, block_read_size, file_size))
806        srcFileName = self.createNewTempFile(b"x" * 5)
807        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
808            os_helper.TESTFN, hooktester)
809        self.assertEqual(len(report), 2)
810        self.assertEqual(report[0][2], 5)
811        self.assertEqual(report[1][2], 5)
812
813    def test_reporthook_8193_bytes(self):
814        # Test on 8193 byte file. Should call reporthook only 3 times (once
815        # when the "network connection" is established, once for the next 8192
816        # bytes, and once for the last byte).
817        report = []
818        def hooktester(block_count, block_read_size, file_size, _report=report):
819            _report.append((block_count, block_read_size, file_size))
820        srcFileName = self.createNewTempFile(b"x" * 8193)
821        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
822            os_helper.TESTFN, hooktester)
823        self.assertEqual(len(report), 3)
824        self.assertEqual(report[0][2], 8193)
825        self.assertEqual(report[0][1], 8192)
826        self.assertEqual(report[1][1], 8192)
827        self.assertEqual(report[2][1], 8192)
828
829
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    # Canned response whose Content-Length (100) far exceeds its actual
    # body, so urlretrieve must raise ContentTooShortError.
    _SHORT_RESPONSE = b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''

    def test_short_content_raises_ContentTooShortError(self):
        # With a reporthook supplied, a truncated body must still raise.
        self.addCleanup(urllib.request.urlcleanup)
        self.fakehttp(self._SHORT_RESPONSE)

        def _reporthook(par1, par2, par3):
            pass

        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve(support.TEST_HTTP_URL,
                                           reporthook=_reporthook)
            finally:
                self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        # Same truncated body, but without any reporthook installed.
        self.addCleanup(urllib.request.urlcleanup)
        self.fakehttp(self._SHORT_RESPONSE)
        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve(support.TEST_HTTP_URL)
            finally:
                self.unfakehttp()
873
874
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.quote() and urllib.quote_plus()

    According to RFC 3986 (Uniform Resource Identifiers), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.
    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
    character properly. Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_,.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-~"])
        result = urllib.parse.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %r != %r" % (do_not_quote, result))
        result = urllib.parse.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                        "using quote_plus(): %r != %r" % (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        result = urllib.parse.quote_plus(quote_by_default,
                                         safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %r != %r" %
                         (quote_by_default, result))
        # Safe expressed as bytes rather than str
        result = urllib.parse.quote(quote_by_default, safe=b"<>")
        self.assertEqual(quote_by_default, result,
                         "using quote(): %r != %r" % (quote_by_default, result))
        # "Safe" non-ASCII characters should have no effect
        # (Since URIs are not allowed to have non-ASCII characters)
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))
        # Same as above, but using a bytes rather than str
        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" %
                         (expect, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
        should_quote.append(r'<>#%"{}|\^[]`')
        should_quote.append(chr(127)) # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.parse.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.parse.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
        del should_quote
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.parse.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %r != %r" % (expected, result))
        result = urllib.parse.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %r != %r" % (expected, result))

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.parse.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %r != %r" % (result, hexescape(' ')))
        result = urllib.parse.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %r != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.parse.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))

    def test_quoting_plus(self):
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
        # Test with bytes
        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        # Test with safe bytes
        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
                         'alpha+beta+gamma')

    def test_quote_bytes(self):
        # Bytes should quote directly to percent-encoded values
        given = b"\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Encoding argument should raise type error on bytes input
        self.assertRaises(TypeError, urllib.parse.quote, given,
                            encoding="latin-1")
        # quote_from_bytes should work the same
        result = urllib.parse.quote_from_bytes(given)
        self.assertEqual(expect, result,
                         "using quote_from_bytes(): %r != %r"
                         % (expect, result))

    def test_quote_with_unicode(self):
        # Characters in Latin-1 range, encoded by default in UTF-8
        given = "\xa2\xd8ab\xff"
        expect = "%C2%A2%C3%98ab%C3%BF"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded by with None (default)
        result = urllib.parse.quote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in Latin-1 range, encoded with Latin-1
        given = "\xa2\xd8ab\xff"
        expect = "%A2%D8ab%FF"
        result = urllib.parse.quote(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded by default in UTF-8
        given = "\u6f22\u5b57"              # "Kanji"
        expect = "%E6%BC%A2%E5%AD%97"
        result = urllib.parse.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, encoded with Latin-1
        given = "\u6f22\u5b57"
        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
                                    encoding="latin-1")
        # Characters in BMP, encoded with Latin-1, with replace error handling
        given = "\u6f22\u5b57"
        expect = "%3F%3F"                   # "??"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="replace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))
        # Characters in BMP, Latin-1, with xmlcharref error handling
        given = "\u6f22\u5b57"
        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
        result = urllib.parse.quote(given, encoding="latin-1",
                                    errors="xmlcharrefreplace")
        self.assertEqual(expect, result,
                         "using quote(): %r != %r" % (expect, result))

    def test_quote_plus_with_unicode(self):
        # Encoding (latin-1) test for quote_plus
        given = "\xa2\xd8 \xff"
        expect = "%A2%D8+%FF"
        result = urllib.parse.quote_plus(given, encoding="latin-1")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
        # Errors test for quote_plus
        given = "ab\u6f22\u5b57 cd"
        expect = "ab%3F%3F+cd"
        result = urllib.parse.quote_plus(given, encoding="latin-1",
                                         errors="replace")
        self.assertEqual(expect, result,
                         "using quote_plus(): %r != %r" % (expect, result))
1078
1079
class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the docstring for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.parse.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %r != %r" % (expect, result))
            result = urllib.parse.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %r != %r" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        del escape_list
        result = urllib.parse.unquote(escape_string)
        # Only the escape for '%' itself should survive as a literal '%'.
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes
        given = '%xab'
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        given = '%x'
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        given = '%'
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        # unquote_to_bytes
        given = '%xab'
        expect = bytes(given, 'ascii')
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        given = '%x'
        expect = bytes(given, 'ascii')
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        given = '%'
        expect = bytes(given, 'ascii')
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = b'\xab\xea'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        expect = given
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.parse.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %r != %r" % (expect, result))

    def test_unquote_to_bytes(self):
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test on a string with unescaped non-ASCII characters
        # (Technically an invalid URI; expect those characters to be UTF-8
        # encoded).
        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
        expect = b'\xe6\xbc\xa2\xc3\xbc'    # UTF-8 for "\u6f22\u00fc"
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input
        given = b'%A2%D8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))
        # Test with a bytes as input, with unescaped non-ASCII bytes
        # (Technically an invalid URI; expect those bytes to be preserved)
        given = b'%A2\xd8ab%FF'
        expect = b'\xa2\xd8ab\xff'
        result = urllib.parse.unquote_to_bytes(given)
        self.assertEqual(expect, result,
                         "using unquote_to_bytes(): %r != %r"
                         % (expect, result))

    def test_unquote_with_unicode(self):
        # Characters in the Latin-1 range, encoded with UTF-8
        given = 'br%C3%BCckner_sapporo_20050930.doc'
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
        # Characters in the Latin-1 range, encoded with None (default)
        result = urllib.parse.unquote(given, encoding=None, errors=None)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in the Latin-1 range, encoded with Latin-1
        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
                                      encoding="latin-1")
        expect = 'br\u00fcckner_sapporo_20050930.doc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Characters in BMP, encoded with UTF-8
        given = "%E6%BC%A2%E5%AD%97"
        expect = "\u6f22\u5b57"             # "Kanji"
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence
        given = "%F3%B1"
        expect = "\ufffd"                   # Replacement character
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, replace errors
        result = urllib.parse.unquote(given, errors="replace")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # Decode with UTF-8, invalid sequence, ignoring errors
        given = "%F3%B1"
        expect = ""
        result = urllib.parse.unquote(given, errors="ignore")
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, UTF-8
        result = urllib.parse.unquote("\u6f22%C3%BC")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII and percent-encoded characters, Latin-1
        # (Note, the string contains non-Latin-1-representable characters)
        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
        expect = '\u6f22\u00fc'
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

    def test_unquoting_with_bytes_input(self):
        # ASCII characters decoded to a string
        given = b'blueberryjam'
        expect = 'blueberryjam'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII hex-encoded characters and ASCII characters
        given = b'bl\xc3\xa5b\xc3\xa6rsyltet\xc3\xb8y'
        expect = 'bl\u00e5b\u00e6rsyltet\u00f8y'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))

        # A mix of non-ASCII percent-encoded characters and ASCII characters
        given = b'bl%c3%a5b%c3%a6rsyltet%c3%b8j'
        expect = 'bl\u00e5b\u00e6rsyltet\u00f8j'
        result = urllib.parse.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %r != %r" % (expect, result))
1289
1290
1291class urlencode_Tests(unittest.TestCase):
1292    """Tests for urlencode()"""
1293
1294    def help_inputtype(self, given, test_type):
1295        """Helper method for testing different input types.
1296
1297        'given' must lead to only the pairs:
1298            * 1st, 1
1299            * 2nd, 2
1300            * 3rd, 3
1301
1302        Test cannot assume anything about order.  Docs make no guarantee and
1303        have possible dictionary input.
1304
1305        """
1306        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
1307        result = urllib.parse.urlencode(given)
1308        for expected in expect_somewhere:
1309            self.assertIn(expected, result,
1310                         "testing %s: %s not found in %s" %
1311                         (test_type, expected, result))
1312        self.assertEqual(result.count('&'), 2,
1313                         "testing %s: expected 2 '&'s; got %s" %
1314                         (test_type, result.count('&')))
1315        amp_location = result.index('&')
1316        on_amp_left = result[amp_location - 1]
1317        on_amp_right = result[amp_location + 1]
1318        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
1319                     "testing %s: '&' not located in proper place in %s" %
1320                     (test_type, result))
1321        self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
1322                         "testing %s: "
1323                         "unexpected number of characters: %s != %s" %
1324                         (test_type, len(result), (5 * 3) + 2))
1325
1326    def test_using_mapping(self):
1327        # Test passing in a mapping object as an argument.
1328        self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
1329                            "using dict as input type")
1330
1331    def test_using_sequence(self):
1332        # Test passing in a sequence of two-item sequences as an argument.
1333        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
1334                            "using sequence of two-item tuples as input")
1335
1336    def test_quoting(self):
1337        # Make sure keys and values are quoted using quote_plus()
1338        given = {"&":"="}
1339        expect = "%s=%s" % (hexescape('&'), hexescape('='))
1340        result = urllib.parse.urlencode(given)
1341        self.assertEqual(expect, result)
1342        given = {"key name":"A bunch of pluses"}
1343        expect = "key+name=A+bunch+of+pluses"
1344        result = urllib.parse.urlencode(given)
1345        self.assertEqual(expect, result)
1346
1347    def test_doseq(self):
1348        # Test that passing True for 'doseq' parameter works correctly
1349        given = {'sequence':['1', '2', '3']}
1350        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1351        result = urllib.parse.urlencode(given)
1352        self.assertEqual(expect, result)
1353        result = urllib.parse.urlencode(given, True)
1354        for value in given["sequence"]:
1355            expect = "sequence=%s" % value
1356            self.assertIn(expect, result)
1357        self.assertEqual(result.count('&'), 2,
1358                         "Expected 2 '&'s, got %s" % result.count('&'))
1359
1360    def test_empty_sequence(self):
1361        self.assertEqual("", urllib.parse.urlencode({}))
1362        self.assertEqual("", urllib.parse.urlencode([]))
1363
1364    def test_nonstring_values(self):
1365        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1366        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1367
1368    def test_nonstring_seq_values(self):
1369        self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1370        self.assertEqual("a=None&a=a",
1371                         urllib.parse.urlencode({"a": [None, "a"]}, True))
1372        data = collections.OrderedDict([("a", 1), ("b", 1)])
1373        self.assertEqual("a=a&a=b",
1374                         urllib.parse.urlencode({"a": data}, True))
1375
1376    def test_urlencode_encoding(self):
1377        # ASCII encoding. Expect %3F with errors="replace'
1378        given = (('\u00a0', '\u00c1'),)
1379        expect = '%3F=%3F'
1380        result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1381        self.assertEqual(expect, result)
1382
1383        # Default is UTF-8 encoding.
1384        given = (('\u00a0', '\u00c1'),)
1385        expect = '%C2%A0=%C3%81'
1386        result = urllib.parse.urlencode(given)
1387        self.assertEqual(expect, result)
1388
1389        # Latin-1 encoding.
1390        given = (('\u00a0', '\u00c1'),)
1391        expect = '%A0=%C1'
1392        result = urllib.parse.urlencode(given, encoding="latin-1")
1393        self.assertEqual(expect, result)
1394
1395    def test_urlencode_encoding_doseq(self):
1396        # ASCII Encoding. Expect %3F with errors="replace'
1397        given = (('\u00a0', '\u00c1'),)
1398        expect = '%3F=%3F'
1399        result = urllib.parse.urlencode(given, doseq=True,
1400                                        encoding="ASCII", errors="replace")
1401        self.assertEqual(expect, result)
1402
1403        # ASCII Encoding. On a sequence of values.
1404        given = (("\u00a0", (1, "\u00c1")),)
1405        expect = '%3F=1&%3F=%3F'
1406        result = urllib.parse.urlencode(given, True,
1407                                        encoding="ASCII", errors="replace")
1408        self.assertEqual(expect, result)
1409
1410        # Utf-8
1411        given = (("\u00a0", "\u00c1"),)
1412        expect = '%C2%A0=%C3%81'
1413        result = urllib.parse.urlencode(given, True)
1414        self.assertEqual(expect, result)
1415
1416        given = (("\u00a0", (42, "\u00c1")),)
1417        expect = '%C2%A0=42&%C2%A0=%C3%81'
1418        result = urllib.parse.urlencode(given, True)
1419        self.assertEqual(expect, result)
1420
1421        # latin-1
1422        given = (("\u00a0", "\u00c1"),)
1423        expect = '%A0=%C1'
1424        result = urllib.parse.urlencode(given, True, encoding="latin-1")
1425        self.assertEqual(expect, result)
1426
1427        given = (("\u00a0", (42, "\u00c1")),)
1428        expect = '%A0=42&%A0=%C1'
1429        result = urllib.parse.urlencode(given, True, encoding="latin-1")
1430        self.assertEqual(expect, result)
1431
1432    def test_urlencode_bytes(self):
1433        given = ((b'\xa0\x24', b'\xc1\x24'),)
1434        expect = '%A0%24=%C1%24'
1435        result = urllib.parse.urlencode(given)
1436        self.assertEqual(expect, result)
1437        result = urllib.parse.urlencode(given, True)
1438        self.assertEqual(expect, result)
1439
1440        # Sequence of values
1441        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
1442        expect = '%A0%24=42&%A0%24=%C1%24'
1443        result = urllib.parse.urlencode(given, True)
1444        self.assertEqual(expect, result)
1445
1446    def test_urlencode_encoding_safe_parameter(self):
1447
1448        # Send '$' (\x24) as safe character
1449        # Default utf-8 encoding
1450
1451        given = ((b'\xa0\x24', b'\xc1\x24'),)
1452        result = urllib.parse.urlencode(given, safe=":$")
1453        expect = '%A0$=%C1$'
1454        self.assertEqual(expect, result)
1455
1456        given = ((b'\xa0\x24', b'\xc1\x24'),)
1457        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1458        expect = '%A0$=%C1$'
1459        self.assertEqual(expect, result)
1460
1461        # Safe parameter in sequence
1462        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1463        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1464        result = urllib.parse.urlencode(given, True, safe=":$")
1465        self.assertEqual(expect, result)
1466
1467        # Test all above in latin-1 encoding
1468
1469        given = ((b'\xa0\x24', b'\xc1\x24'),)
1470        result = urllib.parse.urlencode(given, safe=":$",
1471                                        encoding="latin-1")
1472        expect = '%A0$=%C1$'
1473        self.assertEqual(expect, result)
1474
1475        given = ((b'\xa0\x24', b'\xc1\x24'),)
1476        expect = '%A0$=%C1$'
1477        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1478                                        encoding="latin-1")
1479
1480        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1481        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1482        result = urllib.parse.urlencode(given, True, safe=":$",
1483                                        encoding="latin-1")
1484        self.assertEqual(expect, result)
1485
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # A simple relative path must round-trip through pathname2url()
        # and url2pathname() unchanged (modulo the platform separator).
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = urllib.request.url2pathname(expected_url)
        # Bug fix: the failure message used to misspell "url2pathname()".
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the nturl2path functions.')
    def test_prefixes(self):
        # Test special prefixes are correctly handled in pathname2url()
        given = '\\\\?\\C:\\dir'
        expect = '///C:/dir'
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        given = '\\\\?\\unc\\server\\share\\dir'
        expect = '/server/share/dir'
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.url2path function.')
    def test_ntpath(self):
        # Several equivalent URL spellings must map to the same Windows path.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            # Bug fix: the failure message used to contain a doubled dot
            # ("urllib.request..url2pathname").
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1563
class Utility_Tests(unittest.TestCase):
    """Testcase to test the various utility functions in the urllib."""

    def test_thishost(self):
        """Test the urllib.request.thishost utility function returns a tuple"""
        host_info = urllib.request.thishost()
        self.assertIsInstance(host_info, tuple)
1570
1571
class URLopener_Tests(FakeHTTPMixin, unittest.TestCase):
    """Testcase to test the open method of URLopener class."""

    def test_quoted_open(self):
        # A scheme-specific opener sees the URL with unsafe characters
        # quoted but safe characters left alone.
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                return url

        deprecation = ('DummyURLopener style of invoking requests is deprecated.',
                       DeprecationWarning)
        with warnings_helper.check_warnings(deprecation):
            opened = DummyURLopener().open('spam://example/ /')
            self.assertEqual(opened, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            opened = DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
            self.assertEqual(opened,
                             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")

    @warnings_helper.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_file(self):
        # retrieve() on a file: URL hands back the local filename.
        with os_helper.temp_dir() as tmpdir:
            fd, tmpfile = tempfile.mkstemp(dir=tmpdir)
            os.close(fd)
            fileurl = "file:" + urllib.request.pathname2url(tmpfile)
            filename, _ = urllib.request.URLopener().retrieve(fileurl)
            # Some buildbots have TEMP folder that uses a lowercase drive letter.
            self.assertEqual(os.path.normcase(filename),
                             os.path.normcase(tmpfile))

    @warnings_helper.ignore_warnings(category=DeprecationWarning)
    def test_urlopener_retrieve_remote(self):
        # A remote retrieve() keeps the URL's file extension.
        url = "http://www.python.org/file.txt"
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        self.addCleanup(self.unfakehttp)
        filename, _ = urllib.request.URLopener().retrieve(url)
        suffix = os.path.splitext(filename)[1]
        self.assertEqual(suffix, ".txt")

    @warnings_helper.ignore_warnings(category=DeprecationWarning)
    def test_local_file_open(self):
        # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
        class DummyURLopener(urllib.request.URLopener):
            def open_local_file(self, url):
                return url

        for url in ('local_file://example', 'local-file://example'):
            for attempt in (urllib.request.urlopen,
                            urllib.request.URLopener().open,
                            urllib.request.URLopener().retrieve,
                            DummyURLopener().open,
                            DummyURLopener().retrieve):
                self.assertRaises(OSError, attempt, url)
1620
1621
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        # Without data the default HTTP method is GET; with data it is POST.
        request = urllib.request.Request("http://www.python.org")
        self.assertEqual(request.get_method(), 'GET')
        request = urllib.request.Request("http://www.python.org", {})
        self.assertEqual(request.get_method(), 'POST')

    def test_with_method_arg(self):
        # An explicit method= argument overrides the data-based default.
        Request = urllib.request.Request
        for data in (None, {}):
            request = Request("http://www.python.org", data, method='HEAD')
            self.assertEqual(request.method, 'HEAD')
            self.assertEqual(request.get_method(), 'HEAD')
        request = Request("http://www.python.org", method='GET')
        self.assertEqual(request.get_method(), 'GET')
        # Assigning to .method afterwards also takes effect.
        request.method = 'HEAD'
        self.assertEqual(request.get_method(), 'HEAD')
1644
1645
class URL2PathNameTests(unittest.TestCase):
    """Tests for nturl2path.url2pathname()."""

    def test_converting_drive_letter(self):
        for url, expected in (("///C|", 'C:'),
                              ("///C:", 'C:'),
                              ("///C|/", 'C:\\')):
            self.assertEqual(url2pathname(url), expected)

    def test_converting_when_no_drive_letter(self):
        # cannot end a raw string in \
        self.assertEqual(url2pathname("///C/test/"), '\\\\\\C\\test\\')
        self.assertEqual(url2pathname("////C/test/"), '\\\\C\\test\\')

    def test_simple_compare(self):
        self.assertEqual(url2pathname("///C|/foo/bar/spam.foo"),
                         'C:\\foo\\bar\\spam.foo')

    def test_non_ascii_drive_letter(self):
        # A non-ASCII "drive letter" is rejected.
        self.assertRaises(IOError, url2pathname, "///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        for path in ('C:',
                     '\\\\\\C\\test\\\\',
                     'C:\\foo\\bar\\spam.foo'):
            self.assertEqual(url2pathname(pathname2url(path)), path)
1672
class PathName2URLTests(unittest.TestCase):
    """Tests for nturl2path.pathname2url()."""

    def test_converting_drive_letter(self):
        self.assertEqual(pathname2url("C:"), '///C:')
        self.assertEqual(pathname2url("C:\\"), '///C:')

    def test_converting_when_no_drive_letter(self):
        cases = (("\\\\\\folder\\test\\", '/////folder/test/'),
                 ("\\\\folder\\test\\", '////folder/test/'),
                 ("\\folder\\test\\", '/folder/test/'))
        for path, expected in cases:
            self.assertEqual(pathname2url(path), expected)

    def test_simple_compare(self):
        self.assertEqual(pathname2url('C:\\foo\\bar\\spam.foo'),
                         "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        # A "drive" longer than one character is rejected.
        self.assertRaises(IOError, pathname2url, "XX:\\")

    def test_roundtrip_pathname2url(self):
        for path in ('///C:',
                     '/////folder/test/',
                     '///C:/foo/bar/spam.foo'):
            self.assertEqual(pathname2url(url2pathname(path)), path)
1700
# Allow running this test module directly: `python test_urllib.py`.
if __name__ == '__main__':
    unittest.main()
1703