1"""
Test the implementation of PEP 540: the UTF-8 Mode.
3"""
4
5import locale
6import subprocess
7import sys
8import textwrap
9import unittest
10from test import support
11from test.support.script_helper import assert_python_ok, assert_python_failure
12from test.support import os_helper
13
14
# Platform switches derived from sys.platform.
MS_WINDOWS = sys.platform == 'win32'
VXWORKS = sys.platform == "vxworks"

# Locale names treated as "the POSIX locale" by the tests.
POSIX_LOCALES = ('C', 'POSIX')
18
class UTF8ModeTests(unittest.TestCase):
    """Check the UTF-8 Mode by inspecting what child interpreters report.

    Every test starts a child Python with chosen command line options
    and environment variables, then compares the text printed by the
    child with the expected value.
    """

    # Environment variables set for every child started by get_output().
    # Keyword arguments passed to get_output() override these entries.
    DEFAULT_ENV = {
        'PYTHONUTF8': '',
        'PYTHONLEGACYWINDOWSFSENCODING': '',
        'PYTHONCOERCECLOCALE': '0',
    }

    def posix_locale(self):
        """Return True if the current LC_CTYPE locale is in POSIX_LOCALES."""
        # Passing None only queries the locale, it does not change it.
        current = locale.setlocale(locale.LC_CTYPE, None)
        return current in POSIX_LOCALES

    def get_output(self, *args, failure=False, **kw):
        """Run a child Python and return one of its output streams as text.

        *args* are the command line arguments of the child and *kw* its
        environment variables, layered on top of DEFAULT_ENV.

        If *failure* is false, the child is run with assert_python_ok()
        and item 1 of the result (stdout) is used.  If *failure* is true,
        assert_python_failure() is used and item 2 (stderr) is taken.

        The selected bytes are decoded and trailing newline and carriage
        return characters are removed.
        """
        env = {**self.DEFAULT_ENV, **kw}
        if failure:
            runner, stream = assert_python_failure, 2
        else:
            runner, stream = assert_python_ok, 1
        raw = runner(*args, **env)[stream]
        return raw.decode().rstrip("\n\r")

    @unittest.skipIf(MS_WINDOWS, 'Windows has no POSIX locale')
    def test_posix_locale(self):
        # The child is expected to report utf8_mode=1 for each POSIX locale
        script = 'import sys; print(sys.flags.utf8_mode)'

        for posix_loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=posix_loc):
                reported = self.get_output('-c', script, LC_ALL=posix_loc)
                self.assertEqual(reported, '1')

    def test_xoption(self):
        script = 'import sys; print(sys.flags.utf8_mode)'

        def utf8_mode(xoption, **env):
            # Value of sys.flags.utf8_mode printed by a child run with
            # "-X <xoption>"
            return self.get_output('-X', xoption, '-c', script, **env)

        self.assertEqual(utf8_mode('utf8'), '1')

        # undocumented but accepted syntax: -X utf8=1
        self.assertEqual(utf8_mode('utf8=1'), '1')

        self.assertEqual(utf8_mode('utf8=0'), '0')

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 Mode
            # and has the priority over -X utf8
            reported = utf8_mode('utf8', PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(reported, '0')

    def test_env_var(self):
        script = 'import sys; print(sys.flags.utf8_mode)'

        def utf8_mode(*options, failure=False, **env):
            # Output of a child run with *options* placed before "-c"
            return self.get_output(*options, '-c', script,
                                   failure=failure, **env)

        self.assertEqual(utf8_mode(PYTHONUTF8='1'), '1')
        self.assertEqual(utf8_mode(PYTHONUTF8='0'), '0')

        # -X utf8 has the priority over PYTHONUTF8
        self.assertEqual(utf8_mode('-X', 'utf8=0', PYTHONUTF8='1'), '0')

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
            # and has the priority over PYTHONUTF8
            reported = utf8_mode('-X', 'utf8', PYTHONUTF8='1',
                                 PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(reported, '0')

        # Cannot test with the POSIX locale, since the POSIX locale enables
        # the UTF-8 mode
        if not self.posix_locale():
            # PYTHONUTF8 should be ignored if -E is used
            self.assertEqual(utf8_mode('-E', PYTHONUTF8='1'), '0')

        # invalid mode
        stderr = utf8_mode(PYTHONUTF8='xxx', failure=True)
        self.assertIn('invalid PYTHONUTF8 environment variable value',
                      stderr.rstrip())

    def test_filesystemencoding(self):
        code = textwrap.dedent('''
            import sys
            print("{}/{}".format(sys.getfilesystemencoding(),
                                 sys.getfilesystemencodeerrors()))
        ''')

        # Only the expected error handler depends on the platform
        expected = ('utf-8/surrogatepass' if MS_WINDOWS
                    else 'utf-8/surrogateescape')

        reported = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(reported, expected)

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
            # and has the priority over -X utf8 and PYTHONUTF8
            reported = self.get_output('-X', 'utf8', '-c', code,
                                       PYTHONUTF8='strict',
                                       PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(reported, 'mbcs/replace')

    def test_stdio(self):
        code = textwrap.dedent('''
            import sys
            print(f"stdin: {sys.stdin.encoding}/{sys.stdin.errors}")
            print(f"stdout: {sys.stdout.encoding}/{sys.stdout.errors}")
            print(f"stderr: {sys.stderr.encoding}/{sys.stderr.errors}")
        ''')

        def stdio_report(io_encoding):
            # Lines printed by a "-X utf8" child run with the given
            # PYTHONIOENCODING value
            text = self.get_output('-X', 'utf8', '-c', code,
                                   PYTHONIOENCODING=io_encoding)
            return text.splitlines()

        self.assertEqual(stdio_report(''),
                         ['stdin: utf-8/surrogateescape',
                          'stdout: utf-8/surrogateescape',
                          'stderr: utf-8/backslashreplace'])

        # PYTHONIOENCODING has the priority over PYTHONUTF8
        self.assertEqual(stdio_report("latin1"),
                         ['stdin: iso8859-1/strict',
                          'stdout: iso8859-1/strict',
                          'stderr: iso8859-1/backslashreplace'])

        self.assertEqual(stdio_report(":namereplace"),
                         ['stdin: utf-8/namereplace',
                          'stdout: utf-8/namereplace',
                          'stderr: utf-8/backslashreplace'])

    def test_io(self):
        # open() called without encoding or errors, with PYTHONUTF8=1
        code = textwrap.dedent('''
            import sys
            filename = sys.argv[1]
            with open(filename) as fp:
                print(f"{fp.encoding}/{fp.errors}")
        ''')

        reported = self.get_output('-c', code, __file__, PYTHONUTF8='1')
        self.assertEqual(reported, 'UTF-8/strict')

    def _check_io_encoding(self, module, encoding=None, errors=None):
        """Check open() imported from *module* in a PYTHONUTF8=1 child.

        The child opens this test file, passing *encoding* and/or *errors*
        to open() only when they are set, and prints the encoding and
        error handler of the resulting file object.  An unset option is
        expected to be reported as 'UTF-8' (encoding) or 'strict'
        (errors).
        """
        # Encoding explicitly set
        requested = (('encoding', encoding), ('errors', errors))
        open_kwargs = ', '.join(f'{name}={value!r}'
                                for name, value in requested
                                if value)
        code = textwrap.dedent('''
            import sys
            from %s import open
            filename = sys.argv[1]
            with open(filename, %s) as fp:
                print(f"{fp.encoding}/{fp.errors}")
        ''') % (module, open_kwargs)
        reported = self.get_output('-c', code, __file__,
                                   PYTHONUTF8='1')

        expected = f"{encoding or 'UTF-8'}/{errors or 'strict'}"
        self.assertEqual(reported, expected)

    def check_io_encoding(self, module):
        """Run _check_io_encoding() on *module* with three option sets."""
        option_sets = (
            {'encoding': "latin1"},
            {'errors': "namereplace"},
            {'encoding': "latin1", 'errors': "namereplace"},
        )
        for options in option_sets:
            self._check_io_encoding(module, **options)

    def test_io_encoding(self):
        self.check_io_encoding('io')

    def test_pyio_encoding(self):
        self.check_io_encoding('_pyio')

    def test_locale_getpreferredencoding(self):
        code = ('import locale; '
                'print(locale.getpreferredencoding(False), '
                'locale.getpreferredencoding(True))')
        expected = 'UTF-8 UTF-8'

        reported = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(reported, expected)

        # Same expectation when a POSIX locale is requested
        for posix_loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=posix_loc):
                reported = self.get_output('-X', 'utf8', '-c', code,
                                           LC_ALL=posix_loc)
                self.assertEqual(reported, expected)

    @unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
    def test_cmd_line(self):
        # Non-ASCII argument passed to the child as UTF-8 encoded bytes
        arg = 'h\xe9\u20ac'.encode('utf-8')
        arg_utf8 = arg.decode('utf-8')
        arg_ascii = arg.decode('ascii', 'surrogateescape')
        code = ('import locale, sys; '
                'print("%s:%s" % (locale.getpreferredencoding(), '
                'ascii(sys.argv[1:])))')

        def check(utf8_opt, expected, **kw):
            # Compare the part of the output after the first ':' (the
            # ascii() form of sys.argv[1:]) with ascii(expected)
            out = self.get_output('-X', utf8_opt, '-c', code, arg, **kw)
            _, _, reported_args = out.partition(':')
            self.assertEqual(reported_args.rstrip(), ascii(expected), out)

        check('utf8', [arg_utf8])
        for posix_loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=posix_loc):
                check('utf8', [arg_utf8], LC_ALL=posix_loc)

        # Expected argument when the UTF-8 Mode is disabled in a POSIX
        # locale: it depends on the platform
        if sys.platform == 'darwin' or support.is_android or VXWORKS:
            c_arg = arg_utf8
        elif sys.platform.startswith("aix"):
            c_arg = arg.decode('iso-8859-1')
        else:
            c_arg = arg_ascii
        for posix_loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=posix_loc):
                check('utf8=0', [c_arg], LC_ALL=posix_loc)

    def test_optim_level(self):
        # CPython: check that Py_Main() doesn't increment Py_OptimizeFlag
        # twice when -X utf8 requires to parse the configuration twice (when
        # the encoding changes after reading the configuration, the
        # configuration is read again with the new encoding).
        def run_utf8(option, script):
            # Output of a child run with "-X utf8 <option> -c <script>"
            return self.get_output('-X', 'utf8', option, '-c', script)

        optimize = 'import sys; print(sys.flags.optimize)'
        self.assertEqual(run_utf8('-O', optimize), '1')
        self.assertEqual(run_utf8('-OO', optimize), '2')

        ignore_env = 'import sys; print(sys.flags.ignore_environment)'
        self.assertEqual(run_utf8('-E', ignore_env), '1')

    @unittest.skipIf(MS_WINDOWS,
                     "os.device_encoding() doesn't implement "
                     "the UTF-8 Mode on Windows")
    def test_device_encoding(self):
        # Use stdout as TTY
        if not sys.stdout.isatty():
            self.skipTest("sys.stdout is not a TTY")

        # The child writes its result to this file, because its stdout is
        # left attached to the TTY
        filename = 'out.txt'
        self.addCleanup(os_helper.unlink, filename)

        code = ('import os, sys; fd = sys.stdout.fileno(); '
                f'out = open({filename!r}, "w", encoding="utf-8"); '
                'print(os.isatty(fd), os.device_encoding(fd), file=out); '
                'out.close()')
        # The stdout TTY is inherited to the child process
        proc = subprocess.run([sys.executable, '-X', 'utf8', '-c', code],
                              text=True)
        self.assertEqual(proc.returncode, 0, proc)

        # In UTF-8 Mode, device_encoding(fd) returns "UTF-8" if fd is a TTY
        with open(filename, encoding="utf8") as result_file:
            reported = result_file.read().rstrip()
        self.assertEqual(reported, 'True UTF-8')
280
# Run the tests of this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
283