"""
Test the implementation of the PEP 540: the UTF-8 Mode.
"""

import locale
import sys
import textwrap
import unittest
from test import support
from test.support.script_helper import assert_python_ok, assert_python_failure


MS_WINDOWS = (sys.platform == 'win32')
POSIX_LOCALES = ('C', 'POSIX')
VXWORKS = (sys.platform == "vxworks")


class UTF8ModeTests(unittest.TestCase):
    """Check the UTF-8 Mode by running child interpreters.

    Every test spawns a child Python process with a given command line and
    environment, then inspects what the child printed.
    """
    # NOTE: test methods are described with comments rather than docstrings
    # so that unittest keeps reporting them by method name.

    # Baseline environment of every child process; keyword arguments given
    # to get_output() override these entries.
    DEFAULT_ENV = {
        'PYTHONUTF8': '',
        'PYTHONLEGACYWINDOWSFSENCODING': '',
        'PYTHONCOERCECLOCALE': '0',
    }

    # Child program printing the value of the UTF-8 Mode flag.
    UTF8_MODE_CODE = 'import sys; print(sys.flags.utf8_mode)'

    def posix_locale(self):
        """Return True if the current LC_CTYPE locale is "C" or "POSIX"."""
        # Passing None queries the locale without modifying it.
        current = locale.setlocale(locale.LC_CTYPE, None)
        return current in POSIX_LOCALES

    def get_output(self, *args, failure=False, **kw):
        """Run a child interpreter and return its decoded output.

        *args* are the command line arguments of the child process and *kw*
        are environment variables applied on top of DEFAULT_ENV.

        If *failure* is false, the child must succeed and its stdout is
        returned.  If *failure* is true, the child must fail and its stderr
        is returned.  Trailing newline characters are stripped.
        """
        env = dict(self.DEFAULT_ENV, **kw)
        if failure:
            data = assert_python_failure(*args, **env)[2]
        else:
            data = assert_python_ok(*args, **env)[1]
        return data.decode().rstrip("\n\r")

    # The POSIX locale enables the UTF-8 Mode.
    @unittest.skipIf(MS_WINDOWS, 'Windows has no POSIX locale')
    def test_posix_locale(self):
        code = self.UTF8_MODE_CODE

        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                out = self.get_output('-c', code, LC_ALL=loc)
                self.assertEqual(out, '1')

    # The -X utf8 command line option controls the UTF-8 Mode.
    def test_xoption(self):
        code = self.UTF8_MODE_CODE

        out = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(out, '1')

        # undocumented but accepted syntax: -X utf8=1
        out = self.get_output('-X', 'utf8=1', '-c', code)
        self.assertEqual(out, '1')

        out = self.get_output('-X', 'utf8=0', '-c', code)
        self.assertEqual(out, '0')

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 Mode
            # and has the priority over -X utf8
            out = self.get_output('-X', 'utf8', '-c', code,
                                  PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(out, '0')

    # The PYTHONUTF8 environment variable controls the UTF-8 Mode.
    def test_env_var(self):
        code = self.UTF8_MODE_CODE

        out = self.get_output('-c', code, PYTHONUTF8='1')
        self.assertEqual(out, '1')

        out = self.get_output('-c', code, PYTHONUTF8='0')
        self.assertEqual(out, '0')

        # -X utf8 has the priority over PYTHONUTF8
        out = self.get_output('-X', 'utf8=0', '-c', code, PYTHONUTF8='1')
        self.assertEqual(out, '0')

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
            # and has the priority over PYTHONUTF8
            out = self.get_output('-X', 'utf8', '-c', code, PYTHONUTF8='1',
                                  PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(out, '0')

        # Cannot test with the POSIX locale, since the POSIX locale enables
        # the UTF-8 mode
        if not self.posix_locale():
            # PYTHONUTF8 should be ignored if -E is used
            out = self.get_output('-E', '-c', code, PYTHONUTF8='1')
            self.assertEqual(out, '0')

        # invalid mode
        out = self.get_output('-c', code, PYTHONUTF8='xxx', failure=True)
        self.assertIn('invalid PYTHONUTF8 environment variable value', out)

    # Filesystem encoding and error handler in the UTF-8 Mode.
    def test_filesystemencoding(self):
        code = textwrap.dedent('''
            import sys
            print("{}/{}".format(sys.getfilesystemencoding(),
                                 sys.getfilesystemencodeerrors()))
        ''')

        if MS_WINDOWS:
            expected = 'utf-8/surrogatepass'
        else:
            expected = 'utf-8/surrogateescape'

        out = self.get_output('-X', 'utf8', '-c', code)
        self.assertEqual(out, expected)

        if MS_WINDOWS:
            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
            # and has the priority over -X utf8 and PYTHONUTF8
            # NOTE(review): 'strict' is not a PYTHONUTF8 value exercised
            # anywhere else in this file -- confirm it is still intended.
            out = self.get_output('-X', 'utf8', '-c', code,
                                  PYTHONUTF8='strict',
                                  PYTHONLEGACYWINDOWSFSENCODING='1')
            self.assertEqual(out, 'mbcs/replace')

    # Encoding and error handler of the standard streams.
    def test_stdio(self):
        code = textwrap.dedent('''
            import sys
            print(f"stdin: {sys.stdin.encoding}/{sys.stdin.errors}")
            print(f"stdout: {sys.stdout.encoding}/{sys.stdout.errors}")
            print(f"stderr: {sys.stderr.encoding}/{sys.stderr.errors}")
        ''')

        out = self.get_output('-X', 'utf8', '-c', code,
                              PYTHONIOENCODING='')
        self.assertEqual(out.splitlines(),
                         ['stdin: utf-8/surrogateescape',
                          'stdout: utf-8/surrogateescape',
                          'stderr: utf-8/backslashreplace'])

        # PYTHONIOENCODING has the priority over PYTHONUTF8
        out = self.get_output('-X', 'utf8', '-c', code,
                              PYTHONIOENCODING="latin1")
        self.assertEqual(out.splitlines(),
                         ['stdin: iso8859-1/strict',
                          'stdout: iso8859-1/strict',
                          'stderr: iso8859-1/backslashreplace'])

        out = self.get_output('-X', 'utf8', '-c', code,
                              PYTHONIOENCODING=":namereplace")
        self.assertEqual(out.splitlines(),
                         ['stdin: utf-8/namereplace',
                          'stdout: utf-8/namereplace',
                          'stderr: utf-8/backslashreplace'])

    # Default encoding of the builtin open() in the UTF-8 Mode.
    def test_io(self):
        code = textwrap.dedent('''
            import sys
            filename = sys.argv[1]
            with open(filename) as fp:
                print(f"{fp.encoding}/{fp.errors}")
        ''')
        filename = __file__

        out = self.get_output('-c', code, filename, PYTHONUTF8='1')
        # Codec names are case-insensitive: whether the child spells the
        # name "UTF-8" or "utf-8" is not what this test is about.
        self.assertEqual(out.lower(), 'utf-8/strict')

    def _check_io_encoding(self, module, encoding=None, errors=None):
        """Check the encoding/errors of a file opened by *module*.open().

        The child process opens this file with the explicitly given
        *encoding* and/or *errors* and prints the resulting attributes.  An
        argument left unset is expected to fall back to UTF-8 (encoding) or
        'strict' (errors).
        """
        filename = __file__

        # Encoding explicitly set
        args = []
        if encoding:
            args.append(f'encoding={encoding!r}')
        if errors:
            args.append(f'errors={errors!r}')
        code = textwrap.dedent('''
            import sys
            from %s import open
            filename = sys.argv[1]
            with open(filename, %s) as fp:
                print(f"{fp.encoding}/{fp.errors}")
        ''') % (module, ', '.join(args))
        out = self.get_output('-c', code, filename,
                              PYTHONUTF8='1')

        if not encoding:
            encoding = 'utf-8'
        if not errors:
            errors = 'strict'
        # Compare case-insensitively: the spelling of the encoding name
        # (for example "UTF-8" versus "utf-8") is an implementation detail.
        expected = f'{encoding}/{errors}'
        self.assertEqual(out.lower(), expected.lower())

    def check_io_encoding(self, module):
        """Run _check_io_encoding() with encoding, errors, and both set."""
        self._check_io_encoding(module, encoding="latin1")
        self._check_io_encoding(module, errors="namereplace")
        self._check_io_encoding(module,
                                encoding="latin1", errors="namereplace")

    def test_io_encoding(self):
        self.check_io_encoding('io')

    def test_pyio_encoding(self):
        self.check_io_encoding('_pyio')

    # locale.getpreferredencoding() reports UTF-8 in the UTF-8 Mode,
    # including when the locale is the POSIX locale.
    def test_locale_getpreferredencoding(self):
        code = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))'
        out = self.get_output('-X', 'utf8', '-c', code)
        # Case-insensitive: only the selected codec matters, not the
        # spelling of its name.
        self.assertEqual(out.lower(), 'utf-8 utf-8')

        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc)
                self.assertEqual(out.lower(), 'utf-8 utf-8')

    # Decoding of non-ASCII command line arguments.
    @unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
    def test_cmd_line(self):
        arg = 'h\xe9\u20ac'.encode('utf-8')
        arg_utf8 = arg.decode('utf-8')
        arg_ascii = arg.decode('ascii', 'surrogateescape')
        code = 'import locale, sys; print("%s:%s" % (locale.getpreferredencoding(), ascii(sys.argv[1:])))'

        def check(utf8_opt, expected, **kw):
            out = self.get_output('-X', utf8_opt, '-c', code, arg, **kw)
            # The child prints "<encoding>:<ascii(argv)>": keep the argv part.
            args = out.partition(':')[2].rstrip()
            self.assertEqual(args, ascii(expected), out)

        check('utf8', [arg_utf8])
        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                check('utf8', [arg_utf8], LC_ALL=loc)

        # Expected decoding of the argument when the UTF-8 Mode is disabled
        # and the POSIX locale is used: it depends on the platform.
        if sys.platform == 'darwin' or support.is_android or VXWORKS:
            c_arg = arg_utf8
        elif sys.platform.startswith("aix"):
            c_arg = arg.decode('iso-8859-1')
        else:
            c_arg = arg_ascii
        for loc in POSIX_LOCALES:
            with self.subTest(LC_ALL=loc):
                check('utf8=0', [c_arg], LC_ALL=loc)

    def test_optim_level(self):
        # CPython: check that Py_Main() doesn't increment Py_OptimizeFlag
        # twice when -X utf8 requires to parse the configuration twice (when
        # the encoding changes after reading the configuration, the
        # configuration is read again with the new encoding).
        code = 'import sys; print(sys.flags.optimize)'
        out = self.get_output('-X', 'utf8', '-O', '-c', code)
        self.assertEqual(out, '1')
        out = self.get_output('-X', 'utf8', '-OO', '-c', code)
        self.assertEqual(out, '2')

        # Same check for the -E flag.
        code = 'import sys; print(sys.flags.ignore_environment)'
        out = self.get_output('-X', 'utf8', '-E', '-c', code)
        self.assertEqual(out, '1')


if __name__ == "__main__":
    unittest.main()