1r"""Test correct treatment of various string literals by the parser.
2
3There are four types of string literals:
4
5    'abc'             -- normal str
6    r'abc'            -- raw str
7    b'xyz'            -- normal bytes
8    br'xyz' | rb'xyz' -- raw bytes
9
10The difference between normal and raw strings is of course that in a
11raw string, \ escapes (while still used to determine the end of the
12literal) are not interpreted, so that r'\x00' contains four
13characters: a backslash, an x, and two zeros; while '\x00' contains a
14single character (code point zero).
15
16The tricky thing is what should happen when non-ASCII bytes are used
17inside literals.  For bytes literals, this is considered illegal.  But
18for str literals, those bytes are supposed to be decoded using the
19encoding declared for the file (UTF-8 by default).
20
21We have to test this with various file encodings.  We also test it with
22exec()/eval(), which uses a different code path.
23
24This file is really about correct treatment of encodings and
25backslashes.  It doesn't concern itself with issues like single
26vs. double quotes or singly- vs. triply-quoted strings: that's dealt
27with elsewhere (I assume).
28"""
29
30import os
31import sys
32import shutil
33import tempfile
34import unittest
35import warnings
36
37
# Source text of the module that TestLiterals.check_encoding() writes to a
# temporary file and imports.  The single %s is replaced by the encoding
# name, producing the coding cookie on the first line.  Because this is a
# raw string, the backslash escapes below reach the generated file verbatim
# and are interpreted only when that file is compiled; each assignment is
# followed by an assert that runs at import time.  The text itself must stay
# printable ASCII plus newlines (test_template enforces this).
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""
58
59
def byte(i):
    """Return a bytes object of length one whose single item is *i*."""
    single_item = (i,)
    return bytes(single_item)
62
63
class TestLiterals(unittest.TestCase):
    """Check how str and bytes literals are decoded by the compiler.

    The ``test_eval_*`` methods pass literal source text to eval().  The
    ``test_file_*`` methods write TEMPLATE to a temporary module using a
    given source encoding and import it (see check_encoding()).

    Explanations are written as comments rather than method docstrings so
    that verbose unittest output keeps showing the test method names.
    """

    def setUp(self):
        # Give each test its own scratch directory at the front of
        # sys.path so modules written by check_encoding() are importable.
        self.save_path = sys.path[:]
        self.tmpdir = tempfile.mkdtemp()
        sys.path.insert(0, self.tmpdir)

    def tearDown(self):
        # Restore sys.path in place (slice assignment keeps the same list
        # object) and remove the scratch directory, ignoring errors.
        sys.path[:] = self.save_path
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_template(self):
        # Check that the template doesn't contain any non-printables
        # except for \n.  An assertion method is used instead of a bare
        # ``assert`` statement so the check still runs under ``python -O``.
        for c in TEMPLATE:
            self.assertTrue(c == '\n' or ' ' <= c <= '~', repr(c))

    def test_eval_str_normal(self):
        # Each escape is checked twice: with a raw outer literal, so that
        # eval() receives the backslash sequence and decodes it itself, and
        # with a normal outer literal, so that eval() receives the already
        # decoded character inside the quotes.
        self.assertEqual(eval(""" 'x' """), 'x')
        self.assertEqual(eval(r""" '\x01' """), chr(1))
        self.assertEqual(eval(""" '\x01' """), chr(1))
        self.assertEqual(eval(r""" '\x81' """), chr(0x81))
        self.assertEqual(eval(""" '\x81' """), chr(0x81))
        self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
        self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))

    def test_eval_str_incomplete(self):
        # Truncated \x, \u and \U escapes in a str literal must be rejected
        # with SyntaxError, whatever the number of hex digits supplied.
        self.assertRaises(SyntaxError, eval, r""" '\x' """)
        self.assertRaises(SyntaxError, eval, r""" '\x0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u' """)
        self.assertRaises(SyntaxError, eval, r""" '\u0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u00' """)
        self.assertRaises(SyntaxError, eval, r""" '\u000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)

    def test_eval_str_invalid_escape(self):
        # For every ASCII code except the ones listed below (characters
        # that may legitimately follow a backslash in a str literal), the
        # literal must trigger a DeprecationWarning and evaluate to the
        # backslash followed by the character itself.
        for b in range(1, 128):
            if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))

        # With warnings recorded, exactly one warning is reported and it is
        # attributed to '<string>', line 1.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        # With DeprecationWarning turned into an error, eval() must raise
        # SyntaxError instead, and nothing must be recorded.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)
        self.assertEqual(exc.offset, 1)

    def test_eval_str_raw(self):
        # In a raw str literal the backslash sequences are kept verbatim
        # (raw outer literal), while characters that are already decoded
        # when eval() sees them (normal outer literal) pass through as is.
        self.assertEqual(eval(""" r'x' """), 'x')
        self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
        self.assertEqual(eval(""" r'\x01' """), chr(1))
        self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
        self.assertEqual(eval(""" r'\x81' """), chr(0x81))
        self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
        self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
        self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))

    def test_eval_bytes_normal(self):
        # \x escapes are decoded in bytes literals, but a literal non-ASCII
        # character inside a bytes literal is a SyntaxError.  The \u and \U
        # sequences are checked through raw bytes literals, where they are
        # kept verbatim.
        self.assertEqual(eval(""" b'x' """), b'x')
        self.assertEqual(eval(r""" b'\x01' """), byte(1))
        self.assertEqual(eval(""" b'\x01' """), byte(1))
        self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
        self.assertRaises(SyntaxError, eval, """ b'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881')
        self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120')
        self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)

    def test_eval_bytes_incomplete(self):
        # Truncated \x escapes in a bytes literal must raise SyntaxError.
        self.assertRaises(SyntaxError, eval, r""" b'\x' """)
        self.assertRaises(SyntaxError, eval, r""" b'\x0' """)

    def test_eval_bytes_invalid_escape(self):
        # Same check as for str literals, with the skip list for bytes
        # literals: here N and u/U are not skipped, so they are expected to
        # warn like any other unrecognised escape.
        for b in range(1, 128):
            if b in b"""\n\r"'01234567\\abfnrtvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))

        # With warnings recorded, exactly one warning is reported and it is
        # attributed to '<string>', line 1.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("b'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        # With DeprecationWarning turned into an error, eval() must raise
        # SyntaxError instead, and nothing must be recorded.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("b'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)

    def test_eval_bytes_raw(self):
        # Both prefix orders (br and rb) must behave identically: escapes
        # are kept verbatim, and literal non-ASCII characters are rejected.
        self.assertEqual(eval(""" br'x' """), b'x')
        self.assertEqual(eval(""" rb'x' """), b'x')
        self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(""" br'\x01' """), byte(1))
        self.assertEqual(eval(""" rb'\x01' """), byte(1))
        self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
        self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
        self.assertRaises(SyntaxError, eval, """ br'\x81' """)
        self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
        self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
        self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
        self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
        self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
        # Repeated or otherwise malformed prefix combinations are rejected.
        self.assertRaises(SyntaxError, eval, """ bb'' """)
        self.assertRaises(SyntaxError, eval, """ rr'' """)
        self.assertRaises(SyntaxError, eval, """ brr'' """)
        self.assertRaises(SyntaxError, eval, """ bbr'' """)
        self.assertRaises(SyntaxError, eval, """ rrb'' """)
        self.assertRaises(SyntaxError, eval, """ rbb'' """)

    def test_eval_str_u(self):
        # The u/U prefix is accepted on its own, including with \u and \N
        # escapes, but cannot be combined with the r or b prefixes.
        self.assertEqual(eval(""" u'x' """), 'x')
        self.assertEqual(eval(""" U'\u00e4' """), 'ä')
        self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
        self.assertRaises(SyntaxError, eval, """ ur'' """)
        self.assertRaises(SyntaxError, eval, """ ru'' """)
        self.assertRaises(SyntaxError, eval, """ bu'' """)
        self.assertRaises(SyntaxError, eval, """ ub'' """)

    def check_encoding(self, encoding, extra=""):
        # Write TEMPLATE, with its coding cookie set to *encoding* and
        # followed by *extra*, to a module file in the scratch directory
        # using that same encoding, then import the module so that the
        # asserts inside it run.  Exceptions raised while writing or
        # importing propagate to the caller.
        import importlib

        modname = "xx_" + encoding.replace("-", "_")
        fn = os.path.join(self.tmpdir, modname + ".py")
        with open(fn, "w", encoding=encoding) as f:
            f.write(TEMPLATE % encoding)
            f.write(extra)
        # The source file was created after the interpreter started, so
        # ask the import system's finders to drop any stale directory
        # listings before importing it.
        importlib.invalidate_caches()
        __import__(modname)
        # Forget the module so that a later import of the same name is not
        # served from sys.modules.
        del sys.modules[modname]

    def test_file_utf_8(self):
        # *extra* is a normal string, so the generated file contains the
        # actual U+1234 character encoded as UTF-8.
        extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
        self.check_encoding("utf-8", extra)

    def test_file_utf_8_error(self):
        # A literal non-ASCII character inside a bytes literal in a file
        # must be reported as a SyntaxError at import time.
        extra = "b'\x80'\n"
        self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

    def test_file_utf8(self):
        # Exercise the hyphen-less "utf8" spelling of the encoding name;
        # the hyphenated form is already covered by test_file_utf_8.
        self.check_encoding("utf8")

    def test_file_iso_8859_1(self):
        self.check_encoding("iso-8859-1")

    def test_file_latin_1(self):
        self.check_encoding("latin-1")

    def test_file_latin9(self):
        self.check_encoding("latin9")
247
248
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
251