1r"""Test correct treatment of various string literals by the parser.
2
3There are four types of string literals:
4
5    'abc'             -- normal str
6    r'abc'            -- raw str
7    b'xyz'            -- normal bytes
8    br'xyz' | rb'xyz' -- raw bytes
9
10The difference between normal and raw strings is of course that in a
11raw string, \ escapes (while still used to determine the end of the
12literal) are not interpreted, so that r'\x00' contains four
13characters: a backslash, an x, and two zeros; while '\x00' contains a
14single character (code point zero).
15
16The tricky thing is what should happen when non-ASCII bytes are used
17inside literals.  For bytes literals, this is considered illegal.  But
18for str literals, those bytes are supposed to be decoded using the
19encoding declared for the file (UTF-8 by default).
20
21We have to test this with various file encodings.  We also test it with
22exec()/eval(), which uses a different code path.
23
24This file is really about correct treatment of encodings and
25backslashes.  It doesn't concern itself with issues like single
26vs. double quotes or singly- vs. triply-quoted strings: that's dealt
27with elsewhere (I assume).
28"""
29
30import os
31import sys
32import shutil
33import tempfile
34import unittest
35import warnings
36
37
# Source text of the module that TestLiterals.check_encoding() writes to disk
# and then imports.  The single %s is filled in with the encoding name to form
# the coding declaration on the first line.  Because this is a raw string, the
# backslash escapes below are written to the generated file literally, so it
# is the parser of the generated module that interprets them; each assignment
# is followed by an assert on the resulting code points.
# TestLiterals.test_template() checks that this text holds only printable
# ASCII characters and newlines.
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""
58
59
def byte(i):
    """Return a bytes object of length one whose single byte has value *i*."""
    single_item = (i,)
    return bytes(single_item)
62
63
class TestLiterals(unittest.TestCase):
    """Check how str and bytes literals are parsed, via eval() and via import.

    The ``test_eval_*`` methods hand literal source text to eval().  The
    ``test_file_*`` methods write TEMPLATE to a module file in a temporary
    directory under a given source encoding and import that module.
    """

    # Imported at class scope, which binds the helper as a class attribute;
    # none of the methods below reference it.
    from test.support import check_syntax_warning

    def setUp(self):
        # Snapshot sys.path and put a fresh temporary directory at its front
        # so that modules written by check_encoding() can be imported.
        self.save_path = sys.path[:]
        self.tmpdir = tempfile.mkdtemp()
        sys.path.insert(0, self.tmpdir)

    def tearDown(self):
        # Restore sys.path in place and delete the temporary directory;
        # errors during the deletion are ignored.
        sys.path[:] = self.save_path
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_template(self):
        # Check that the template doesn't contain any non-printables
        # except for \n.
        for c in TEMPLATE:
            # A unittest assertion is used instead of a bare ``assert`` so
            # that the check still runs under ``python -O``.
            self.assertTrue(c == '\n' or ' ' <= c <= '~', repr(c))

    def test_eval_str_normal(self):
        # Each escape is checked twice.  With the r"""...""" wrapper the
        # backslash reaches eval() and is decoded there; with the plain
        # """...""" wrapper this file's own parser decodes it first, so
        # eval() sees the resulting character inside the quotes.
        self.assertEqual(eval(""" 'x' """), 'x')
        self.assertEqual(eval(r""" '\x01' """), chr(1))
        self.assertEqual(eval(""" '\x01' """), chr(1))
        self.assertEqual(eval(r""" '\x81' """), chr(0x81))
        self.assertEqual(eval(""" '\x81' """), chr(0x81))
        self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
        self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))

    def test_eval_str_incomplete(self):
        # Truncated \x, \u and \U escapes (too few hex digits) must be
        # rejected with SyntaxError.
        self.assertRaises(SyntaxError, eval, r""" '\x' """)
        self.assertRaises(SyntaxError, eval, r""" '\x0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u' """)
        self.assertRaises(SyntaxError, eval, r""" '\u0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u00' """)
        self.assertRaises(SyntaxError, eval, r""" '\u000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)

    def test_eval_str_invalid_escape(self):
        # Every ASCII character that is not skipped below must, when it
        # follows a backslash in a str literal, trigger a DeprecationWarning
        # and leave both the backslash and the character in the result.
        for b in range(1, 128):
            if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))

        # The literal starts on line 1 of the evaluated source and the
        # invalid escape sits on line 2; the warning is expected to be
        # reported against line 1 of '<string>'.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        # When the warning is turned into an error it must surface as a
        # SyntaxError carrying the same location, with nothing recorded.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)

    def test_eval_str_raw(self):
        # In a raw literal the backslash sequence is kept verbatim when it
        # reaches eval() (r"""...""" wrapper).  With the plain wrapper the
        # escape is already decoded before eval() runs, so the raw literal
        # simply contains that one character.
        self.assertEqual(eval(""" r'x' """), 'x')
        self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
        self.assertEqual(eval(""" r'\x01' """), chr(1))
        self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
        self.assertEqual(eval(""" r'\x81' """), chr(0x81))
        self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
        self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
        self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))

    def test_eval_bytes_normal(self):
        # \x escapes are decoded inside bytes literals.  A non-ASCII
        # character placed directly in a bytes literal (plain wrapper, escape
        # decoded before eval() runs) must be a SyntaxError.
        # NOTE(review): the \u and \U positive cases use the br'' prefix, so
        # they exercise raw bytes rather than normal bytes literals.
        self.assertEqual(eval(""" b'x' """), b'x')
        self.assertEqual(eval(r""" b'\x01' """), byte(1))
        self.assertEqual(eval(""" b'\x01' """), byte(1))
        self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
        self.assertRaises(SyntaxError, eval, """ b'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881')
        self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120')
        self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)

    def test_eval_bytes_incomplete(self):
        # Truncated \x escapes must be rejected in bytes literals too.
        self.assertRaises(SyntaxError, eval, r""" b'\x' """)
        self.assertRaises(SyntaxError, eval, r""" b'\x0' """)

    def test_eval_bytes_invalid_escape(self):
        # Same check as for str literals, but N, U and u are absent from the
        # skip list here, so they too are expected to warn in bytes literals.
        for b in range(1, 128):
            if b in b"""\n\r"'01234567\\abfnrtvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))

        # The warning is expected against line 1 (where the literal starts),
        # although the invalid escape is on line 2 of the evaluated source.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("b'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        # Escalated to an error, the warning must become a SyntaxError with
        # the same location and leave the recorded-warnings list empty.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("b'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)

    def test_eval_bytes_raw(self):
        # Both prefix orders (br and rb) are exercised for every case.
        # Backslash sequences stay verbatim; non-ASCII characters placed
        # directly in the literal are a SyntaxError.
        self.assertEqual(eval(""" br'x' """), b'x')
        self.assertEqual(eval(""" rb'x' """), b'x')
        self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(""" br'\x01' """), byte(1))
        self.assertEqual(eval(""" rb'\x01' """), byte(1))
        self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
        self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
        self.assertRaises(SyntaxError, eval, """ br'\x81' """)
        self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
        self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
        self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
        self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
        self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
        # Repeated prefix letters are not valid prefixes.
        self.assertRaises(SyntaxError, eval, """ bb'' """)
        self.assertRaises(SyntaxError, eval, """ rr'' """)
        self.assertRaises(SyntaxError, eval, """ brr'' """)
        self.assertRaises(SyntaxError, eval, """ bbr'' """)
        self.assertRaises(SyntaxError, eval, """ rrb'' """)
        self.assertRaises(SyntaxError, eval, """ rbb'' """)

    def test_eval_str_u(self):
        # The u/U prefix yields an ordinary str; combining it with r or b
        # (in either order) must be a SyntaxError.
        self.assertEqual(eval(""" u'x' """), 'x')
        self.assertEqual(eval(""" U'\u00e4' """), 'ä')
        self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
        self.assertRaises(SyntaxError, eval, """ ur'' """)
        self.assertRaises(SyntaxError, eval, """ ru'' """)
        self.assertRaises(SyntaxError, eval, """ bu'' """)
        self.assertRaises(SyntaxError, eval, """ ub'' """)

    def check_encoding(self, encoding, extra=""):
        """Write TEMPLATE plus *extra* as a module in *encoding* and import it.

        The module is named ``xx_`` followed by *encoding* with hyphens
        replaced by underscores, and is created in ``self.tmpdir``.  The
        encoding name is substituted into the template's coding line and is
        also used to encode the file.  Exceptions raised while writing or
        importing propagate to the caller.  After a successful import the
        module is removed from ``sys.modules``.
        """
        modname = "xx_" + encoding.replace("-", "_")
        fn = os.path.join(self.tmpdir, modname + ".py")
        # The with-block closes the file before the import below reads it.
        with open(fn, "w", encoding=encoding) as f:
            f.write(TEMPLATE % encoding)
            f.write(extra)
        __import__(modname)
        del sys.modules[modname]

    def test_file_utf_8(self):
        # The extra line puts U+1234 directly into a str literal of the
        # generated module.
        extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
        self.check_encoding("utf-8", extra)

    def test_file_utf_8_error(self):
        # The extra line puts U+0080 directly into a bytes literal of the
        # generated module, which must fail to import with SyntaxError.
        extra = "b'\x80'\n"
        self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

    def test_file_utf8(self):
        # Exercise the hyphen-less spelling of the encoding name; the
        # "utf-8" spelling is already covered by test_file_utf_8.
        self.check_encoding("utf8")

    def test_file_iso_8859_1(self):
        self.check_encoding("iso-8859-1")

    def test_file_latin_1(self):
        self.check_encoding("latin-1")

    def test_file_latin9(self):
        self.check_encoding("latin9")
248
249
# Run this module's tests when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
252