from test import test_support
from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP,
                      STRING, ENDMARKER, tok_name, Untokenizer, tokenize)
from StringIO import StringIO
import os
from unittest import TestCase


class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens. The ENDMARKER is omitted for
    # brevity.
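    #
    # For reference, generate_tokens() yields 5-tuples of the form
    # (type, string, (start_row, start_col), (end_row, end_col), line);
    # e.g. the first token of "1 + 1" is
    # (NUMBER, '1', (1, 0), (1, 1), '1 + 1').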

    def check_tokenize(self, s, expected):
        # Format the tokens in s as a table.
        # The ENDMARKER is omitted.
        result = []
        f = StringIO(s)
        for type, token, start, end, line in generate_tokens(f.readline):
            if type == ENDMARKER:
                break
            type = tok_name[type]
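            # The %-13.13r conversion truncates each token's repr to at
            # most 13 characters, so long literals in the expected tables
            # below (e.g. '0xffffffffff with no closing quote) are
            # intentionally cut short.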
            result.append("    %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
                          locals())
        self.assertEqual(result,
                         expected.rstrip().splitlines())

    def test_basic(self):
        self.check_tokenize("1 + 1", """\
    NUMBER     '1'           (1, 0) (1, 1)
    OP         '+'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    """)
        self.check_tokenize("if False:\n"
                            "    # NL\n"
                            "    True = False # NEWLINE\n", """\
    NAME       'if'          (1, 0) (1, 2)
    NAME       'False'       (1, 3) (1, 8)
    OP         ':'           (1, 8) (1, 9)
    NEWLINE    '\\n'          (1, 9) (1, 10)
    COMMENT    '# NL'        (2, 4) (2, 8)
    NL         '\\n'          (2, 8) (2, 9)
    INDENT     '    '        (3, 0) (3, 4)
    NAME       'True'        (3, 4) (3, 8)
    OP         '='           (3, 9) (3, 10)
    NAME       'False'       (3, 11) (3, 16)
    COMMENT    '# NEWLINE'   (3, 17) (3, 26)
    NEWLINE    '\\n'          (3, 26) (3, 27)
    DEDENT     ''            (4, 0) (4, 0)
    """)

        indent_error_file = """\
def k(x):
    x += 2
  x += 5
"""
        with self.assertRaisesRegexp(IndentationError,
                                     "unindent does not match any "
                                     "outer indentation level"):
            for tok in generate_tokens(StringIO(indent_error_file).readline):
                pass

    def test_int(self):
        # Ordinary integers and binary operators
        self.check_tokenize("0xff <= 255", """\
    NUMBER     '0xff'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0b10 <= 255", """\
    NUMBER     '0b10'        (1, 0) (1, 4)
    OP         '<='          (1, 5) (1, 7)
    NUMBER     '255'         (1, 8) (1, 11)
    """)
        self.check_tokenize("0o123 <= 0123", """\
    NUMBER     '0o123'       (1, 0) (1, 5)
    OP         '<='          (1, 6) (1, 8)
    NUMBER     '0123'        (1, 9) (1, 13)
    """)
        self.check_tokenize("01234567 > ~0x15", """\
    NUMBER     '01234567'    (1, 0) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    OP         '~'           (1, 11) (1, 12)
    NUMBER     '0x15'        (1, 12) (1, 16)
    """)
        self.check_tokenize("2134568 != 01231515", """\
    NUMBER     '2134568'     (1, 0) (1, 7)
    OP         '!='          (1, 8) (1, 10)
    NUMBER     '01231515'    (1, 11) (1, 19)
    """)
        self.check_tokenize("(-124561-1) & 0200000000", """\
    OP         '('           (1, 0) (1, 1)
    OP         '-'           (1, 1) (1, 2)
    NUMBER     '124561'      (1, 2) (1, 8)
    OP         '-'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         ')'           (1, 10) (1, 11)
    OP         '&'           (1, 12) (1, 13)
    NUMBER     '0200000000'  (1, 14) (1, 24)
    """)
        self.check_tokenize("0xdeadbeef != -1", """\
    NUMBER     '0xdeadbeef'  (1, 0) (1, 10)
    OP         '!='          (1, 11) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    NUMBER     '1'           (1, 15) (1, 16)
    """)
        self.check_tokenize("0xdeadc0de & 012345", """\
    NUMBER     '0xdeadc0de'  (1, 0) (1, 10)
    OP         '&'           (1, 11) (1, 12)
    NUMBER     '012345'      (1, 13) (1, 19)
    """)
        self.check_tokenize("0xFF & 0x15 | 1234", """\
    NUMBER     '0xFF'        (1, 0) (1, 4)
    OP         '&'           (1, 5) (1, 6)
    NUMBER     '0x15'        (1, 7) (1, 11)
    OP         '|'           (1, 12) (1, 13)
    NUMBER     '1234'        (1, 14) (1, 18)
    """)

    def test_long(self):
        # Long integers
        self.check_tokenize("x = 0L", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0L'          (1, 4) (1, 6)
    """)
        self.check_tokenize("x = 0xfffffffffff", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '0xffffffffff (1, 4) (1, 17)
    """)
        self.check_tokenize("x = 123141242151251616110l", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '123141242151 (1, 4) (1, 26)
    """)
        self.check_tokenize("x = -15921590215012591L", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    OP         '-'           (1, 4) (1, 5)
    NUMBER     '159215902150 (1, 5) (1, 23)
    """)

    def test_float(self):
        # Floating point numbers
        self.check_tokenize("x = 3.14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 314159.", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '314159.'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = .314159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '.314159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3e14159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3e14159'     (1, 4) (1, 11)
    """)
        self.check_tokenize("x = 3E123", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3E123'       (1, 4) (1, 9)
    """)
        self.check_tokenize("x+y = 3e-1230", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '+'           (1, 1) (1, 2)
    NAME       'y'           (1, 2) (1, 3)
    OP         '='           (1, 4) (1, 5)
    NUMBER     '3e-1230'     (1, 6) (1, 13)
    """)
        self.check_tokenize("x = 3.14e159", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '3.14e159'    (1, 4) (1, 12)
    """)

    def test_string(self):
        # String literals
        self.check_tokenize("x = ''; y = \"\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "''"          (1, 4) (1, 6)
    OP         ';'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '='           (1, 10) (1, 11)
    STRING     '""'          (1, 12) (1, 14)
    """)
        self.check_tokenize("x = '\"'; y = \"'\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '\\'"\\''       (1, 4) (1, 7)
    OP         ';'           (1, 7) (1, 8)
    NAME       'y'           (1, 9) (1, 10)
    OP         '='           (1, 11) (1, 12)
    STRING     '"\\'"'        (1, 13) (1, 16)
    """)
        self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     '"doesn\\'t "' (1, 4) (1, 14)
    NAME       'shrink'      (1, 14) (1, 20)
    STRING     '", does it"' (1, 20) (1, 31)
    """)
        self.check_tokenize("x = u'abc' + U'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "u'abc'"      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     "U'ABC'"      (1, 13) (1, 19)
    """)
        self.check_tokenize('y = u"ABC" + U"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'u"ABC"'      (1, 4) (1, 10)
    OP         '+'           (1, 11) (1, 12)
    STRING     'U"ABC"'      (1, 13) (1, 19)
    """)
        self.check_tokenize("x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     "ur'abc'"     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     "Ur'ABC'"     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     "uR'ABC'"     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     "UR'ABC'"     (1, 34) (1, 41)
    """)
        self.check_tokenize('y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"', """\
    NAME       'y'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    STRING     'ur"abc"'     (1, 4) (1, 11)
    OP         '+'           (1, 12) (1, 13)
    STRING     'Ur"ABC"'     (1, 14) (1, 21)
    OP         '+'           (1, 22) (1, 23)
    STRING     'uR"ABC"'     (1, 24) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    STRING     'UR"ABC"'     (1, 34) (1, 41)
    """)
        self.check_tokenize("b'abc' + B'abc'", """\
    STRING     "b'abc'"      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     "B'abc'"      (1, 9) (1, 15)
    """)
        self.check_tokenize('b"abc" + B"abc"', """\
    STRING     'b"abc"'      (1, 0) (1, 6)
    OP         '+'           (1, 7) (1, 8)
    STRING     'B"abc"'      (1, 9) (1, 15)
    """)
        self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
    STRING     "br'abc'"     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     "bR'abc'"     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     "Br'abc'"     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     "BR'abc'"     (1, 30) (1, 37)
    """)
        self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
    STRING     'br"abc"'     (1, 0) (1, 7)
    OP         '+'           (1, 8) (1, 9)
    STRING     'bR"abc"'     (1, 10) (1, 17)
    OP         '+'           (1, 18) (1, 19)
    STRING     'Br"abc"'     (1, 20) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    STRING     'BR"abc"'     (1, 30) (1, 37)
    """)

    def test_function(self):
        self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd22'         (1, 4) (1, 7)
    OP         '('           (1, 7) (1, 8)
    NAME       'a'           (1, 8) (1, 9)
    OP         ','           (1, 9) (1, 10)
    NAME       'b'           (1, 11) (1, 12)
    OP         ','           (1, 12) (1, 13)
    NAME       'c'           (1, 14) (1, 15)
    OP         '='           (1, 15) (1, 16)
    NUMBER     '2'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    NAME       'd'           (1, 19) (1, 20)
    OP         '='           (1, 20) (1, 21)
    NUMBER     '2'           (1, 21) (1, 22)
    OP         ','           (1, 22) (1, 23)
    OP         '*'           (1, 24) (1, 25)
    NAME       'k'           (1, 25) (1, 26)
    OP         ')'           (1, 26) (1, 27)
    OP         ':'           (1, 27) (1, 28)
    NAME       'pass'        (1, 29) (1, 33)
    """)
        self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'd01v_'       (1, 4) (1, 9)
    OP         '('           (1, 9) (1, 10)
    NAME       'a'           (1, 10) (1, 11)
    OP         '='           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         ','           (1, 13) (1, 14)
    OP         '*'           (1, 15) (1, 16)
    NAME       'k'           (1, 16) (1, 17)
    OP         ','           (1, 17) (1, 18)
    OP         '**'          (1, 19) (1, 21)
    NAME       'w'           (1, 21) (1, 22)
    OP         ')'           (1, 22) (1, 23)
    OP         ':'           (1, 23) (1, 24)
    NAME       'pass'        (1, 25) (1, 29)
    """)

    def test_comparison(self):
        # Comparison
        self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " +
                            "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
    NAME       'if'          (1, 0) (1, 2)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '<'           (1, 5) (1, 6)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '>'           (1, 9) (1, 10)
    NUMBER     '1'           (1, 11) (1, 12)
    OP         '=='          (1, 13) (1, 15)
    NUMBER     '1'           (1, 16) (1, 17)
    OP         '>='          (1, 18) (1, 20)
    NUMBER     '5'           (1, 21) (1, 22)
    OP         '<='          (1, 23) (1, 25)
    NUMBER     '0x15'        (1, 26) (1, 30)
    OP         '<='          (1, 31) (1, 33)
    NUMBER     '0x12'        (1, 34) (1, 38)
    OP         '!='          (1, 39) (1, 41)
    NUMBER     '1'           (1, 42) (1, 43)
    NAME       'and'         (1, 44) (1, 47)
    NUMBER     '5'           (1, 48) (1, 49)
    NAME       'in'          (1, 50) (1, 52)
    NUMBER     '1'           (1, 53) (1, 54)
    NAME       'not'         (1, 55) (1, 58)
    NAME       'in'          (1, 59) (1, 61)
    NUMBER     '1'           (1, 62) (1, 63)
    NAME       'is'          (1, 64) (1, 66)
    NUMBER     '1'           (1, 67) (1, 68)
    NAME       'or'          (1, 69) (1, 71)
    NUMBER     '5'           (1, 72) (1, 73)
    NAME       'is'          (1, 74) (1, 76)
    NAME       'not'         (1, 77) (1, 80)
    NUMBER     '1'           (1, 81) (1, 82)
    OP         ':'           (1, 82) (1, 83)
    NAME       'pass'        (1, 84) (1, 88)
    """)

    def test_shift(self):
        # Shift
        self.check_tokenize("x = 1 << 1 >> 5", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '<<'          (1, 6) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '>>'          (1, 11) (1, 13)
    NUMBER     '5'           (1, 14) (1, 15)
    """)

    def test_additive(self):
        # Additive
        self.check_tokenize("x = 1 - y + 15 - 01 + 0x124 + z + a[5]", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '-'           (1, 6) (1, 7)
    NAME       'y'           (1, 8) (1, 9)
    OP         '+'           (1, 10) (1, 11)
    NUMBER     '15'          (1, 12) (1, 14)
    OP         '-'           (1, 15) (1, 16)
    NUMBER     '01'          (1, 17) (1, 19)
    OP         '+'           (1, 20) (1, 21)
    NUMBER     '0x124'       (1, 22) (1, 27)
    OP         '+'           (1, 28) (1, 29)
    NAME       'z'           (1, 30) (1, 31)
    OP         '+'           (1, 32) (1, 33)
    NAME       'a'           (1, 34) (1, 35)
    OP         '['           (1, 35) (1, 36)
    NUMBER     '5'           (1, 36) (1, 37)
    OP         ']'           (1, 37) (1, 38)
    """)

    def test_multiplicative(self):
        # Multiplicative
        self.check_tokenize("x = 1//1*1/5*12%0x12", """\
    NAME       'x'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    NUMBER     '1'           (1, 4) (1, 5)
    OP         '//'          (1, 5) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '/'           (1, 10) (1, 11)
    NUMBER     '5'           (1, 11) (1, 12)
    OP         '*'           (1, 12) (1, 13)
    NUMBER     '12'          (1, 13) (1, 15)
    OP         '%'           (1, 15) (1, 16)
    NUMBER     '0x12'        (1, 16) (1, 20)
    """)

    def test_unary(self):
        # Unary
        self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
    OP         '~'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '^'           (1, 3) (1, 4)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '&'           (1, 7) (1, 8)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '|'           (1, 11) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '^'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    NUMBER     '1'           (1, 17) (1, 18)
    """)
        self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
    OP         '-'           (1, 0) (1, 1)
    NUMBER     '1'           (1, 1) (1, 2)
    OP         '*'           (1, 2) (1, 3)
    NUMBER     '1'           (1, 3) (1, 4)
    OP         '/'           (1, 4) (1, 5)
    NUMBER     '1'           (1, 5) (1, 6)
    OP         '+'           (1, 6) (1, 7)
    NUMBER     '1'           (1, 7) (1, 8)
    OP         '*'           (1, 8) (1, 9)
    NUMBER     '1'           (1, 9) (1, 10)
    OP         '//'          (1, 10) (1, 12)
    NUMBER     '1'           (1, 12) (1, 13)
    OP         '-'           (1, 14) (1, 15)
    OP         '-'           (1, 16) (1, 17)
    OP         '-'           (1, 17) (1, 18)
    OP         '-'           (1, 18) (1, 19)
    NUMBER     '1'           (1, 19) (1, 20)
    OP         '**'          (1, 20) (1, 22)
    NUMBER     '1'           (1, 22) (1, 23)
    """)

    def test_selector(self):
        # Selector
        self.check_tokenize("import sys, time\n"
                            "x = sys.modules['time'].time()", """\
    NAME       'import'      (1, 0) (1, 6)
    NAME       'sys'         (1, 7) (1, 10)
    OP         ','           (1, 10) (1, 11)
    NAME       'time'        (1, 12) (1, 16)
    NEWLINE    '\\n'          (1, 16) (1, 17)
    NAME       'x'           (2, 0) (2, 1)
    OP         '='           (2, 2) (2, 3)
    NAME       'sys'         (2, 4) (2, 7)
    OP         '.'           (2, 7) (2, 8)
    NAME       'modules'     (2, 8) (2, 15)
    OP         '['           (2, 15) (2, 16)
    STRING     "'time'"      (2, 16) (2, 22)
    OP         ']'           (2, 22) (2, 23)
    OP         '.'           (2, 23) (2, 24)
    NAME       'time'        (2, 24) (2, 28)
    OP         '('           (2, 28) (2, 29)
    OP         ')'           (2, 29) (2, 30)
    """)

    def test_method(self):
        # Methods
        self.check_tokenize("@staticmethod\n"
                            "def foo(x,y): pass", """\
    OP         '@'           (1, 0) (1, 1)
    NAME       'staticmethod (1, 1) (1, 13)
    NEWLINE    '\\n'          (1, 13) (1, 14)
    NAME       'def'         (2, 0) (2, 3)
    NAME       'foo'         (2, 4) (2, 7)
    OP         '('           (2, 7) (2, 8)
    NAME       'x'           (2, 8) (2, 9)
    OP         ','           (2, 9) (2, 10)
    NAME       'y'           (2, 10) (2, 11)
    OP         ')'           (2, 11) (2, 12)
    OP         ':'           (2, 12) (2, 13)
    NAME       'pass'        (2, 14) (2, 18)
    """)

    def test_tabs(self):
        # Evil tabs
        self.check_tokenize("def f():\n"
                            "\tif x\n"
                            "        \tpass", """\
    NAME       'def'         (1, 0) (1, 3)
    NAME       'f'           (1, 4) (1, 5)
    OP         '('           (1, 5) (1, 6)
    OP         ')'           (1, 6) (1, 7)
    OP         ':'           (1, 7) (1, 8)
    NEWLINE    '\\n'          (1, 8) (1, 9)
    INDENT     '\\t'          (2, 0) (2, 1)
    NAME       'if'          (2, 1) (2, 3)
    NAME       'x'           (2, 4) (2, 5)
    NEWLINE    '\\n'          (2, 5) (2, 6)
    INDENT     '        \\t'  (3, 0) (3, 9)
    NAME       'pass'        (3, 9) (3, 13)
    DEDENT     ''            (4, 0) (4, 0)
    DEDENT     ''            (4, 0) (4, 0)
    """)

    def test_pathological_trailing_whitespace(self):
        # Pathological whitespace (http://bugs.python.org/issue16152)
        self.check_tokenize("@          ", """\
    OP         '@'           (1, 0) (1, 1)
    """)


def decistmt(s):
    """Substitute Decimals for floats in a string of statements."""
    result = []
    g = generate_tokens(StringIO(s).readline)   # tokenize the string
    for toknum, tokval, _, _, _ in g:
        if toknum == NUMBER and '.' in tokval:  # replace NUMBER tokens
            result.extend([
                (NAME, 'Decimal'),
                (OP, '('),
                (STRING, repr(tokval)),
                (OP, ')')
            ])
        else:
            result.append((toknum, tokval))
    return untokenize(result)


class TestMisc(TestCase):

    def test_decistmt(self):
        # Substitute Decimals for floats in a string of statements.
        # This is an example from the docs.

        from decimal import Decimal
        s = '+21.3e-5*-.1234/81.7'
        self.assertEqual(decistmt(s),
                         "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")

        # The format of the exponent is inherited from the platform C library.
        # Known cases are "e-007" (Windows) and "e-07" (not Windows).  Since
        # we're only showing 12 digits, and the 13th isn't close to 5, the
        # rest of the output should be platform-independent.
        self.assertRegexpMatches(str(eval(s)), '-3.21716034272e-0+7')

        # Output from calculations with Decimal should be identical across all
        # platforms.
        self.assertEqual(eval(decistmt(s)), Decimal('-3.217160342717258261933904529E-7'))


class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if previous row
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                'start (1,3) precedes previous end (2,2)')
        # raise if previous column in row
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
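        # behind, so add_whitespace() must emit an explicit "\\\n" for each
        # row it advances past, as the assertions below verify.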
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])

    def test_iter_compat(self):
        u = Untokenizer()
        token = (NAME, 'Hello')
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')


class TestRoundtrip(TestCase):

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`. `f` is an open file or a string.
        The source code in f is tokenized, converted back to source code
        via tokenize.untokenize(), and tokenized again from the latter.
        The test fails if the second tokenization doesn't match the first.
        """
        if isinstance(f, str): f = StringIO(f)
        token_list = list(generate_tokens(f.readline))
        f.close()
        tokens1 = [tok[:2] for tok in token_list]
        new_text = untokenize(tokens1)
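        # splitlines(1) keeps the line endings, so the iterator's bound
        # next method behaves like a file's readline for generate_tokens().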
        readline = iter(new_text.splitlines(1)).next
        tokens2 = [tok[:2] for tok in generate_tokens(readline)]
        self.assertEqual(tokens2, tokens1)

    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")
        self.check_roundtrip("if x == 1:\n"
                             "    print x\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier. Note that this test involves trailing
        # whitespace after the colon.

        self.check_roundtrip("if x == 1 : \n"
                             "  print x\n")
        fn = test_support.findfile("tokenize_tests" + os.extsep + "txt")
        with open(fn) as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print x # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print 'x==1'\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5 # a one liner\n"
                             "  def y(m): # A whitespace after the colon\n"
                             "     return y*4 # 3-space indent\n")

        # Some error-handling code

        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print 'Can not import' # comment2\n"
                             "else:   print 'Loaded'\n")

    def test_continuation(self):
        # Balancing continuation
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
        self.check_roundtrip("'' ''")

    def test_random_files(self):
        # Test roundtrip on random python modules.
        # pass the '-ucpu' option to process the full directory.

        import glob, random
        fn = test_support.findfile("tokenize_tests" + os.extsep + "txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(tempdir, "test*.py"))

        if not test_support.is_resource_enabled("cpu"):
            testfiles = random.sample(testfiles, 10)

        for testfile in testfiles:
            try:
                with open(testfile, 'rb') as f:
                    self.check_roundtrip(f)
            except:
                print "Roundtrip failed for file %s" % testfile
                raise

    def roundtrip(self, code):
        if isinstance(code, str):
            code = code.encode('utf-8')
        tokens = generate_tokens(StringIO(code).readline)
        return untokenize(tokens).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])


def test_main():
    test_support.run_unittest(TokenizeTest)
    test_support.run_unittest(UntokenizeTest)
    test_support.run_unittest(TestRoundtrip)
    test_support.run_unittest(TestMisc)

if __name__ == "__main__":
    test_main()