1# -*- coding: iso-8859-1 -*-
2# mode: run
3# tag: warnings
4
5
6cimport cython
7
cdef Py_UCS4 char_ASCII = u'A'  # C-level character inside the ASCII range
cdef Py_UCS4 char_KLINGON = u'\uF8D2'  # C-level non-ASCII character, code point U+F8D2
# Python-level names bound to the two C characters above; the doctests in this file use them.
u_A = char_ASCII
u_KLINGON = char_KLINGON
13
14
def compare_ASCII():
    """Print whether char_ASCII equals each of three unicode literals.
    >>> compare_ASCII()
    True
    False
    False
    """
    print(char_ASCII == u'A')  # same character
    print(char_ASCII == u'B')  # different ASCII character
    print(char_ASCII == u'\uF8D2')  # the non-ASCII character stored in char_KLINGON
25
26
def compare_klingon():
    """Print whether char_KLINGON equals each of three unicode literals.
    >>> compare_klingon()
    True
    False
    False
    """
    print(char_KLINGON == u'\uF8D2')  # same character
    print(char_KLINGON == u'A')  # ASCII character, expected to differ
    print(char_KLINGON == u'B')  # ASCII character, expected to differ
37
38
def single_uchar_compare():
    """Assert the relative ordering of two adjacent single-character literals.
    >>> single_uchar_compare()
    """
    assert u'\u0100' < u'\u0101'
    assert u'\u0101' > u'\u0100'  # same pair, operands and operator mirrored
45
46
47from cpython.unicode cimport PyUnicode_FromOrdinal
48import sys
49
u0 = u'\x00'  # one-character string for code point 0
u1 = u'\x01'  # one-character string for code point 1
umax = PyUnicode_FromOrdinal(sys.maxunicode)  # string built from the ordinal sys.maxunicode
53
def unicode_ordinal(Py_UCS4 i):
    """Take an int or a one-character unicode string as Py_UCS4 and return it unchanged.
    >>> ord(unicode_ordinal(0)) == 0
    True
    >>> ord(unicode_ordinal(1)) == 1
    True
    >>> ord(unicode_ordinal(sys.maxunicode)) == sys.maxunicode
    True

    >>> ord(unicode_ordinal(u0)) == 0
    True
    >>> ord(unicode_ordinal(u1)) == 1
    True
    >>> ord(unicode_ordinal(umax)) == sys.maxunicode
    True

    Value too small:
    >>> unicode_ordinal(-1) #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    OverflowError: ...

    Value too large:
    >>> unicode_ordinal(1114111+1) #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    OverflowError: ...

    Less than one character:
    >>> unicode_ordinal(u0[:0])
    Traceback (most recent call last):
    ...
    ValueError: only single character unicode strings can be converted to Py_UCS4, got length 0

    More than one character:
    >>> unicode_ordinal(u0+u1)
    Traceback (most recent call last):
    ...
    ValueError: only single character unicode strings can be converted to Py_UCS4, got length 2
    """
    return i  # the doctests apply ord() to this result, i.e. callers receive a single character
95
96
def ord_py_ucs4(Py_UCS4 x):
    """Return ord() of a value received as Py_UCS4.
    >>> ord_py_ucs4(u0)
    0
    >>> ord_py_ucs4(u_A)
    65
    >>> ord_py_ucs4(u_KLINGON)
    63698
    """
    return ord(x)  # 63698 in the doctest is 0xF8D2, the code point of u_KLINGON
107
108
@cython.test_assert_path_exists('//PythonCapiCallNode')  # NOTE(review): presumably checked against the compiled syntax tree
@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_type_methods(Py_UCS4 uchar):
    """Return the results of nine is*() character predicates called on uchar, in a list.
    >>> unicode_type_methods(ord('A'))
    [True, True, False, False, False, False, False, True, True]
    >>> unicode_type_methods(ord('a'))
    [True, True, False, False, True, False, False, False, False]
    >>> unicode_type_methods(ord('8'))
    [True, False, True, True, False, True, False, False, False]
    >>> unicode_type_methods(ord('\\t'))
    [False, False, False, False, False, False, True, False, False]
    """
    return [
        # character types (list order matches the expected doctest rows above)
        uchar.isalnum(),
        uchar.isalpha(),
        uchar.isdecimal(),
        uchar.isdigit(),
        uchar.islower(),
        uchar.isnumeric(),
        uchar.isspace(),
        uchar.istitle(),
        uchar.isupper(),
        ]
134
@cython.test_assert_path_exists('//PythonCapiCallNode')  # NOTE(review): presumably checked against the compiled syntax tree
@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_methods(Py_UCS4 uchar):
    """Return the [lower(), upper(), title()] conversions of uchar.
    >>> unicode_methods(ord('A')) == ['a', 'A', 'A']
    True
    >>> unicode_methods(ord('a')) == ['a', 'A', 'A']
    True
    """
    return [
        # character conversion
        uchar.lower(),
        uchar.upper(),
        uchar.title(),
        ]
150
151
@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists(
    '//SimpleCallNode',
    '//CoerceFromPyTypeNode',  # i.e. no conversion from a Python object is expected in this function
)
def unicode_method_return_type(Py_UCS4 uchar):
    """Return [uchar is its own upper(), uchar is its own lower()] as two booleans.
    >>> unicode_method_return_type(ord('A'))
    [True, False]
    >>> unicode_method_return_type(ord('a'))
    [False, True]
    """
    cdef Py_UCS4 uc, ul
    uc, ul = uchar.upper(), uchar.lower()  # results are stored directly in Py_UCS4 variables
    return [uc == uchar, ul == uchar]
167
168
@cython.test_assert_path_exists('//IntNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//PythonCapiCallNode')
def len_uchar(Py_UCS4 uchar):
    """Return len() of a Py_UCS4 value; the doctest expects 1.
    >>> len_uchar(ord('A'))
    1
    """
    return len(uchar)  # the decorators require an IntNode and forbid both call node kinds
178
def index_uchar(Py_UCS4 uchar, Py_ssize_t i):
    """Index a Py_UCS4 value with the constants 0 and -1 and with the runtime index i.
    >>> index_uchar(ord('A'), 0) == ('A', 'A', 'A')
    True
    >>> index_uchar(ord('A'), -1) == ('A', 'A', 'A')
    True
    >>> index_uchar(ord('A'), 1)
    Traceback (most recent call last):
    IndexError: string index out of range
    """
    return uchar[0], uchar[-1], uchar[i]  # per the doctest, i == 1 raises IndexError
190
mixed_ustring = u'AbcDefGhIjKlmnoP'  # 16 letters: 6 upper-case, 10 lower-case
lower_ustring = mixed_ustring.lower()  # all 16 letters lower-case
upper_ustring = mixed_ustring.upper()  # all 16 letters upper-case (was mistakenly built with .lower())
194
@cython.test_assert_path_exists('//PythonCapiCallNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//ForInStatNode')
def count_lower_case_characters(unicode ustring):
    """Count the characters of ustring for which islower() is true.
    >>> count_lower_case_characters(mixed_ustring)
    10
    >>> count_lower_case_characters(lower_ustring)
    16
    """
    cdef Py_ssize_t count = 0
    for uchar in ustring:  # uchar is deliberately left undeclared in this function
         if uchar.islower():
             count += 1
    return count
211
@cython.test_assert_path_exists('//PythonCapiCallNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//ForInStatNode')
def count_lower_case_characters_slice(unicode ustring):
    """Count the lower-case characters of ustring, ignoring its first and last character.
    >>> count_lower_case_characters_slice(mixed_ustring)
    10
    >>> count_lower_case_characters_slice(lower_ustring)
    14
    >>> sum([ 1 for uchar in lower_ustring[1:-1] if uchar.islower() ])
    14
    """
    cdef Py_ssize_t count = 0
    for uchar in ustring[1:-1]:  # iterates the slice without the two end characters
         if uchar.islower():
             count += 1
    return count
230
@cython.test_assert_path_exists('//PythonCapiCallNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//ForInStatNode')
def count_lower_case_characters_slice_reversed(unicode ustring):
    """Count lower-case characters while walking ustring backwards, skipping both end characters.
    >>> count_lower_case_characters_slice_reversed(mixed_ustring)
    10
    >>> count_lower_case_characters_slice_reversed(lower_ustring)
    14
    >>> sum([ 1 for uchar in lower_ustring[-2:0:-1] if uchar.islower() ])
    14
    """
    cdef Py_ssize_t count = 0
    for uchar in ustring[-2:0:-1]:  # from the second-to-last character down to index 1
         if uchar.islower():
             count += 1
    return count
249
def loop_object_over_latin1_unicode_literal():
    """Iterate a unicode literal ending in U+00D7 with an object-typed loop variable and rejoin it.
    >>> result = loop_object_over_latin1_unicode_literal()
    >>> print(result[:-1])
    abcdefg
    >>> ord(result[-1]) == 0xD7
    True
    """
    cdef object uchar  # loop variable is explicitly a Python object here
    chars = []
    for uchar in u'abcdefg\xD7':
        chars.append(uchar)
    return u''.join(chars)  # the doctest expects the original literal back
263
def loop_object_over_unicode_literal():
    """Iterate a unicode literal ending in U+F8FD with an object-typed loop variable and rejoin it.
    >>> result = loop_object_over_unicode_literal()
    >>> print(result[:-1])
    abcdefg
    >>> ord(result[-1]) == 0xF8FD
    True
    """
    cdef object uchar  # loop variable is explicitly a Python object here
    chars = []
    for uchar in u'abcdefg\uF8FD':
        chars.append(uchar)
    return u''.join(chars)  # the doctest expects the original literal back
277
@cython.test_assert_path_exists('//SwitchStatNode')
@cython.test_fail_if_path_exists('//ForInStatNode')
def iter_and_in():
    """Print each character of u'abcdefgh' that also occurs in u'abCDefGh'.
    >>> iter_and_in()
    a
    b
    e
    f
    h
    """
    for c in u'abcdefgh':
        if c in u'abCDefGh':  # membership test against a literal; the decorator expects a switch
            print(c)  # call form: valid at every language level, like the print() calls above
292
293
@cython.test_fail_if_path_exists('//ForInStatNode')
def iter_inferred():
    """Print each character of a unicode string rebuilt through list() and u''.join().
    >>> iter_inferred()
    a
    b
    c
    d
    e
    """
    uchars = list(u"abcde")
    uchars = u''.join(uchars)  # same name rebound from list to unicode before the loop
    for c in uchars:
        print(c)  # call form: valid at every language level, like the print() calls above
308
309
@cython.test_assert_path_exists('//SwitchStatNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//ForInStatNode')
def index_and_in():
    """Print every i in 1..8 for which u'abcdefgh'[-i] occurs in u'abCDefGh'.
    >>> index_and_in()
    1
    3
    4
    7
    8
    """
    cdef int i
    for i in range(1,9):
        if u'abcdefgh'[-i] in u'abCDefGh':  # negative index: i counts from the end of the literal
            print(i)  # call form: valid at every language level, like the print() calls above
326
327# special test for narrow builds
328
high_uchar = u'\U00012345'  # code point above U+FFFF
high_ustring0 = u'\U00012345\U00012346abc'  # contains high_uchar as its first character
high_ustring1 = u'\U00012346\U00012345abc'  # contains high_uchar as its second character
high_ustring_end = u'\U00012346abc\U00012344\U00012345'  # contains high_uchar as its last character
high_ustring_no = u'\U00012346\U00012346abc'  # does not contain high_uchar
334
def uchar_in(Py_UCS4 uchar, unicode ustring):
    """Return whether uchar occurs in ustring; uchar must be code point 0x12345.
    >>> uchar_in(high_uchar, high_ustring0)
    True
    >>> uchar_in(high_uchar, high_ustring1)
    True
    >>> uchar_in(high_uchar, high_ustring_end)
    True
    >>> uchar_in(high_uchar, high_ustring_no)
    False
    """
    assert uchar == 0x12345, ('%X' % uchar)  # on failure the message is the received value in hex
    return uchar in ustring
348
349
def uchar_lookup_in_dict(obj, Py_UCS4 uchar):
    """Look up uchar in obj twice: via a dict-typed alias and via obj itself; return both values.
    >>> d = {u_KLINGON: 1234, u0: 0, u1: 1, u_A: 2}
    >>> uchar_lookup_in_dict(d, u_KLINGON)
    (1234, 1234)
    >>> uchar_lookup_in_dict(d, u_A)
    (2, 2)
    >>> uchar_lookup_in_dict(d, u0)
    (0, 0)
    >>> uchar_lookup_in_dict(d, u1)
    (1, 1)
    """
    cdef dict d = obj
    dval = d[uchar]  # lookup through the dict-typed variable
    objval = obj[uchar]  # lookup on the untyped object; _WARNINGS names this position (364:16)
    return dval, objval
366
367
# Expected compiler warning text for this module (the file is tagged "warnings").
# The "364:16" position refers to the `obj[uchar]` lookup in uchar_lookup_in_dict();
# NOTE(review): presumably compared by the test runner, so keep it in sync if lines above move.
_WARNINGS = """
364:16: Item lookup of unicode character codes now always converts to a Unicode string. Use an explicit C integer cast to get back the previous integer lookup behaviour.
"""
371