1# -*- coding: iso-8859-1 -*-
2# mode: run
3# tag: warnings
4
5
6cimport cython
7
cdef Py_UCS4 char_ASCII = u'A'  # C-level character, ASCII
cdef Py_UCS4 char_KLINGON = u'\uF8D2'  # C-level character, non-ASCII (U+F8D2)

u_A = char_ASCII  # Python-level copies of the two characters, used by the doctests below
u_KLINGON = char_KLINGON
13
14
def compare_ASCII():
    """Print how the module-level ASCII Py_UCS4 value compares to three literals.
    >>> compare_ASCII()
    True
    False
    False
    """
    print(char_ASCII == u'A')       # same character
    print(char_ASCII == u'B')       # different ASCII character
    print(char_ASCII == u'\uF8D2')  # non-ASCII character
25
26
def compare_klingon():
    """Print how the module-level non-ASCII Py_UCS4 value compares to three literals.
    >>> compare_klingon()
    True
    False
    False
    """
    print(char_KLINGON == u'\uF8D2')  # same character
    print(char_KLINGON == u'A')       # ASCII character
    print(char_KLINGON == u'B')       # another ASCII character
37
38
def single_uchar_compare():
    """Assert the ordering of two adjacent single-character unicode literals.
    >>> single_uchar_compare()
    """
    assert u'\u0100' < u'\u0101'  # literal compared against literal, both directions
    assert u'\u0101' > u'\u0100'
45
46
47from cpython.unicode cimport PyUnicode_FromOrdinal
48import sys
49
u0 = u'\x00'  # character with ordinal 0
u1 = u'\x01'  # character with ordinal 1
umax = PyUnicode_FromOrdinal(sys.maxunicode)  # character with the highest ordinal this Python supports
53
def unicode_ordinal(Py_UCS4 i):
    """Return *i* unchanged after it was converted to Py_UCS4 on the way in.
    >>> ord(unicode_ordinal(0)) == 0
    True
    >>> ord(unicode_ordinal(1)) == 1
    True
    >>> ord(unicode_ordinal(sys.maxunicode)) == sys.maxunicode
    True

    >>> ord(unicode_ordinal(u0)) == 0
    True
    >>> ord(unicode_ordinal(u1)) == 1
    True
    >>> ord(unicode_ordinal(umax)) == sys.maxunicode
    True

    Value too small:
    >>> unicode_ordinal(-1) #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    OverflowError: ...

    Value too large:
    >>> unicode_ordinal(1114111+1) #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    OverflowError: ...

    Less than one character:
    >>> unicode_ordinal(u0[:0])
    Traceback (most recent call last):
    ...
    ValueError: only single character unicode strings can be converted to Py_UCS4, got length 0

    More than one character:
    >>> unicode_ordinal(u0+u1)
    Traceback (most recent call last):
    ...
    ValueError: only single character unicode strings can be converted to Py_UCS4, got length 2
    """
    return i  # the doctests call ord() on the result, so it comes back as a character
95
96
def ord_py_ucs4(Py_UCS4 x):
    """Return ord() of a value received as Py_UCS4.
    >>> ord_py_ucs4(u0)
    0
    >>> ord_py_ucs4(u_A)
    65
    >>> ord_py_ucs4(u_KLINGON)
    63698
    """
    return ord(x)
107
108
@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_type_methods(Py_UCS4 uchar):
    """Return the results of the nine is*() predicates called on a Py_UCS4 value.
    >>> unicode_type_methods(ord('A'))
    [True, True, False, False, False, False, False, True, True]
    >>> unicode_type_methods(ord('a'))
    [True, True, False, False, True, False, False, False, False]
    >>> unicode_type_methods(ord('8'))
    [True, False, True, True, False, True, False, False, False]
    >>> unicode_type_methods(ord('\\t'))
    [False, False, False, False, False, False, True, False, False]
    """
    return [
        # character types (the doctest lists follow this order)
        uchar.isalnum(),
        uchar.isalpha(),
        uchar.isdecimal(),
        uchar.isdigit(),
        uchar.islower(),
        uchar.isnumeric(),
        uchar.isspace(),
        uchar.istitle(),
        uchar.isupper(),
        ]
134
135#@cython.test_assert_path_exists('//PythonCapiCallNode')
136#@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_methods(Py_UCS4 uchar):
    """Return [lower(), upper(), title()] of a Py_UCS4 value.
    >>> unicode_methods(ord('A')) == ['a', 'A', 'A'] or unicode_methods(ord('A'))
    True
    >>> unicode_methods(ord('a')) == ['a', 'A', 'A'] or unicode_methods(ord('a'))
    True
    >>> unicode_methods(0x1E9E) == [u'\\xdf', u'\\u1e9e', u'\\u1e9e'] or unicode_methods(0x1E9E)
    True
    >>> unicode_methods(0x0130) in (
    ...     [u'i\\u0307', u'\\u0130', u'\\u0130'],  # Py3
    ...     [u'i', u'\\u0130', u'\\u0130'],  # Py2
    ... ) or unicode_methods(0x0130)
    True
    """
    # \u1E9E == 'LATIN CAPITAL LETTER SHARP S'
    # \u0130 == 'LATIN CAPITAL LETTER I WITH DOT ABOVE'
    return [
        # character conversion (a result may be longer than one character, see the 0x0130 doctest)
        uchar.lower(),
        uchar.upper(),
        uchar.title(),
        ]
159
160
161#@cython.test_assert_path_exists('//PythonCapiCallNode')
162#@cython.test_fail_if_path_exists(
163#    '//SimpleCallNode',
164#    '//CoerceFromPyTypeNode',
165#)
def unicode_method_return_type(Py_UCS4 uchar):
    """Report whether *uchar* equals its own upper-case and lower-case form.
    >>> unicode_method_return_type(ord('A'))
    [True, False]
    >>> unicode_method_return_type(ord('a'))
    [False, True]
    """
    cdef Py_UCS4 as_upper = uchar.upper()  # conversion results are stored back into Py_UCS4
    cdef Py_UCS4 as_lower = uchar.lower()
    return [as_upper == uchar, as_lower == uchar]
176
177
@cython.test_assert_path_exists('//IntNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//PythonCapiCallNode')
def len_uchar(Py_UCS4 uchar):
    """Return len() of a Py_UCS4 value; the doctest expects 1.
    >>> len_uchar(ord('A'))
    1
    """
    return len(uchar)
187
def index_uchar(Py_UCS4 uchar, Py_ssize_t i):
    """Index a Py_UCS4 value with constant 0, constant -1 and the runtime index *i*.
    >>> index_uchar(ord('A'), 0) == ('A', 'A', 'A')
    True
    >>> index_uchar(ord('A'), -1) == ('A', 'A', 'A')
    True
    >>> index_uchar(ord('A'), 1)
    Traceback (most recent call last):
    IndexError: string index out of range
    """
    return uchar[0], uchar[-1], uchar[i]  # per the doctests only i == 0 and i == -1 succeed
199
mixed_ustring = u'AbcDefGhIjKlmnoP'  # 16 characters: 6 upper case, 10 lower case
lower_ustring = mixed_ustring.lower()
upper_ustring = mixed_ustring.upper()  # previously built with .lower() by mistake
203
@cython.test_assert_path_exists('//PythonCapiCallNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//ForInStatNode')
def count_lower_case_characters(unicode ustring):
    """Count the lower-case characters in *ustring*.
    >>> count_lower_case_characters(mixed_ustring)
    10
    >>> count_lower_case_characters(lower_ustring)
    16
    """
    cdef Py_ssize_t lower_total = 0
    for ch in ustring:
        if ch.islower():
            lower_total += 1
    return lower_total
220
@cython.test_assert_path_exists('//PythonCapiCallNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//ForInStatNode')
def count_lower_case_characters_slice(unicode ustring):
    """Count the lower-case characters in *ustring*, skipping its first and last character.
    >>> count_lower_case_characters_slice(mixed_ustring)
    10
    >>> count_lower_case_characters_slice(lower_ustring)
    14
    >>> sum([ 1 for uchar in lower_ustring[1:-1] if uchar.islower() ])
    14
    """
    cdef Py_ssize_t lower_total = 0
    for ch in ustring[1:-1]:
        if ch.islower():
            lower_total += 1
    return lower_total
239
@cython.test_assert_path_exists('//PythonCapiCallNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//ForInStatNode')
def count_lower_case_characters_slice_reversed(unicode ustring):
    """Count the lower-case characters in *ustring*[-2:0:-1], a backwards-stepping slice.
    >>> count_lower_case_characters_slice_reversed(mixed_ustring)
    10
    >>> count_lower_case_characters_slice_reversed(lower_ustring)
    14
    >>> sum([ 1 for uchar in lower_ustring[-2:0:-1] if uchar.islower() ])
    14
    """
    cdef Py_ssize_t lower_total = 0
    for ch in ustring[-2:0:-1]:
        if ch.islower():
            lower_total += 1
    return lower_total
258
def loop_object_over_latin1_unicode_literal():
    """Iterate a unicode literal ending in U+00D7 with an object-typed loop variable and re-join it.
    >>> result = loop_object_over_latin1_unicode_literal()
    >>> print(result[:-1])
    abcdefg
    >>> ord(result[-1]) == 0xD7
    True
    """
    cdef object ch
    collected = []
    for ch in u'abcdefg\xD7':
        collected.append(ch)
    return u''.join(collected)
272
def loop_object_over_unicode_literal():
    """Iterate a unicode literal ending in U+F8FD with an object-typed loop variable and re-join it.
    >>> result = loop_object_over_unicode_literal()
    >>> print(result[:-1])
    abcdefg
    >>> ord(result[-1]) == 0xF8FD
    True
    """
    cdef object ch
    collected = []
    for ch in u'abcdefg\uF8FD':
        collected.append(ch)
    return u''.join(collected)
286
@cython.test_assert_path_exists('//SwitchStatNode')
@cython.test_fail_if_path_exists('//ForInStatNode')
def iter_and_in():
    """Print every character of u'abcdefgh' that also occurs in u'abCDefGh'.
    >>> iter_and_in()
    a
    b
    e
    f
    h
    """
    for c in u'abcdefgh':
        if c in u'abCDefGh':
            print(c)  # call form: same output as the print statement, also valid in Py3 syntax
301
302
@cython.test_fail_if_path_exists('//ForInStatNode')
def iter_inferred():
    """Print each character of a string held in a variable that was first bound to a list.
    >>> iter_inferred()
    a
    b
    c
    d
    e
    """
    uchars = list(u"abcde")
    uchars = u''.join(uchars)  # same name rebound from list to unicode string
    for c in uchars:
        print(c)  # call form: same output as the print statement, also valid in Py3 syntax
317
318
@cython.test_assert_path_exists('//SwitchStatNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//ForInStatNode')
def index_and_in():
    """Print each i in 1..8 for which u'abcdefgh'[-i] occurs in u'abCDefGh'.
    >>> index_and_in()
    1
    3
    4
    7
    8
    """
    cdef int i
    for i in range(1, 9):
        if u'abcdefgh'[-i] in u'abCDefGh':
            print(i)  # call form: same output as the print statement, also valid in Py3 syntax
335
336# special test for narrow builds
337
high_uchar = u'\U00012345'  # the character searched for by uchar_in()
high_ustring0 = u'\U00012345\U00012346abc'  # contains it as first character
high_ustring1 = u'\U00012346\U00012345abc'  # contains it as second character
high_ustring_end = u'\U00012346abc\U00012344\U00012345'  # contains it as last character
high_ustring_no = u'\U00012346\U00012346abc'  # does not contain it
343
def uchar_in(Py_UCS4 uchar, unicode ustring):
    """Return whether *uchar* occurs in *ustring*; *uchar* is asserted to be U+12345.
    >>> uchar_in(high_uchar, high_ustring0)
    True
    >>> uchar_in(high_uchar, high_ustring1)
    True
    >>> uchar_in(high_uchar, high_ustring_end)
    True
    >>> uchar_in(high_uchar, high_ustring_no)
    False
    """
    assert uchar == 0x12345, ('%X' % uchar)  # failure message shows the received code point in hex
    return uchar in ustring
357
358
def uchar_lookup_in_dict(obj, Py_UCS4 uchar):
    """Look up *uchar* in *obj* twice: via a dict-typed variable and via the untyped object.
    >>> d = {u_KLINGON: 1234, u0: 0, u1: 1, u_A: 2}
    >>> uchar_lookup_in_dict(d, u_KLINGON)
    (1234, 1234)
    >>> uchar_lookup_in_dict(d, u_A)
    (2, 2)
    >>> uchar_lookup_in_dict(d, u0)
    (0, 0)
    >>> uchar_lookup_in_dict(d, u1)
    (1, 1)
    """
    cdef dict d = obj
    dval = d[uchar]  # lookup through the dict-typed variable
    objval = obj[uchar]  # do not move: _WARNINGS below pins this lookup at "373:16"
    return dval, objval
375
376
# Expected compiler warnings, one "line:column: message" entry per line
# (presumably checked by the test runner, see "# tag: warnings" at the top).
# The position 373:16 is the `obj[uchar]` lookup in uchar_lookup_in_dict();
# adding or removing lines above it requires updating the number here.
_WARNINGS = """
373:16: Item lookup of unicode character codes now always converts to a Unicode string. Use an explicit C integer cast to get back the previous integer lookup behaviour.
"""
380