# -*- coding: iso-8859-1 -*-
# mode: run
# tag: warnings
4
cimport cython  # supplies the test_assert_path_exists / test_fail_if_path_exists decorators used below

cdef Py_UNICODE char_ASCII = u'A'  # C-level character constant inside the ASCII range
cdef Py_UNICODE char_KLINGON = u'\uF8D2'  # C-level character constant outside ASCII (U+F8D2)

u_A = char_ASCII  # Python-level copies of the two constants, used as doctest inputs
u_KLINGON = char_KLINGON
12
13
def compare_ASCII():
    """Print whether char_ASCII equals u'A', u'B' and the non-ASCII character U+F8D2.
    >>> compare_ASCII()
    True
    False
    False
    """
    print(char_ASCII == u'A')
    print(char_ASCII == u'B')
    print(char_ASCII == u'\uF8D2')
24
25
def compare_klingon():
    """Print whether char_KLINGON equals the character U+F8D2, u'A' and u'B'.
    >>> compare_klingon()
    True
    False
    False
    """
    print(char_KLINGON == u'\uF8D2')
    print(char_KLINGON == u'A')
    print(char_KLINGON == u'B')
36
37
from cpython.unicode cimport PyUnicode_FromOrdinal
import sys

u0 = u'\x00'  # code point 0
u1 = u'\x01'  # code point 1
umax = PyUnicode_FromOrdinal(sys.maxunicode)  # highest code point this interpreter supports
44
def unicode_ordinal(Py_UNICODE i):
    """Pass an int or one-character unicode string through a Py_UNICODE argument and return it.
    >>> ord(unicode_ordinal(0)) == 0
    True
    >>> ord(unicode_ordinal(1)) == 1
    True
    >>> ord(unicode_ordinal(sys.maxunicode)) == sys.maxunicode
    True

    >>> ord(unicode_ordinal(u0)) == 0
    True
    >>> ord(unicode_ordinal(u1)) == 1
    True
    >>> ord(unicode_ordinal(umax)) == sys.maxunicode
    True

    Value too small:
    >>> unicode_ordinal(-1) #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    OverflowError: ...

    Value too large:
    >>> unicode_ordinal(sys.maxunicode+1) #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    OverflowError: ...

    Less than one character:
    >>> unicode_ordinal(u0[:0])
    Traceback (most recent call last):
    ...
    ValueError: only single character unicode strings can be converted to Py_UNICODE, got length 0

    More than one character:
    >>> unicode_ordinal(u0+u1)
    Traceback (most recent call last):
    ...
    ValueError: only single character unicode strings can be converted to Py_UNICODE, got length 2
    """
    return i  # the doctests apply ord() to the result, i.e. it comes back as a character
86
87
def ord_pyunicode(Py_UNICODE x):
    """Return ord() of a character received as a Py_UNICODE argument.
    >>> ord_pyunicode(u0)
    0
    >>> ord_pyunicode(u_A)
    65
    >>> ord_pyunicode(u_KLINGON)
    63698
    """
    return ord(x)
98
99
@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_type_methods(Py_UNICODE uchar):
    """Return the results of the nine is*() character predicates for uchar, in the order listed below.
    >>> unicode_type_methods(ord('A'))
    [True, True, False, False, False, False, False, True, True]
    >>> unicode_type_methods(ord('a'))
    [True, True, False, False, True, False, False, False, False]
    >>> unicode_type_methods(ord('8'))
    [True, False, True, True, False, True, False, False, False]
    >>> unicode_type_methods(ord('\\t'))
    [False, False, False, False, False, False, True, False, False]
    """
    return [
        # character types
        uchar.isalnum(),
        uchar.isalpha(),
        uchar.isdecimal(),
        uchar.isdigit(),
        uchar.islower(),
        uchar.isnumeric(),
        uchar.isspace(),
        uchar.istitle(),
        uchar.isupper(),
        ]
125
#@cython.test_assert_path_exists('//PythonCapiCallNode')
#@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_methods(Py_UNICODE uchar):
    """Return [uchar.lower(), uchar.upper(), uchar.title()] for a single character.
    >>> unicode_methods(ord('A')) == ['a', 'A', 'A']
    True
    >>> unicode_methods(ord('a')) == ['a', 'A', 'A']
    True
    """
    return [
        # character conversion
        uchar.lower(),
        uchar.upper(),
        uchar.title(),
        ]
141
@cython.test_assert_path_exists('//IntNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//PythonCapiCallNode')
def len_uchar(Py_UNICODE uchar):
    """Return len() of a single character; the decorators require this to compile without any call node.
    >>> len_uchar(ord('A'))
    1
    """
    assert uchar  # just to avoid C compiler unused arg warning
    return len(uchar)
152
def index_uchar(Py_UNICODE uchar, Py_ssize_t i):
    """Index a single character with 0, -1 and i; return the three results as a tuple.
    >>> index_uchar(ord('A'), 0) == ('A', 'A', 'A')
    True
    >>> index_uchar(ord('A'), -1) == ('A', 'A', 'A')
    True
    >>> index_uchar(ord('A'), 1)
    Traceback (most recent call last):
    IndexError: string index out of range
    """
    return uchar[0], uchar[-1], uchar[i]  # per the doctests only i in {0, -1} is in range
164
mixed_ustring = u'AbcDefGhIjKlmnoP'  # 16 characters, 10 of them lower case
lower_ustring = mixed_ustring.lower()  # all 16 characters lower case
upper_ustring = mixed_ustring.upper()  # was .lower(): copy-paste slip that duplicated lower_ustring
168
@cython.test_assert_path_exists('//PythonCapiCallNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//ForInStatNode')
def count_lower_case_characters(unicode ustring):
    """Count the characters of ustring for which islower() is true.
    >>> count_lower_case_characters(mixed_ustring)
    10
    >>> count_lower_case_characters(lower_ustring)
    16
    """
    cdef Py_ssize_t count = 0
    for uchar in ustring:  # keep as a plain for-in loop: the decorators assert how it is compiled
        if uchar.islower():
            count += 1
    return count
185
@cython.test_assert_path_exists('//PythonCapiCallNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//ForInStatNode')
def count_lower_case_characters_slice(unicode ustring):
    """Count the lower-case characters of ustring[1:-1], i.e. ignoring the first and last character.
    >>> count_lower_case_characters_slice(mixed_ustring)
    10
    >>> count_lower_case_characters_slice(lower_ustring)
    14
    """
    cdef Py_ssize_t count = 0
    for uchar in ustring[1:-1]:  # iterating the slice directly is the construct under test
        if uchar.islower():
            count += 1
    return count
202
@cython.test_assert_path_exists('//SwitchStatNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//ForInStatNode')
def iter_and_in():
    """Print every character of u'abcdefgh' that also occurs in u'abCDefGh', one per line.
    >>> iter_and_in()
    a
    b
    e
    f
    h
    """
    for c in u'abcdefgh':
        if c in u'abCDefGh':
            print(c)  # parenthesised like the other print calls in this file
218
@cython.test_assert_path_exists('//SwitchStatNode')
@cython.test_fail_if_path_exists('//ForInStatNode')
def index_and_in():
    """Print each i in 1..8 for which u'abcdefgh'[-i] occurs in u'abCDefGh'.
    >>> index_and_in()
    1
    3
    4
    7
    8
    """
    cdef int i
    for i in range(1,9):
        if u'abcdefgh'[-i] in u'abCDefGh':  # negative index: i counts from the end of the literal
            print(i)  # parenthesised like the other print calls in this file
234
235
def uchar_lookup_in_dict(obj, Py_UNICODE uchar):
    """Look up uchar in obj twice, via a dict-typed alias and via the untyped object; return both values.
    >>> d = {u_KLINGON: 1234, u0: 0, u1: 1, u_A: 2}
    >>> uchar_lookup_in_dict(d, u_KLINGON)
    (1234, 1234)
    >>> uchar_lookup_in_dict(d, u_A)
    (2, 2)
    >>> uchar_lookup_in_dict(d, u0)
    (0, 0)
    >>> uchar_lookup_in_dict(d, u1)
    (1, 1)
    """
    cdef dict d = obj
    dval = d[uchar]  # lookup through the variable declared as dict
    objval = obj[uchar]  # lookup through the untyped object; _WARNINGS expects a warning at this position
    return dval, objval
252
253
# Compiler warnings this module is expected to produce; each entry starts with line:column.
# NOTE(review): presumably checked by the test runner (see "tag: warnings" in the header) -
# the position must keep pointing at the obj[uchar] lookup in uchar_lookup_in_dict().
_WARNINGS = """
250:16: Item lookup of unicode character codes now always converts to a Unicode string. Use an explicit C integer cast to get back the previous integer lookup behaviour.
"""
257