# -*- coding: iso-8859-1 -*-
# mode: run
# tag: warnings

cimport cython

cdef Py_UNICODE char_ASCII = u'A'
cdef Py_UNICODE char_KLINGON = u'\uF8D2'  # code point in the BMP Private Use Area

u_A = char_ASCII  # Python-level copies of the C characters, used by the doctests
u_KLINGON = char_KLINGON


def compare_ASCII():
    """Compare the C-level ASCII character with unicode literals.
    >>> compare_ASCII()
    True
    False
    False
    """
    print(char_ASCII == u'A')
    print(char_ASCII == u'B')
    print(char_ASCII == u'\uF8D2')


def compare_klingon():
    """Compare the C-level non-ASCII character with unicode literals.
    >>> compare_klingon()
    True
    False
    False
    """
    print(char_KLINGON == u'\uF8D2')
    print(char_KLINGON == u'A')
    print(char_KLINGON == u'B')


from cpython.unicode cimport PyUnicode_FromOrdinal
import sys

u0 = u'\x00'
u1 = u'\x01'
umax = PyUnicode_FromOrdinal(sys.maxunicode)  # character with the largest supported code point

def unicode_ordinal(Py_UNICODE i):
    """Coerce an int or a 1-character unicode string to Py_UNICODE and return it.
    >>> ord(unicode_ordinal(0)) == 0
    True
    >>> ord(unicode_ordinal(1)) == 1
    True
    >>> ord(unicode_ordinal(sys.maxunicode)) == sys.maxunicode
    True

    >>> ord(unicode_ordinal(u0)) == 0
    True
    >>> ord(unicode_ordinal(u1)) == 1
    True
    >>> ord(unicode_ordinal(umax)) == sys.maxunicode
    True

    Value too small:
    >>> unicode_ordinal(-1) #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    OverflowError: ...

    Value too large:
    >>> unicode_ordinal(sys.maxunicode+1) #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    OverflowError: ...

    Less than one character:
    >>> unicode_ordinal(u0[:0])
    Traceback (most recent call last):
    ...
    ValueError: only single character unicode strings can be converted to Py_UNICODE, got length 0

    More than one character:
    >>> unicode_ordinal(u0+u1)
    Traceback (most recent call last):
    ...
    ValueError: only single character unicode strings can be converted to Py_UNICODE, got length 2
    """
    return i


def ord_pyunicode(Py_UNICODE x):
    """Return ord() of a Py_UNICODE argument.
    >>> ord_pyunicode(u0)
    0
    >>> ord_pyunicode(u_A)
    65
    >>> ord_pyunicode(u_KLINGON)
    63698
    """
    return ord(x)


@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_type_methods(Py_UNICODE uchar):
    """Return the results of the unicode is*() predicates called on a Py_UNICODE.
    >>> unicode_type_methods(ord('A'))
    [True, True, False, False, False, False, False, True, True]
    >>> unicode_type_methods(ord('a'))
    [True, True, False, False, True, False, False, False, False]
    >>> unicode_type_methods(ord('8'))
    [True, False, True, True, False, True, False, False, False]
    >>> unicode_type_methods(ord('\\t'))
    [False, False, False, False, False, False, True, False, False]
    """
    return [
        # character types
        uchar.isalnum(),
        uchar.isalpha(),
        uchar.isdecimal(),
        uchar.isdigit(),
        uchar.islower(),
        uchar.isnumeric(),
        uchar.isspace(),
        uchar.istitle(),
        uchar.isupper(),
    ]

#@cython.test_assert_path_exists('//PythonCapiCallNode')
#@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_methods(Py_UNICODE uchar):
    """Return the lower(), upper() and title() conversions of a Py_UNICODE.
    >>> unicode_methods(ord('A')) == ['a', 'A', 'A']
    True
    >>> unicode_methods(ord('a')) == ['a', 'A', 'A']
    True
    """
    return [
        # character conversion
        uchar.lower(),
        uchar.upper(),
        uchar.title(),
    ]

@cython.test_assert_path_exists('//IntNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//PythonCapiCallNode')
def len_uchar(Py_UNICODE uchar):
    """Return len() of a Py_UNICODE value.
    >>> len_uchar(ord('A'))
    1
    """
    assert uchar  # just to avoid C compiler unused arg warning
    return len(uchar)

def index_uchar(Py_UNICODE uchar, Py_ssize_t i):
    """Index a Py_UNICODE like a 1-character string at positions 0, -1 and i.
    >>> index_uchar(ord('A'), 0) == ('A', 'A', 'A')
    True
    >>> index_uchar(ord('A'), -1) == ('A', 'A', 'A')
    True
    >>> index_uchar(ord('A'), 1)
    Traceback (most recent call last):
    IndexError: string index out of range
    """
    return uchar[0], uchar[-1], uchar[i]

mixed_ustring = u'AbcDefGhIjKlmnoP'  # 10 lower-case and 6 upper-case characters
lower_ustring = mixed_ustring.lower()
upper_ustring = mixed_ustring.upper()

@cython.test_assert_path_exists('//PythonCapiCallNode',
                                 '//ForFromStatNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//ForInStatNode')
def count_lower_case_characters(unicode ustring):
    """Count the lower-case characters found by iterating over ustring.
    >>> count_lower_case_characters(mixed_ustring)
    10
    >>> count_lower_case_characters(lower_ustring)
    16
    """
    cdef Py_ssize_t count = 0
    for uchar in ustring:
        if uchar.islower():
            count += 1
    return count

@cython.test_assert_path_exists('//PythonCapiCallNode',
                                 '//ForFromStatNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//ForInStatNode')
def count_lower_case_characters_slice(unicode ustring):
    """Count the lower-case characters in ustring[1:-1].
    >>> count_lower_case_characters_slice(mixed_ustring)
    10
    >>> count_lower_case_characters_slice(lower_ustring)
    14
    """
    cdef Py_ssize_t count = 0
    for uchar in ustring[1:-1]:
        if uchar.islower():
            count += 1
    return count

@cython.test_assert_path_exists('//SwitchStatNode',
                                 '//ForFromStatNode')
@cython.test_fail_if_path_exists('//ForInStatNode')
def iter_and_in():
    """Print each character of u'abcdefgh' that also occurs in u'abCDefGh'.
    >>> iter_and_in()
    a
    b
    e
    f
    h
    """
    for c in u'abcdefgh':
        if c in u'abCDefGh':
            print c

@cython.test_assert_path_exists('//SwitchStatNode')
@cython.test_fail_if_path_exists('//ForInStatNode')
def index_and_in():
    """Print each i in 1..8 for which u'abcdefgh'[-i] occurs in u'abCDefGh'.
    >>> index_and_in()
    1
    3
    4
    7
    8
    """
    cdef int i
    for i in range(1,9):
        if u'abcdefgh'[-i] in u'abCDefGh':
            print i


def uchar_lookup_in_dict(obj, Py_UNICODE uchar):
    """Look up a Py_UNICODE key in obj, once as a typed dict and once as a plain object.
    >>> d = {u_KLINGON: 1234, u0: 0, u1: 1, u_A: 2}
    >>> uchar_lookup_in_dict(d, u_KLINGON)
    (1234, 1234)
    >>> uchar_lookup_in_dict(d, u_A)
    (2, 2)
    >>> uchar_lookup_in_dict(d, u0)
    (0, 0)
    >>> uchar_lookup_in_dict(d, u1)
    (1, 1)
    """
    cdef dict d = obj
    dval = d[uchar]
    objval = obj[uchar]  # lookup on the untyped object: the warning in _WARNINGS points here
    return dval, objval


# NOTE: the "250:16" prefix below is a line:column position in this file.  It
# designates the '[' of ``obj[uchar]`` in uchar_lookup_in_dict(), so the number
# must be adjusted whenever lines are added or removed above that statement.
_WARNINGS = """
250:16: Item lookup of unicode character codes now always converts to a Unicode string. Use an explicit C integer cast to get back the previous integer lookup behaviour.
"""