# -*- coding: iso-8859-1 -*-
# mode: run
# tag: warnings

# Doctests for the Py_UCS4 type.  Keep line counts stable - _WARNINGS (end of file) pins a line number.
cimport cython

cdef Py_UCS4 char_ASCII = u'A'  # C-level character holding an ASCII code point
cdef Py_UCS4 char_KLINGON = u'\uF8D2'  # C-level character holding U+F8D2 (outside Latin-1)

u_A = char_ASCII  # Python-level copies of the two characters, used by the doctests below
u_KLINGON = char_KLINGON

# Compare the ASCII Py_UCS4 with an equal, a different ASCII and a non-ASCII literal.
def compare_ASCII():
    """
    >>> compare_ASCII()
    True
    False
    False
    """
    print(char_ASCII == u'A')
    print(char_ASCII == u'B')
    print(char_ASCII == u'\uF8D2')

# Same three comparisons, starting from the non-ASCII Py_UCS4.
def compare_klingon():
    """
    >>> compare_klingon()
    True
    False
    False
    """
    print(char_KLINGON == u'\uF8D2')
    print(char_KLINGON == u'A')
    print(char_KLINGON == u'B')

# Ordering of two single-character literals; the doctest expects no output, i.e. both asserts hold.
def single_uchar_compare():
    """
    >>> single_uchar_compare()
    """
    assert u'\u0100' < u'\u0101'
    assert u'\u0101' > u'\u0100'

# Boundary characters (code points 0, 1 and sys.maxunicode) for the conversion doctests below.
from cpython.unicode cimport PyUnicode_FromOrdinal
import sys

u0 = u'\x00'
u1 = u'\x01'
umax = PyUnicode_FromOrdinal(sys.maxunicode)

def unicode_ordinal(Py_UCS4 i):  # the typed parameter does the conversion; the body only returns it
    """
    >>> ord(unicode_ordinal(0)) == 0
    True
    >>> ord(unicode_ordinal(1)) == 1
    True
    >>> ord(unicode_ordinal(sys.maxunicode)) == sys.maxunicode
    True

    >>> ord(unicode_ordinal(u0)) == 0
    True
    >>> ord(unicode_ordinal(u1)) == 1
    True
    >>> ord(unicode_ordinal(umax)) == sys.maxunicode
    True

    Value too small:
    >>> unicode_ordinal(-1) #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    OverflowError: ...

    Value too large:
    >>> unicode_ordinal(1114111+1) #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    OverflowError: ...

    Less than one character:
    >>> unicode_ordinal(u0[:0])
    Traceback (most recent call last):
    ...
    ValueError: only single character unicode strings can be converted to Py_UCS4, got length 0

    More than one character:
    >>> unicode_ordinal(u0+u1)
    Traceback (most recent call last):
    ...
    ValueError: only single character unicode strings can be converted to Py_UCS4, got length 2
    """
    return i

# ord() applied to a Py_UCS4 argument yields the integer code point.
def ord_py_ucs4(Py_UCS4 x):
    """
    >>> ord_py_ucs4(u0)
    0
    >>> ord_py_ucs4(u_A)
    65
    >>> ord_py_ucs4(u_KLINGON)
    63698
    """
    return ord(x)

# Going by their names, the decorators require a PythonCapiCallNode and forbid a SimpleCallNode.
@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_type_methods(Py_UCS4 uchar):
    """
    >>> unicode_type_methods(ord('A'))
    [True, True, False, False, False, False, False, True, True]
    >>> unicode_type_methods(ord('a'))
    [True, True, False, False, True, False, False, False, False]
    >>> unicode_type_methods(ord('8'))
    [True, False, True, True, False, True, False, False, False]
    >>> unicode_type_methods(ord('\\t'))
    [False, False, False, False, False, False, True, False, False]
    """
    return [
        # character types
        uchar.isalnum(),
        uchar.isalpha(),
        uchar.isdecimal(),
        uchar.isdigit(),
        uchar.islower(),
        uchar.isnumeric(),
        uchar.isspace(),
        uchar.istitle(),
        uchar.isupper(),
        ]

@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_methods(Py_UCS4 uchar):
    """
    >>> unicode_methods(ord('A')) == ['a', 'A', 'A']
    True
    >>> unicode_methods(ord('a')) == ['a', 'A', 'A']
    True
    """
    return [
        # character conversion
        uchar.lower(),
        uchar.upper(),
        uchar.title(),
        ]

# The converted characters are stored in Py_UCS4 variables; a CoerceFromPyTypeNode is also forbidden.
@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists(
    '//SimpleCallNode',
    '//CoerceFromPyTypeNode',
)
def unicode_method_return_type(Py_UCS4 uchar):
    """
    >>> unicode_method_return_type(ord('A'))
    [True, False]
    >>> unicode_method_return_type(ord('a'))
    [False, True]
    """
    cdef Py_UCS4 uc, ul
    uc, ul = uchar.upper(), uchar.lower()
    return [uc == uchar, ul == uchar]

# len() and indexing applied to a single Py_UCS4 value.
@cython.test_assert_path_exists('//IntNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//PythonCapiCallNode')
def len_uchar(Py_UCS4 uchar):  # decorators expect an IntNode and no call node in the tree
    """
    >>> len_uchar(ord('A'))
    1
    """
    return len(uchar)

def index_uchar(Py_UCS4 uchar, Py_ssize_t i):  # constant indices 0 and -1 plus a runtime index
    """
    >>> index_uchar(ord('A'), 0) == ('A', 'A', 'A')
    True
    >>> index_uchar(ord('A'), -1) == ('A', 'A', 'A')
    True
    >>> index_uchar(ord('A'), 1)
    Traceback (most recent call last):
    IndexError: string index out of range
    """
    return uchar[0], uchar[-1], uchar[i]

mixed_ustring = u'AbcDefGhIjKlmnoP'  # 16 characters, 10 of them lower case
lower_ustring = mixed_ustring.lower()
upper_ustring = mixed_ustring.upper()  # all-upper-case counterpart of lower_ustring

@cython.test_assert_path_exists('//PythonCapiCallNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//ForInStatNode')
def count_lower_case_characters(unicode ustring):  # counts characters for which islower() holds
    """
    >>> count_lower_case_characters(mixed_ustring)
    10
    >>> count_lower_case_characters(lower_ustring)
    16
    """
    cdef Py_ssize_t count = 0
    for uchar in ustring:
        if uchar.islower():
            count += 1
    return count

@cython.test_assert_path_exists('//PythonCapiCallNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//ForInStatNode')
def count_lower_case_characters_slice(unicode ustring):  # same count, first and last char skipped
    """
    >>> count_lower_case_characters_slice(mixed_ustring)
    10
    >>> count_lower_case_characters_slice(lower_ustring)
    14
    >>> sum([ 1 for uchar in lower_ustring[1:-1] if uchar.islower() ])
    14
    """
    cdef Py_ssize_t count = 0
    for uchar in ustring[1:-1]:
        if uchar.islower():
            count += 1
    return count

@cython.test_assert_path_exists('//PythonCapiCallNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//ForInStatNode')
def count_lower_case_characters_slice_reversed(unicode ustring):  # same slice, walked backwards
    """
    >>> count_lower_case_characters_slice_reversed(mixed_ustring)
    10
    >>> count_lower_case_characters_slice_reversed(lower_ustring)
    14
    >>> sum([ 1 for uchar in lower_ustring[-2:0:-1] if uchar.islower() ])
    14
    """
    cdef Py_ssize_t count = 0
    for uchar in ustring[-2:0:-1]:
        if uchar.islower():
            count += 1
    return count

def loop_object_over_latin1_unicode_literal():  # loop variable is declared as a Python object
    """
    >>> result = loop_object_over_latin1_unicode_literal()
    >>> print(result[:-1])
    abcdefg
    >>> ord(result[-1]) == 0xD7
    True
    """
    cdef object uchar
    chars = []
    for uchar in u'abcdefg\xD7':
        chars.append(uchar)
    return u''.join(chars)

def loop_object_over_unicode_literal():  # as above, with a last character beyond Latin-1
    """
    >>> result = loop_object_over_unicode_literal()
    >>> print(result[:-1])
    abcdefg
    >>> ord(result[-1]) == 0xF8FD
    True
    """
    cdef object uchar
    chars = []
    for uchar in u'abcdefg\uF8FD':
        chars.append(uchar)
    return u''.join(chars)

@cython.test_assert_path_exists('//SwitchStatNode')
@cython.test_fail_if_path_exists('//ForInStatNode')
def iter_and_in():  # prints every character of the first literal that also occurs in the second
    """
    >>> iter_and_in()
    a
    b
    e
    f
    h
    """
    for c in u'abcdefgh':
        if c in u'abCDefGh':
            print(c)  # call form, as used by the other tests in this file

# Here the loop iterates over a variable (first a list, then a joined string), not a literal.
@cython.test_fail_if_path_exists('//ForInStatNode')
def iter_inferred():
    """
    >>> iter_inferred()
    a
    b
    c
    d
    e
    """
    uchars = list(u"abcde")
    uchars = u''.join(uchars)
    for c in uchars:
        print(c)

# Negative indexing into a literal combined with an 'in' test; prints the matching offsets.
@cython.test_assert_path_exists('//SwitchStatNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//ForInStatNode')
def index_and_in():
    """
    >>> index_and_in()
    1
    3
    4
    7
    8
    """
    cdef int i
    for i in range(1,9):
        if u'abcdefgh'[-i] in u'abCDefGh':
            print(i)

# special test for narrow builds: every sample string contains code points above U+FFFF

high_uchar = u'\U00012345'
high_ustring0 = u'\U00012345\U00012346abc'
high_ustring1 = u'\U00012346\U00012345abc'
high_ustring_end = u'\U00012346abc\U00012344\U00012345'
high_ustring_no = u'\U00012346\U00012346abc'

def uchar_in(Py_UCS4 uchar, unicode ustring):  # the assert checks the character arrived intact
    """
    >>> uchar_in(high_uchar, high_ustring0)
    True
    >>> uchar_in(high_uchar, high_ustring1)
    True
    >>> uchar_in(high_uchar, high_ustring_end)
    True
    >>> uchar_in(high_uchar, high_ustring_no)
    False
    """
    assert uchar == 0x12345, ('%X' % uchar)
    return uchar in ustring

# Looks up the same Py_UCS4 key through a typed dict and through the untyped object.
def uchar_lookup_in_dict(obj, Py_UCS4 uchar):
    """
    >>> d = {u_KLINGON: 1234, u0: 0, u1: 1, u_A: 2}
    >>> uchar_lookup_in_dict(d, u_KLINGON)
    (1234, 1234)
    >>> uchar_lookup_in_dict(d, u_A)
    (2, 2)
    >>> uchar_lookup_in_dict(d, u0)
    (0, 0)
    >>> uchar_lookup_in_dict(d, u1)
    (1, 1)
    """
    cdef dict d = obj
    dval = d[uchar]
    objval = obj[uchar]
    return dval, objval

# The "364:16" prefix below is the line:column of the `obj[uchar]` lookup in
# uchar_lookup_in_dict(); adjust it whenever lines above that statement are added or removed.
_WARNINGS = """
364:16: Item lookup of unicode character codes now always converts to a Unicode string. Use an explicit C integer cast to get back the previous integer lookup behaviour.
"""