# -*- coding: iso-8859-1 -*-
# mode: run
# tag: warnings

# Tests for the Py_UCS4 C type.  Keep line numbers stable: see _WARNINGS below.
cimport cython

cdef Py_UCS4 char_ASCII = u'A'
cdef Py_UCS4 char_KLINGON = u'\uF8D2'
# Python-level copies of the C characters above, referenced by the doctests.
u_A = char_ASCII
u_KLINGON = char_KLINGON


def compare_ASCII():
    """Print how the ASCII Py_UCS4 global compares with three unicode literals.
    >>> compare_ASCII()
    True
    False
    False
    """
    print(char_ASCII == u'A')
    print(char_ASCII == u'B')
    print(char_ASCII == u'\uF8D2')


def compare_klingon():
    """Print how the non-ASCII Py_UCS4 global compares with three unicode literals.
    >>> compare_klingon()
    True
    False
    False
    """
    print(char_KLINGON == u'\uF8D2')
    print(char_KLINGON == u'A')
    print(char_KLINGON == u'B')


def single_uchar_compare():
    """Assert the ordering of two adjacent single-character unicode literals.
    >>> single_uchar_compare()
    """
    assert u'\u0100' < u'\u0101'
    assert u'\u0101' > u'\u0100'


from cpython.unicode cimport PyUnicode_FromOrdinal
import sys

u0 = u'\x00'
u1 = u'\x01'
umax = PyUnicode_FromOrdinal(sys.maxunicode)

def unicode_ordinal(Py_UCS4 i):
    """Return the argument after its coercion to Py_UCS4 (from int or 1-char string).
    >>> ord(unicode_ordinal(0)) == 0
    True
    >>> ord(unicode_ordinal(1)) == 1
    True
    >>> ord(unicode_ordinal(sys.maxunicode)) == sys.maxunicode
    True

    >>> ord(unicode_ordinal(u0)) == 0
    True
    >>> ord(unicode_ordinal(u1)) == 1
    True
    >>> ord(unicode_ordinal(umax)) == sys.maxunicode
    True

    Value too small:
    >>> unicode_ordinal(-1) #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    OverflowError: ...

    Value too large:
    >>> unicode_ordinal(1114111+1) #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    OverflowError: ...

    Less than one character:
    >>> unicode_ordinal(u0[:0])
    Traceback (most recent call last):
    ...
    ValueError: only single character unicode strings can be converted to Py_UCS4, got length 0

    More than one character:
    >>> unicode_ordinal(u0+u1)
    Traceback (most recent call last):
    ...
    ValueError: only single character unicode strings can be converted to Py_UCS4, got length 2
    """
    return i


def ord_py_ucs4(Py_UCS4 x):
    """Return ord() applied to a Py_UCS4 argument.
    >>> ord_py_ucs4(u0)
    0
    >>> ord_py_ucs4(u_A)
    65
    >>> ord_py_ucs4(u_KLINGON)
    63698
    """
    return ord(x)


@cython.test_assert_path_exists('//PythonCapiCallNode')
@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_type_methods(Py_UCS4 uchar):
    """Return the results of the unicode is*() predicates called on a Py_UCS4 value.
    >>> unicode_type_methods(ord('A'))
    [True, True, False, False, False, False, False, True, True]
    >>> unicode_type_methods(ord('a'))
    [True, True, False, False, True, False, False, False, False]
    >>> unicode_type_methods(ord('8'))
    [True, False, True, True, False, True, False, False, False]
    >>> unicode_type_methods(ord('\\t'))
    [False, False, False, False, False, False, True, False, False]
    """
    return [
        # character types
        uchar.isalnum(),
        uchar.isalpha(),
        uchar.isdecimal(),
        uchar.isdigit(),
        uchar.islower(),
        uchar.isnumeric(),
        uchar.isspace(),
        uchar.istitle(),
        uchar.isupper(),
    ]

#@cython.test_assert_path_exists('//PythonCapiCallNode')
#@cython.test_fail_if_path_exists('//SimpleCallNode')
def unicode_methods(Py_UCS4 uchar):
    """Return [lower(), upper(), title()] of a Py_UCS4 value.
    >>> unicode_methods(ord('A')) == ['a', 'A', 'A'] or unicode_methods(ord('A'))
    True
    >>> unicode_methods(ord('a')) == ['a', 'A', 'A'] or unicode_methods(ord('a'))
    True
    >>> unicode_methods(0x1E9E) == [u'\\xdf', u'\\u1e9e', u'\\u1e9e'] or unicode_methods(0x1E9E)
    True
    >>> unicode_methods(0x0130) in (
    ...     [u'i\\u0307', u'\\u0130', u'\\u0130'],  # Py3
    ...     [u'i', u'\\u0130', u'\\u0130'],  # Py2
    ... ) or unicode_methods(0x0130)
    True
    """
    # \u1E9E == 'LATIN CAPITAL LETTER SHARP S'
    # \u0130 == 'LATIN CAPITAL LETTER I WITH DOT ABOVE'
    return [
        # character conversion
        uchar.lower(),
        uchar.upper(),
        uchar.title(),
    ]


#@cython.test_assert_path_exists('//PythonCapiCallNode')
#@cython.test_fail_if_path_exists(
#    '//SimpleCallNode',
#    '//CoerceFromPyTypeNode',
#)
def unicode_method_return_type(Py_UCS4 uchar):
    """Store upper()/lower() results in Py_UCS4 variables and compare with the input.
    >>> unicode_method_return_type(ord('A'))
    [True, False]
    >>> unicode_method_return_type(ord('a'))
    [False, True]
    """
    cdef Py_UCS4 uc, ul
    uc, ul = uchar.upper(), uchar.lower()
    return [uc == uchar, ul == uchar]


@cython.test_assert_path_exists('//IntNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//PythonCapiCallNode')
def len_uchar(Py_UCS4 uchar):
    """Return len() of a Py_UCS4 value; the tree assertions expect an IntNode, no call.
    >>> len_uchar(ord('A'))
    1
    """
    return len(uchar)

def index_uchar(Py_UCS4 uchar, Py_ssize_t i):
    """Index a Py_UCS4 value with the constants 0 and -1 and with the variable i.
    >>> index_uchar(ord('A'), 0) == ('A', 'A', 'A')
    True
    >>> index_uchar(ord('A'), -1) == ('A', 'A', 'A')
    True
    >>> index_uchar(ord('A'), 1)
    Traceback (most recent call last):
    IndexError: string index out of range
    """
    return uchar[0], uchar[-1], uchar[i]

mixed_ustring = u'AbcDefGhIjKlmnoP'
lower_ustring = mixed_ustring.lower()
upper_ustring = mixed_ustring.upper()

@cython.test_assert_path_exists('//PythonCapiCallNode', '//ForFromStatNode')
@cython.test_fail_if_path_exists('//SimpleCallNode', '//ForInStatNode')
def count_lower_case_characters(unicode ustring):
    """Count the characters of ustring for which islower() is true.
    >>> count_lower_case_characters(mixed_ustring)
    10
    >>> count_lower_case_characters(lower_ustring)
    16
    >>> count_lower_case_characters(upper_ustring)
    0
    """
    cdef Py_ssize_t count = 0
    for uchar in ustring:
        if uchar.islower():
            count += 1
    return count

@cython.test_assert_path_exists('//PythonCapiCallNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//ForInStatNode')
def count_lower_case_characters_slice(unicode ustring):
    """Count the lowercase characters of ustring[1:-1] (first and last char skipped).
    >>> count_lower_case_characters_slice(mixed_ustring)
    10
    >>> count_lower_case_characters_slice(lower_ustring)
    14
    >>> sum([ 1 for uchar in lower_ustring[1:-1] if uchar.islower() ])
    14
    """
    cdef Py_ssize_t count = 0
    for uchar in ustring[1:-1]:
        if uchar.islower():
            count += 1
    return count

@cython.test_assert_path_exists('//PythonCapiCallNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//SimpleCallNode',
                                 '//ForInStatNode')
def count_lower_case_characters_slice_reversed(unicode ustring):
    """Count the lowercase characters of the reversed slice ustring[-2:0:-1].
    >>> count_lower_case_characters_slice_reversed(mixed_ustring)
    10
    >>> count_lower_case_characters_slice_reversed(lower_ustring)
    14
    >>> sum([ 1 for uchar in lower_ustring[-2:0:-1] if uchar.islower() ])
    14
    """
    cdef Py_ssize_t count = 0
    for uchar in ustring[-2:0:-1]:
        if uchar.islower():
            count += 1
    return count

def loop_object_over_latin1_unicode_literal():
    """Rebuild a literal ending in a Latin-1 char by looping with an object variable.
    >>> result = loop_object_over_latin1_unicode_literal()
    >>> print(result[:-1])
    abcdefg
    >>> ord(result[-1]) == 0xD7
    True
    """
    cdef object uchar
    chars = []
    for uchar in u'abcdefg\xD7':
        chars.append(uchar)
    return u''.join(chars)

def loop_object_over_unicode_literal():
    """Rebuild a literal ending in a non-Latin-1 char by looping with an object variable.
    >>> result = loop_object_over_unicode_literal()
    >>> print(result[:-1])
    abcdefg
    >>> ord(result[-1]) == 0xF8FD
    True
    """
    cdef object uchar
    chars = []
    for uchar in u'abcdefg\uF8FD':
        chars.append(uchar)
    return u''.join(chars)

@cython.test_assert_path_exists('//SwitchStatNode')
@cython.test_fail_if_path_exists('//ForInStatNode')
def iter_and_in():
    """Print each character of one literal that also occurs in a second literal.
    >>> iter_and_in()
    a
    b
    e
    f
    h
    """
    for c in u'abcdefgh':
        if c in u'abCDefGh':
            print(c)


@cython.test_fail_if_path_exists('//ForInStatNode')
def iter_inferred():
    """Print each character of a unicode value that is built at runtime via join().
    >>> iter_inferred()
    a
    b
    c
    d
    e
    """
    uchars = list(u"abcde")
    uchars = u''.join(uchars)
    for c in uchars:
        print(c)


@cython.test_assert_path_exists('//SwitchStatNode',
                                '//ForFromStatNode')
@cython.test_fail_if_path_exists('//ForInStatNode')
def index_and_in():
    """Print each i in 1..8 whose character at index -i occurs in a second literal.
    >>> index_and_in()
    1
    3
    4
    7
    8
    """
    cdef int i
    for i in range(1, 9):
        if u'abcdefgh'[-i] in u'abCDefGh':
            print(i)

# special test for narrow builds

high_uchar = u'\U00012345'
high_ustring0 = u'\U00012345\U00012346abc'
high_ustring1 = u'\U00012346\U00012345abc'
high_ustring_end = u'\U00012346abc\U00012344\U00012345'
high_ustring_no = u'\U00012346\U00012346abc'

def uchar_in(Py_UCS4 uchar, unicode ustring):
    """Return whether the code point 0x12345, passed as Py_UCS4, occurs in ustring.
    >>> uchar_in(high_uchar, high_ustring0)
    True
    >>> uchar_in(high_uchar, high_ustring1)
    True
    >>> uchar_in(high_uchar, high_ustring_end)
    True
    >>> uchar_in(high_uchar, high_ustring_no)
    False
    """
    assert uchar == 0x12345, ('%X' % uchar)
    return uchar in ustring


def uchar_lookup_in_dict(obj, Py_UCS4 uchar):
    """Look up a Py_UCS4 key in obj, once typed as dict and once as a plain object.
    >>> d = {u_KLINGON: 1234, u0: 0, u1: 1, u_A: 2}
    >>> uchar_lookup_in_dict(d, u_KLINGON)
    (1234, 1234)
    >>> uchar_lookup_in_dict(d, u_A)
    (2, 2)
    >>> uchar_lookup_in_dict(d, u0)
    (0, 0)
    >>> uchar_lookup_in_dict(d, u1)
    (1, 1)
    """
    cdef dict d = obj
    dval = d[uchar]
    objval = obj[uchar]  # this lookup is the position recorded in _WARNINGS
    return dval, objval


# The entry below is the "line:column" position of ``obj[uchar]`` in
# uchar_lookup_in_dict(); adjust it whenever lines above that statement move.
_WARNINGS = """
373:16: Item lookup of unicode character codes now always converts to a Unicode string. Use an explicit C integer cast to get back the previous integer lookup behaviour.
"""