1 #ifndef Py_UNICODEOBJECT_H 2 #define Py_UNICODEOBJECT_H 3 4 #include <stdarg.h> 5 6 /* 7 8 Unicode implementation based on original code by Fredrik Lundh, 9 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the 10 Unicode Integration Proposal. (See 11 http://www.egenix.com/files/python/unicode-proposal.txt). 12 13 Copyright (c) Corporation for National Research Initiatives. 14 15 16 Original header: 17 -------------------------------------------------------------------- 18 19 * Yet another Unicode string type for Python. This type supports the 20 * 16-bit Basic Multilingual Plane (BMP) only. 21 * 22 * Written by Fredrik Lundh, January 1999. 23 * 24 * Copyright (c) 1999 by Secret Labs AB. 25 * Copyright (c) 1999 by Fredrik Lundh. 26 * 27 * fredrik@pythonware.com 28 * http://www.pythonware.com 29 * 30 * -------------------------------------------------------------------- 31 * This Unicode String Type is 32 * 33 * Copyright (c) 1999 by Secret Labs AB 34 * Copyright (c) 1999 by Fredrik Lundh 35 * 36 * By obtaining, using, and/or copying this software and/or its 37 * associated documentation, you agree that you have read, understood, 38 * and will comply with the following terms and conditions: 39 * 40 * Permission to use, copy, modify, and distribute this software and its 41 * associated documentation for any purpose and without fee is hereby 42 * granted, provided that the above copyright notice appears in all 43 * copies, and that both that copyright notice and this permission notice 44 * appear in supporting documentation, and that the name of Secret Labs 45 * AB or the author not be used in advertising or publicity pertaining to 46 * distribution of the software without specific, written prior 47 * permission. 48 * 49 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO 50 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 51 * FITNESS. 
IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 * -------------------------------------------------------------------- */

#include <ctype.h>

/* === Internal API ======================================================= */

/* --- Internal Unicode Format -------------------------------------------- */

/* Python 3.x requires unicode */
#define Py_USING_UNICODE

/* SIZEOF_WCHAR_T has to be defined before this point; stop the build with
   an explicit message when it is missing. */
#ifndef SIZEOF_WCHAR_T
#error Must define SIZEOF_WCHAR_T
#endif

/* Py_UNICODE_SIZE is taken verbatim from SIZEOF_WCHAR_T (Py_UNICODE is a
   typedef for wchar_t, see below). */
#define Py_UNICODE_SIZE SIZEOF_WCHAR_T

/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
   Otherwise, Unicode strings are stored as UCS-2 (with limited support
   for UTF-16) */

#if Py_UNICODE_SIZE >= 4
#define Py_UNICODE_WIDE
#endif

/* Set these flags if the platform has "wchar.h" and the
   wchar_t type is a 16-bit unsigned type */
/* #define HAVE_WCHAR_H */
/* #define HAVE_USABLE_WCHAR_T */

/* Py_UNICODE was the native Unicode storage format (code unit) used by
   Python and represents a single Unicode element in the Unicode type.
   With PEP 393, Py_UNICODE is deprecated and replaced with a
   typedef to wchar_t.

   Both names below are only visible when Py_LIMITED_API is not defined. */

#ifndef Py_LIMITED_API
#define PY_UNICODE_TYPE wchar_t
typedef wchar_t Py_UNICODE;
#endif

/* If the compiler provides a wchar_t type we try to support it
   through the interface functions PyUnicode_FromWideChar(),
   PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). 
*/ 99 100 #ifdef HAVE_USABLE_WCHAR_T 101 # ifndef HAVE_WCHAR_H 102 # define HAVE_WCHAR_H 103 # endif 104 #endif 105 106 #ifdef HAVE_WCHAR_H 107 /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */ 108 # ifdef _HAVE_BSDI 109 # include <time.h> 110 # endif 111 # include <wchar.h> 112 #endif 113 114 /* Py_UCS4 and Py_UCS2 are typedefs for the respective 115 unicode representations. */ 116 typedef uint32_t Py_UCS4; 117 typedef uint16_t Py_UCS2; 118 typedef uint8_t Py_UCS1; 119 120 /* --- Internal Unicode Operations ---------------------------------------- */ 121 122 /* Since splitting on whitespace is an important use case, and 123 whitespace in most situations is solely ASCII whitespace, we 124 optimize for the common case by using a quick look-up table 125 _Py_ascii_whitespace (see below) with an inlined check. 126 127 */ 128 #ifndef Py_LIMITED_API 129 #define Py_UNICODE_ISSPACE(ch) \ 130 ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch)) 131 132 #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch) 133 #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch) 134 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) 135 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) 136 137 #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch) 138 #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch) 139 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) 140 141 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) 142 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) 143 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) 144 #define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch) 145 146 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) 147 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) 148 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) 149 150 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch) 151 152 #define Py_UNICODE_ISALNUM(ch) \ 153 
(Py_UNICODE_ISALPHA(ch) || \ 154 Py_UNICODE_ISDECIMAL(ch) || \ 155 Py_UNICODE_ISDIGIT(ch) || \ 156 Py_UNICODE_ISNUMERIC(ch)) 157 158 #define Py_UNICODE_COPY(target, source, length) \ 159 memcpy((target), (source), (length)*sizeof(Py_UNICODE)) 160 161 #define Py_UNICODE_FILL(target, value, length) \ 162 do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\ 163 for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\ 164 } while (0) 165 166 /* macros to work with surrogates */ 167 #define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF) 168 #define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF) 169 #define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF) 170 /* Join two surrogate characters and return a single Py_UCS4 value. */ 171 #define Py_UNICODE_JOIN_SURROGATES(high, low) \ 172 (((((Py_UCS4)(high) & 0x03FF) << 10) | \ 173 ((Py_UCS4)(low) & 0x03FF)) + 0x10000) 174 /* high surrogate = top 10 bits added to D800 */ 175 #define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10)) 176 /* low surrogate = bottom 10 bits added to DC00 */ 177 #define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF)) 178 179 /* Check if substring matches at given offset. The offset must be 180 valid, and the substring must not be empty. */ 181 182 #define Py_UNICODE_MATCH(string, offset, substring) \ 183 ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \ 184 ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \ 185 !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE))) 186 187 #endif /* Py_LIMITED_API */ 188 189 #ifdef __cplusplus 190 extern "C" { 191 #endif 192 193 /* --- Unicode Type ------------------------------------------------------- */ 194 195 #ifndef Py_LIMITED_API 196 197 /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject 198 structure. 
state.ascii and state.compact are set, and the data
   immediately follow the structure. utf8_length and wstr_length can be found
   in the length field; the utf8 pointer is equal to the data pointer. */
typedef struct {
    /* There are 4 forms of Unicode strings:

       - compact ascii:

         * structure = PyASCIIObject
         * test: PyUnicode_IS_COMPACT_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND
         * compact = 1
         * ascii = 1
         * ready = 1
         * (length is the length of the utf8 and wstr strings)
         * (data starts just after the structure)
         * (since ASCII is decoded from UTF-8, the utf8 string is the data)

       - compact:

         * structure = PyCompactUnicodeObject
         * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 1
         * ready = 1
         * ascii = 0
         * utf8 is not shared with data
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data and wstr_length=length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL
         * (data starts just after the structure)

       - legacy string, not ready:

         * structure = PyUnicodeObject
         * test: kind == PyUnicode_WCHAR_KIND
         * length = 0 (use wstr_length)
         * hash = -1
         * kind = PyUnicode_WCHAR_KIND
         * compact = 0
         * ascii = 0
         * ready = 0
         * interned = SSTATE_NOT_INTERNED
         * wstr is not NULL
         * data.any is NULL
         * utf8 is NULL
         * utf8_length = 0

       - legacy string, ready:

         * structure = PyUnicodeObject structure
         * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 0
         * ready = 1
         * data.any is not NULL
         * utf8 is shared and utf8_length = length with data.any if ascii = 1
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data.any and wstr_length = length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL

       Compact strings use only one memory block (structure + characters),
       whereas legacy strings use one block for the structure and one block
       for characters.

       Legacy strings are created by PyUnicode_FromUnicode() and
       PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
       when PyUnicode_READY() is called.

       See also _PyUnicode_CheckConsistency().
    */
    PyObject_HEAD
    Py_ssize_t length;          /* Number of code points in the string */
    Py_hash_t hash;             /* Hash value; -1 if not set */
    /* Bit-field flags describing the string; the named fields and the
       anonymous padding below add up to 32 bits (2+3+1+1+1+24). */
    struct {
        /*
           SSTATE_NOT_INTERNED (0)
           SSTATE_INTERNED_MORTAL (1)
           SSTATE_INTERNED_IMMORTAL (2)

           If interned != SSTATE_NOT_INTERNED, the two references from the
           dictionary to this object are *not* counted in ob_refcnt.
         */
        unsigned int interned:2;
        /* Character size:

           - PyUnicode_WCHAR_KIND (0):

             * character type = wchar_t (16 or 32 bits, depending on the
               platform)

           - PyUnicode_1BYTE_KIND (1):

             * character type = Py_UCS1 (8 bits, unsigned)
             * all characters are in the range U+0000-U+00FF (latin1)
             * if ascii is set, all characters are in the range U+0000-U+007F
               (ASCII), otherwise at least one character is in the range
               U+0080-U+00FF

           - PyUnicode_2BYTE_KIND (2):

             * character type = Py_UCS2 (16 bits, unsigned)
             * all characters are in the range U+0000-U+FFFF (BMP)
             * at least one character is in the range U+0100-U+FFFF

           - PyUnicode_4BYTE_KIND (4):

             * character type = Py_UCS4 (32 bits, unsigned)
             * all characters are in the range U+0000-U+10FFFF
             * at least one character is in the range U+10000-U+10FFFF
         */
        unsigned int kind:3;
        /* Compact is with respect to the allocation scheme. Compact unicode
           objects only require one memory block while non-compact objects use
           one block for the PyUnicodeObject struct and another for its data
           buffer. */
        unsigned int compact:1;
        /* The string only contains characters in the range U+0000-U+007F (ASCII)
           and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
           set, use the PyASCIIObject structure. */
        unsigned int ascii:1;
        /* The ready flag indicates whether the object layout is initialized
           completely. This means that this is either a compact object, or
           the data pointer is filled out. The bit is redundant, and helps
           to minimize the test in PyUnicode_IS_READY(). */
        unsigned int ready:1;
        /* Padding to ensure that PyUnicode_DATA() is always aligned to
           4 bytes (see issue #19537 on m68k). */
        unsigned int :24;
    } state;
    wchar_t *wstr;              /* wchar_t representation (null-terminated) */
} PyASCIIObject;

/* Non-ASCII strings allocated through PyUnicode_New use the
   PyCompactUnicodeObject structure. state.compact is set, and the data
   immediately follow the structure.

   The PyASCIIObject header comes first, so a pointer to this structure can
   also be read through a PyASCIIObject pointer (as the access macros do). */
typedef struct {
    PyASCIIObject _base;
    Py_ssize_t utf8_length;     /* Number of bytes in utf8, excluding the
                                 * terminating \0. */
    char *utf8;                 /* UTF-8 representation (null-terminated) */
    Py_ssize_t wstr_length;     /* Number of code points in wstr, possible
                                 * surrogates count as two code points. */
} PyCompactUnicodeObject;

/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
   PyUnicodeObject structure. The actual string data is initially in the wstr
   block, and copied into the data block using _PyUnicode_Ready. 
*/ 352 typedef struct { 353 PyCompactUnicodeObject _base; 354 union { 355 void *any; 356 Py_UCS1 *latin1; 357 Py_UCS2 *ucs2; 358 Py_UCS4 *ucs4; 359 } data; /* Canonical, smallest-form Unicode buffer */ 360 } PyUnicodeObject; 361 #endif 362 363 PyAPI_DATA(PyTypeObject) PyUnicode_Type; 364 PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; 365 366 #define PyUnicode_Check(op) \ 367 PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS) 368 #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type) 369 370 /* Fast access macros */ 371 #ifndef Py_LIMITED_API 372 373 #define PyUnicode_WSTR_LENGTH(op) \ 374 (PyUnicode_IS_COMPACT_ASCII(op) ? \ 375 ((PyASCIIObject*)op)->length : \ 376 ((PyCompactUnicodeObject*)op)->wstr_length) 377 378 /* Returns the deprecated Py_UNICODE representation's size in code units 379 (this includes surrogate pairs as 2 units). 380 If the Py_UNICODE representation is not available, it will be computed 381 on request. Use PyUnicode_GET_LENGTH() for the length in code points. */ 382 383 #define PyUnicode_GET_SIZE(op) \ 384 (assert(PyUnicode_Check(op)), \ 385 (((PyASCIIObject *)(op))->wstr) ? \ 386 PyUnicode_WSTR_LENGTH(op) : \ 387 ((void)PyUnicode_AsUnicode((PyObject *)(op)), \ 388 assert(((PyASCIIObject *)(op))->wstr), \ 389 PyUnicode_WSTR_LENGTH(op))) 390 391 #define PyUnicode_GET_DATA_SIZE(op) \ 392 (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE) 393 394 /* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE 395 representation on demand. Using this macro is very inefficient now, 396 try to port your code to use the new PyUnicode_*BYTE_DATA() macros or 397 use PyUnicode_WRITE() and PyUnicode_READ(). */ 398 399 #define PyUnicode_AS_UNICODE(op) \ 400 (assert(PyUnicode_Check(op)), \ 401 (((PyASCIIObject *)(op))->wstr) ? 
(((PyASCIIObject *)(op))->wstr) : \ 402 PyUnicode_AsUnicode((PyObject *)(op))) 403 404 #define PyUnicode_AS_DATA(op) \ 405 ((const char *)(PyUnicode_AS_UNICODE(op))) 406 407 408 /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */ 409 410 /* Values for PyASCIIObject.state: */ 411 412 /* Interning state. */ 413 #define SSTATE_NOT_INTERNED 0 414 #define SSTATE_INTERNED_MORTAL 1 415 #define SSTATE_INTERNED_IMMORTAL 2 416 417 /* Return true if the string contains only ASCII characters, or 0 if not. The 418 string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be 419 ready. */ 420 #define PyUnicode_IS_ASCII(op) \ 421 (assert(PyUnicode_Check(op)), \ 422 assert(PyUnicode_IS_READY(op)), \ 423 ((PyASCIIObject*)op)->state.ascii) 424 425 /* Return true if the string is compact or 0 if not. 426 No type checks or Ready calls are performed. */ 427 #define PyUnicode_IS_COMPACT(op) \ 428 (((PyASCIIObject*)(op))->state.compact) 429 430 /* Return true if the string is a compact ASCII string (use PyASCIIObject 431 structure), or 0 if not. No type checks or Ready calls are performed. */ 432 #define PyUnicode_IS_COMPACT_ASCII(op) \ 433 (((PyASCIIObject*)op)->state.ascii && PyUnicode_IS_COMPACT(op)) 434 435 enum PyUnicode_Kind { 436 /* String contains only wstr byte characters. This is only possible 437 when the string was created with a legacy API and _PyUnicode_Ready() 438 has not been called yet. */ 439 PyUnicode_WCHAR_KIND = 0, 440 /* Return values of the PyUnicode_KIND() macro: */ 441 PyUnicode_1BYTE_KIND = 1, 442 PyUnicode_2BYTE_KIND = 2, 443 PyUnicode_4BYTE_KIND = 4 444 }; 445 446 /* Return pointers to the canonical representation cast to unsigned char, 447 Py_UCS2, or Py_UCS4 for direct character access. 448 No checks are performed, use PyUnicode_KIND() before to ensure 449 these will work correctly. 
*/ 450 451 #define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op)) 452 #define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op)) 453 #define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op)) 454 455 /* Return one of the PyUnicode_*_KIND values defined above. */ 456 #define PyUnicode_KIND(op) \ 457 (assert(PyUnicode_Check(op)), \ 458 assert(PyUnicode_IS_READY(op)), \ 459 ((PyASCIIObject *)(op))->state.kind) 460 461 /* Return a void pointer to the raw unicode buffer. */ 462 #define _PyUnicode_COMPACT_DATA(op) \ 463 (PyUnicode_IS_ASCII(op) ? \ 464 ((void*)((PyASCIIObject*)(op) + 1)) : \ 465 ((void*)((PyCompactUnicodeObject*)(op) + 1))) 466 467 #define _PyUnicode_NONCOMPACT_DATA(op) \ 468 (assert(((PyUnicodeObject*)(op))->data.any), \ 469 ((((PyUnicodeObject *)(op))->data.any))) 470 471 #define PyUnicode_DATA(op) \ 472 (assert(PyUnicode_Check(op)), \ 473 PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \ 474 _PyUnicode_NONCOMPACT_DATA(op)) 475 476 /* In the access macros below, "kind" may be evaluated more than once. 477 All other macro parameters are evaluated exactly once, so it is safe 478 to put side effects into them (such as increasing the index). */ 479 480 /* Write into the canonical representation, this macro does not do any sanity 481 checks and is intended for usage in loops. The caller should cache the 482 kind and data pointers obtained from other macro calls. 483 index is the index in the string (starts at 0) and value is the new 484 code point value which should be written to that location. 
*/ 485 #define PyUnicode_WRITE(kind, data, index, value) \ 486 do { \ 487 switch ((kind)) { \ 488 case PyUnicode_1BYTE_KIND: { \ 489 ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \ 490 break; \ 491 } \ 492 case PyUnicode_2BYTE_KIND: { \ 493 ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \ 494 break; \ 495 } \ 496 default: { \ 497 assert((kind) == PyUnicode_4BYTE_KIND); \ 498 ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \ 499 } \ 500 } \ 501 } while (0) 502 503 /* Read a code point from the string's canonical representation. No checks 504 or ready calls are performed. */ 505 #define PyUnicode_READ(kind, data, index) \ 506 ((Py_UCS4) \ 507 ((kind) == PyUnicode_1BYTE_KIND ? \ 508 ((const Py_UCS1 *)(data))[(index)] : \ 509 ((kind) == PyUnicode_2BYTE_KIND ? \ 510 ((const Py_UCS2 *)(data))[(index)] : \ 511 ((const Py_UCS4 *)(data))[(index)] \ 512 ) \ 513 )) 514 515 /* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it 516 calls PyUnicode_KIND() and might call it twice. For single reads, use 517 PyUnicode_READ_CHAR, for multiple consecutive reads callers should 518 cache kind and use PyUnicode_READ instead. */ 519 #define PyUnicode_READ_CHAR(unicode, index) \ 520 (assert(PyUnicode_Check(unicode)), \ 521 assert(PyUnicode_IS_READY(unicode)), \ 522 (Py_UCS4) \ 523 (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \ 524 ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \ 525 (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \ 526 ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \ 527 ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \ 528 ) \ 529 )) 530 531 /* Returns the length of the unicode string. The caller has to make sure that 532 the string has it's canonical representation set before calling 533 this macro. Call PyUnicode_(FAST_)Ready to ensure that. 
*/ 534 #define PyUnicode_GET_LENGTH(op) \ 535 (assert(PyUnicode_Check(op)), \ 536 assert(PyUnicode_IS_READY(op)), \ 537 ((PyASCIIObject *)(op))->length) 538 539 540 /* Fast check to determine whether an object is ready. Equivalent to 541 PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any) */ 542 543 #define PyUnicode_IS_READY(op) (((PyASCIIObject*)op)->state.ready) 544 545 /* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best 546 case. If the canonical representation is not yet set, it will still call 547 _PyUnicode_Ready(). 548 Returns 0 on success and -1 on errors. */ 549 #define PyUnicode_READY(op) \ 550 (assert(PyUnicode_Check(op)), \ 551 (PyUnicode_IS_READY(op) ? \ 552 0 : _PyUnicode_Ready((PyObject *)(op)))) 553 554 /* Return a maximum character value which is suitable for creating another 555 string based on op. This is always an approximation but more efficient 556 than iterating over the string. */ 557 #define PyUnicode_MAX_CHAR_VALUE(op) \ 558 (assert(PyUnicode_IS_READY(op)), \ 559 (PyUnicode_IS_ASCII(op) ? \ 560 (0x7f) : \ 561 (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ? \ 562 (0xffU) : \ 563 (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ? \ 564 (0xffffU) : \ 565 (0x10ffffU))))) 566 567 #endif 568 569 /* --- Constants ---------------------------------------------------------- */ 570 571 /* This Unicode character will be used as replacement character during 572 decoding if the errors argument is set to "replace". Note: the 573 Unicode character U+FFFD is the official REPLACEMENT CHARACTER in 574 Unicode 3.0. */ 575 576 #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD) 577 578 /* === Public API ========================================================= */ 579 580 /* --- Plain Py_UNICODE --------------------------------------------------- */ 581 582 /* With PEP 393, this is the recommended way to allocate a new unicode object. 583 This function will allocate the object and its buffer in a single memory 584 block. 
Objects created using this function are not resizable. */ 585 #ifndef Py_LIMITED_API 586 PyAPI_FUNC(PyObject*) PyUnicode_New( 587 Py_ssize_t size, /* Number of code points in the new string */ 588 Py_UCS4 maxchar /* maximum code point value in the string */ 589 ); 590 #endif 591 592 /* Initializes the canonical string representation from the deprecated 593 wstr/Py_UNICODE representation. This function is used to convert Unicode 594 objects which were created using the old API to the new flexible format 595 introduced with PEP 393. 596 597 Don't call this function directly, use the public PyUnicode_READY() macro 598 instead. */ 599 #ifndef Py_LIMITED_API 600 PyAPI_FUNC(int) _PyUnicode_Ready( 601 PyObject *unicode /* Unicode object */ 602 ); 603 #endif 604 605 /* Get a copy of a Unicode string. */ 606 #ifndef Py_LIMITED_API 607 PyAPI_FUNC(PyObject*) _PyUnicode_Copy( 608 PyObject *unicode 609 ); 610 #endif 611 612 /* Copy character from one unicode object into another, this function performs 613 character conversion when necessary and falls back to memcpy() if possible. 614 615 Fail if to is too small (smaller than *how_many* or smaller than 616 len(from)-from_start), or if kind(from[from_start:from_start+how_many]) > 617 kind(to), or if *to* has more than 1 reference. 618 619 Return the number of written character, or return -1 and raise an exception 620 on error. 621 622 Pseudo-code: 623 624 how_many = min(how_many, len(from) - from_start) 625 to[to_start:to_start+how_many] = from[from_start:from_start+how_many] 626 return how_many 627 628 Note: The function doesn't write a terminating null character. 629 */ 630 #ifndef Py_LIMITED_API 631 PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( 632 PyObject *to, 633 Py_ssize_t to_start, 634 PyObject *from, 635 Py_ssize_t from_start, 636 Py_ssize_t how_many 637 ); 638 639 /* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so 640 may crash if parameters are invalid (e.g. 
if the output string 641 is too short). */ 642 PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters( 643 PyObject *to, 644 Py_ssize_t to_start, 645 PyObject *from, 646 Py_ssize_t from_start, 647 Py_ssize_t how_many 648 ); 649 #endif 650 651 #ifndef Py_LIMITED_API 652 /* Fill a string with a character: write fill_char into 653 unicode[start:start+length]. 654 655 Fail if fill_char is bigger than the string maximum character, or if the 656 string has more than 1 reference. 657 658 Return the number of written character, or return -1 and raise an exception 659 on error. */ 660 PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill( 661 PyObject *unicode, 662 Py_ssize_t start, 663 Py_ssize_t length, 664 Py_UCS4 fill_char 665 ); 666 667 /* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash 668 if parameters are invalid (e.g. if length is longer than the string). */ 669 PyAPI_FUNC(void) _PyUnicode_FastFill( 670 PyObject *unicode, 671 Py_ssize_t start, 672 Py_ssize_t length, 673 Py_UCS4 fill_char 674 ); 675 #endif 676 677 /* Create a Unicode Object from the Py_UNICODE buffer u of the given 678 size. 679 680 u may be NULL which causes the contents to be undefined. It is the 681 user's responsibility to fill in the needed data afterwards. Note 682 that modifying the Unicode object contents after construction is 683 only allowed if u was set to NULL. 684 685 The buffer is copied into the new object. */ 686 687 #ifndef Py_LIMITED_API 688 PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( 689 const Py_UNICODE *u, /* Unicode buffer */ 690 Py_ssize_t size /* size of buffer */ 691 ); 692 #endif 693 694 /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */ 695 PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize( 696 const char *u, /* UTF-8 encoded string */ 697 Py_ssize_t size /* size of buffer */ 698 ); 699 700 /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated 701 UTF-8 encoded bytes. The size is determined with strlen(). 
*/ 702 PyAPI_FUNC(PyObject*) PyUnicode_FromString( 703 const char *u /* UTF-8 encoded string */ 704 ); 705 706 #ifndef Py_LIMITED_API 707 /* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters. 708 Scan the string to find the maximum character. */ 709 PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData( 710 int kind, 711 const void *buffer, 712 Py_ssize_t size); 713 714 /* Create a new string from a buffer of ASCII characters. 715 WARNING: Don't check if the string contains any non-ASCII character. */ 716 PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII( 717 const char *buffer, 718 Py_ssize_t size); 719 #endif 720 721 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 722 PyAPI_FUNC(PyObject*) PyUnicode_Substring( 723 PyObject *str, 724 Py_ssize_t start, 725 Py_ssize_t end); 726 #endif 727 728 #ifndef Py_LIMITED_API 729 /* Compute the maximum character of the substring unicode[start:end]. 730 Return 127 for an empty string. */ 731 PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( 732 PyObject *unicode, 733 Py_ssize_t start, 734 Py_ssize_t end); 735 #endif 736 737 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 738 /* Copy the string into a UCS4 buffer including the null character if copy_null 739 is set. Return NULL and raise an exception on error. Raise a SystemError if 740 the buffer is smaller than the string. Return buffer on success. 741 742 buflen is the length of the buffer in (Py_UCS4) characters. */ 743 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4( 744 PyObject *unicode, 745 Py_UCS4* buffer, 746 Py_ssize_t buflen, 747 int copy_null); 748 749 /* Copy the string into a UCS4 buffer. A new buffer is allocated using 750 * PyMem_Malloc; if this fails, NULL is returned with a memory error 751 exception set. */ 752 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode); 753 #endif 754 755 #ifndef Py_LIMITED_API 756 /* Return a read-only pointer to the Unicode object's internal 757 Py_UNICODE buffer. 
758 If the wchar_t/Py_UNICODE representation is not yet available, this 759 function will calculate it. */ 760 761 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( 762 PyObject *unicode /* Unicode object */ 763 ); 764 765 /* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string 766 contains null characters. */ 767 PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode( 768 PyObject *unicode /* Unicode object */ 769 ); 770 771 /* Return a read-only pointer to the Unicode object's internal 772 Py_UNICODE buffer and save the length at size. 773 If the wchar_t/Py_UNICODE representation is not yet available, this 774 function will calculate it. */ 775 776 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize( 777 PyObject *unicode, /* Unicode object */ 778 Py_ssize_t *size /* location where to save the length */ 779 ); 780 #endif 781 782 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 783 /* Get the length of the Unicode object. */ 784 785 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength( 786 PyObject *unicode 787 ); 788 #endif 789 790 /* Get the number of Py_UNICODE units in the 791 string representation. */ 792 793 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize( 794 PyObject *unicode /* Unicode object */ 795 ); 796 797 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 798 /* Read a character from the string. */ 799 800 PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar( 801 PyObject *unicode, 802 Py_ssize_t index 803 ); 804 805 /* Write a character to the string. The string must have been created through 806 PyUnicode_New, must not be shared, and must not have been hashed yet. 807 808 Return 0 on success, -1 on error. */ 809 810 PyAPI_FUNC(int) PyUnicode_WriteChar( 811 PyObject *unicode, 812 Py_ssize_t index, 813 Py_UCS4 character 814 ); 815 #endif 816 817 #ifndef Py_LIMITED_API 818 /* Get the maximum ordinal for a Unicode character. */ 819 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void); 820 #endif 821 822 /* Resize a Unicode object. 
The length is the number of characters, except 823 if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length 824 is the number of Py_UNICODE characters. 825 826 *unicode is modified to point to the new (resized) object and 0 827 returned on success. 828 829 Try to resize the string in place (which is usually faster than allocating 830 a new string and copy characters), or create a new string. 831 832 Error handling is implemented as follows: an exception is set, -1 833 is returned and *unicode left untouched. 834 835 WARNING: The function doesn't check string content, the result may not be a 836 string in canonical representation. */ 837 838 PyAPI_FUNC(int) PyUnicode_Resize( 839 PyObject **unicode, /* Pointer to the Unicode object */ 840 Py_ssize_t length /* New length */ 841 ); 842 843 /* Decode obj to a Unicode object. 844 845 bytes, bytearray and other bytes-like objects are decoded according to the 846 given encoding and error handler. The encoding and error handler can be 847 NULL to have the interface use UTF-8 and "strict". 848 849 All other objects (including Unicode objects) raise an exception. 850 851 The API returns NULL in case of an error. The caller is responsible 852 for decref'ing the returned objects. 853 854 */ 855 856 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject( 857 PyObject *obj, /* Object */ 858 const char *encoding, /* encoding */ 859 const char *errors /* error handling */ 860 ); 861 862 /* Copy an instance of a Unicode subtype to a new true Unicode object if 863 necessary. If obj is already a true Unicode object (not a subtype), return 864 the reference with *incremented* refcount. 865 866 The API returns NULL in case of an error. The caller is responsible 867 for decref'ing the returned objects. 
868 869 */ 870 871 PyAPI_FUNC(PyObject*) PyUnicode_FromObject( 872 PyObject *obj /* Object */ 873 ); 874 875 PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV( 876 const char *format, /* ASCII-encoded string */ 877 va_list vargs 878 ); 879 PyAPI_FUNC(PyObject *) PyUnicode_FromFormat( 880 const char *format, /* ASCII-encoded string */ 881 ... 882 ); 883 884 #ifndef Py_LIMITED_API 885 typedef struct { 886 PyObject *buffer; 887 void *data; 888 enum PyUnicode_Kind kind; 889 Py_UCS4 maxchar; 890 Py_ssize_t size; 891 Py_ssize_t pos; 892 893 /* minimum number of allocated characters (default: 0) */ 894 Py_ssize_t min_length; 895 896 /* minimum character (default: 127, ASCII) */ 897 Py_UCS4 min_char; 898 899 /* If non-zero, overallocate the buffer (default: 0). */ 900 unsigned char overallocate; 901 902 /* If readonly is 1, buffer is a shared string (cannot be modified) 903 and size is set to 0. */ 904 unsigned char readonly; 905 } _PyUnicodeWriter ; 906 907 /* Initialize a Unicode writer. 908 * 909 * By default, the minimum buffer size is 0 character and overallocation is 910 * disabled. Set min_length, min_char and overallocate attributes to control 911 * the allocation of the buffer. */ 912 PyAPI_FUNC(void) 913 _PyUnicodeWriter_Init(_PyUnicodeWriter *writer); 914 915 /* Prepare the buffer to write 'length' characters 916 with the specified maximum character. 917 918 Return 0 on success, raise an exception and return -1 on error. */ 919 #define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \ 920 (((MAXCHAR) <= (WRITER)->maxchar \ 921 && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \ 922 ? 0 \ 923 : (((LENGTH) == 0) \ 924 ? 0 \ 925 : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR)))) 926 927 /* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro 928 instead. 
*/ 929 PyAPI_FUNC(int) 930 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, 931 Py_ssize_t length, Py_UCS4 maxchar); 932 933 /* Prepare the buffer to have at least the kind KIND. 934 For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will 935 support characters in range U+000-U+FFFF. 936 937 Return 0 on success, raise an exception and return -1 on error. */ 938 #define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \ 939 (assert((KIND) != PyUnicode_WCHAR_KIND), \ 940 (KIND) <= (WRITER)->kind \ 941 ? 0 \ 942 : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND))) 943 944 /* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind() 945 macro instead. */ 946 PyAPI_FUNC(int) 947 _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, 948 enum PyUnicode_Kind kind); 949 950 /* Append a Unicode character. 951 Return 0 on success, raise an exception and return -1 on error. */ 952 PyAPI_FUNC(int) 953 _PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, 954 Py_UCS4 ch 955 ); 956 957 /* Append a Unicode string. 958 Return 0 on success, raise an exception and return -1 on error. */ 959 PyAPI_FUNC(int) 960 _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, 961 PyObject *str /* Unicode string */ 962 ); 963 964 /* Append a substring of a Unicode string. 965 Return 0 on success, raise an exception and return -1 on error. */ 966 PyAPI_FUNC(int) 967 _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, 968 PyObject *str, /* Unicode string */ 969 Py_ssize_t start, 970 Py_ssize_t end 971 ); 972 973 /* Append an ASCII-encoded byte string. 974 Return 0 on success, raise an exception and return -1 on error. */ 975 PyAPI_FUNC(int) 976 _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, 977 const char *str, /* ASCII-encoded byte string */ 978 Py_ssize_t len /* number of bytes, or -1 if unknown */ 979 ); 980 981 /* Append a latin1-encoded byte string. 982 Return 0 on success, raise an exception and return -1 on error. 
*/ 983 PyAPI_FUNC(int) 984 _PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer, 985 const char *str, /* latin1-encoded byte string */ 986 Py_ssize_t len /* length in bytes */ 987 ); 988 989 /* Get the value of the writer as a Unicode string. Clear the 990 buffer of the writer. Raise an exception and return NULL 991 on error. */ 992 PyAPI_FUNC(PyObject *) 993 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer); 994 995 /* Deallocate memory of a writer (clear its internal buffer). */ 996 PyAPI_FUNC(void) 997 _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer); 998 #endif 999 1000 #ifndef Py_LIMITED_API 1001 /* Format the object based on the format_spec, as defined in PEP 3101 1002 (Advanced String Formatting). */ 1003 PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter( 1004 _PyUnicodeWriter *writer, 1005 PyObject *obj, 1006 PyObject *format_spec, 1007 Py_ssize_t start, 1008 Py_ssize_t end); 1009 #endif 1010 1011 PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); 1012 PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **); 1013 PyAPI_FUNC(PyObject *) PyUnicode_InternFromString( 1014 const char *u /* UTF-8 encoded string */ 1015 ); 1016 #ifndef Py_LIMITED_API 1017 PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void); 1018 #endif 1019 1020 /* Use only if you know it's a string */ 1021 #define PyUnicode_CHECK_INTERNED(op) \ 1022 (((PyASCIIObject *)(op))->state.interned) 1023 1024 /* --- wchar_t support for platforms which support it --------------------- */ 1025 1026 #ifdef HAVE_WCHAR_H 1027 1028 /* Create a Unicode Object from the wchar_t buffer w of the given 1029 size. 1030 1031 The buffer is copied into the new object. */ 1032 1033 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar( 1034 const wchar_t *w, /* wchar_t buffer */ 1035 Py_ssize_t size /* size of buffer */ 1036 ); 1037 1038 /* Copies the Unicode Object contents into the wchar_t buffer w. At 1039 most size wchar_t characters are copied. 
1040 1041 Note that the resulting wchar_t string may or may not be 1042 0-terminated. It is the responsibility of the caller to make sure 1043 that the wchar_t string is 0-terminated in case this is required by 1044 the application. 1045 1046 Returns the number of wchar_t characters copied (excluding a 1047 possibly trailing 0-termination character) or -1 in case of an 1048 error. */ 1049 1050 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar( 1051 PyObject *unicode, /* Unicode object */ 1052 wchar_t *w, /* wchar_t buffer */ 1053 Py_ssize_t size /* size of buffer */ 1054 ); 1055 1056 /* Convert the Unicode object to a wide character string. The output string 1057 always ends with a nul character. If size is not NULL, write the number of 1058 wide characters (excluding the null character) into *size. 1059 1060 Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it) 1061 on success. On error, returns NULL, *size is undefined and raises a 1062 MemoryError. */ 1063 1064 PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString( 1065 PyObject *unicode, /* Unicode object */ 1066 Py_ssize_t *size /* number of characters of the result */ 1067 ); 1068 1069 #ifndef Py_LIMITED_API 1070 /* Similar to PyUnicode_AsWideCharString(unicode, NULL), but check if 1071 the string contains null characters. */ 1072 PyAPI_FUNC(wchar_t*) _PyUnicode_AsWideCharString( 1073 PyObject *unicode /* Unicode object */ 1074 ); 1075 1076 PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind); 1077 #endif 1078 1079 #endif 1080 1081 /* --- Unicode ordinals --------------------------------------------------- */ 1082 1083 /* Create a Unicode Object from the given Unicode code point ordinal. 1084 1085 The ordinal must be in range(0x110000). A ValueError is 1086 raised in case it is not. 
1087 1088 */ 1089 1090 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal); 1091 1092 /* --- Free-list management ----------------------------------------------- */ 1093 1094 /* Clear the free list used by the Unicode implementation. 1095 1096 This can be used to release memory used for objects on the free 1097 list back to the Python memory allocator. 1098 1099 */ 1100 1101 PyAPI_FUNC(int) PyUnicode_ClearFreeList(void); 1102 1103 /* === Builtin Codecs ===================================================== 1104 1105 Many of these APIs take two arguments encoding and errors. These 1106 parameters encoding and errors have the same semantics as the ones 1107 of the builtin str() API. 1108 1109 Setting encoding to NULL causes the default encoding (UTF-8) to be used. 1110 1111 Error handling is set by errors which may also be set to NULL 1112 meaning to use the default handling defined for the codec. Default 1113 error handling for all builtin codecs is "strict" (ValueErrors are 1114 raised). 1115 1116 The codecs all use a similar interface. Only deviation from the 1117 generic ones are documented. 1118 1119 */ 1120 1121 /* --- Manage the default encoding ---------------------------------------- */ 1122 1123 /* Returns a pointer to the default encoding (UTF-8) of the 1124 Unicode object unicode and the size of the encoded representation 1125 in bytes stored in *size. 1126 1127 In case of an error, no *size is set. 1128 1129 This function caches the UTF-8 encoded string in the unicodeobject 1130 and subsequent calls will return the same string. The memory is released 1131 when the unicodeobject is deallocated. 1132 1133 _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to 1134 support the previous internal function with the same behaviour. 1135 1136 *** This API is for interpreter INTERNAL USE ONLY and will likely 1137 *** be removed or changed in the future. 
1138 1139 *** If you need to access the Unicode object as UTF-8 bytes string, 1140 *** please use PyUnicode_AsUTF8String() instead. 1141 */ 1142 1143 #ifndef Py_LIMITED_API 1144 PyAPI_FUNC(char *) PyUnicode_AsUTF8AndSize( 1145 PyObject *unicode, 1146 Py_ssize_t *size); 1147 #define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize 1148 #endif 1149 1150 /* Returns a pointer to the default encoding (UTF-8) of the 1151 Unicode object unicode. 1152 1153 Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation 1154 in the unicodeobject. 1155 1156 _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to 1157 support the previous internal function with the same behaviour. 1158 1159 Use of this API is DEPRECATED since no size information can be 1160 extracted from the returned data. 1161 1162 *** This API is for interpreter INTERNAL USE ONLY and will likely 1163 *** be removed or changed for Python 3.1. 1164 1165 *** If you need to access the Unicode object as UTF-8 bytes string, 1166 *** please use PyUnicode_AsUTF8String() instead. 1167 1168 */ 1169 1170 #ifndef Py_LIMITED_API 1171 PyAPI_FUNC(char *) PyUnicode_AsUTF8(PyObject *unicode); 1172 #define _PyUnicode_AsString PyUnicode_AsUTF8 1173 #endif 1174 1175 /* Returns "utf-8". */ 1176 1177 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void); 1178 1179 /* --- Generic Codecs ----------------------------------------------------- */ 1180 1181 /* Create a Unicode object by decoding the encoded string s of the 1182 given size. */ 1183 1184 PyAPI_FUNC(PyObject*) PyUnicode_Decode( 1185 const char *s, /* encoded string */ 1186 Py_ssize_t size, /* size of buffer */ 1187 const char *encoding, /* encoding */ 1188 const char *errors /* error handling */ 1189 ); 1190 1191 /* Decode a Unicode object unicode and return the result as Python 1192 object. 1193 1194 This API is DEPRECATED. The only supported standard encoding is rot13. 
1195 Use PyCodec_Decode() to decode with rot13 and non-standard codecs 1196 that decode from str. */ 1197 1198 PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject( 1199 PyObject *unicode, /* Unicode object */ 1200 const char *encoding, /* encoding */ 1201 const char *errors /* error handling */ 1202 ) Py_DEPRECATED(3.6); 1203 1204 /* Decode a Unicode object unicode and return the result as Unicode 1205 object. 1206 1207 This API is DEPRECATED. The only supported standard encoding is rot13. 1208 Use PyCodec_Decode() to decode with rot13 and non-standard codecs 1209 that decode from str to str. */ 1210 1211 PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode( 1212 PyObject *unicode, /* Unicode object */ 1213 const char *encoding, /* encoding */ 1214 const char *errors /* error handling */ 1215 ) Py_DEPRECATED(3.6); 1216 1217 /* Encodes a Py_UNICODE buffer of the given size and returns a 1218 Python string object. */ 1219 1220 #ifndef Py_LIMITED_API 1221 PyAPI_FUNC(PyObject*) PyUnicode_Encode( 1222 const Py_UNICODE *s, /* Unicode char buffer */ 1223 Py_ssize_t size, /* number of Py_UNICODE chars to encode */ 1224 const char *encoding, /* encoding */ 1225 const char *errors /* error handling */ 1226 ); 1227 #endif 1228 1229 /* Encodes a Unicode object and returns the result as Python 1230 object. 1231 1232 This API is DEPRECATED. It is superceeded by PyUnicode_AsEncodedString() 1233 since all standard encodings (except rot13) encode str to bytes. 1234 Use PyCodec_Encode() for encoding with rot13 and non-standard codecs 1235 that encode form str to non-bytes. */ 1236 1237 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject( 1238 PyObject *unicode, /* Unicode object */ 1239 const char *encoding, /* encoding */ 1240 const char *errors /* error handling */ 1241 ) Py_DEPRECATED(3.6); 1242 1243 /* Encodes a Unicode object and returns the result as Python string 1244 object. 
*/ 1245 1246 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString( 1247 PyObject *unicode, /* Unicode object */ 1248 const char *encoding, /* encoding */ 1249 const char *errors /* error handling */ 1250 ); 1251 1252 /* Encodes a Unicode object and returns the result as Unicode 1253 object. 1254 1255 This API is DEPRECATED. The only supported standard encodings is rot13. 1256 Use PyCodec_Encode() to encode with rot13 and non-standard codecs 1257 that encode from str to str. */ 1258 1259 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode( 1260 PyObject *unicode, /* Unicode object */ 1261 const char *encoding, /* encoding */ 1262 const char *errors /* error handling */ 1263 ) Py_DEPRECATED(3.6); 1264 1265 /* Build an encoding map. */ 1266 1267 PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap( 1268 PyObject* string /* 256 character map */ 1269 ); 1270 1271 /* --- UTF-7 Codecs ------------------------------------------------------- */ 1272 1273 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7( 1274 const char *string, /* UTF-7 encoded string */ 1275 Py_ssize_t length, /* size of string */ 1276 const char *errors /* error handling */ 1277 ); 1278 1279 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( 1280 const char *string, /* UTF-7 encoded string */ 1281 Py_ssize_t length, /* size of string */ 1282 const char *errors, /* error handling */ 1283 Py_ssize_t *consumed /* bytes consumed */ 1284 ); 1285 1286 #ifndef Py_LIMITED_API 1287 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7( 1288 const Py_UNICODE *data, /* Unicode char buffer */ 1289 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 1290 int base64SetO, /* Encode RFC2152 Set O characters in base64 */ 1291 int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */ 1292 const char *errors /* error handling */ 1293 ); 1294 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7( 1295 PyObject *unicode, /* Unicode object */ 1296 int base64SetO, /* Encode RFC2152 Set O characters in base64 */ 1297 int base64WhiteSpace, /* 
Encode whitespace (sp, ht, nl, cr) in base64 */ 1298 const char *errors /* error handling */ 1299 ); 1300 #endif 1301 1302 /* --- UTF-8 Codecs ------------------------------------------------------- */ 1303 1304 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8( 1305 const char *string, /* UTF-8 encoded string */ 1306 Py_ssize_t length, /* size of string */ 1307 const char *errors /* error handling */ 1308 ); 1309 1310 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful( 1311 const char *string, /* UTF-8 encoded string */ 1312 Py_ssize_t length, /* size of string */ 1313 const char *errors, /* error handling */ 1314 Py_ssize_t *consumed /* bytes consumed */ 1315 ); 1316 1317 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( 1318 PyObject *unicode /* Unicode object */ 1319 ); 1320 1321 #ifndef Py_LIMITED_API 1322 PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String( 1323 PyObject *unicode, 1324 const char *errors); 1325 1326 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8( 1327 const Py_UNICODE *data, /* Unicode char buffer */ 1328 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 1329 const char *errors /* error handling */ 1330 ); 1331 #endif 1332 1333 /* --- UTF-32 Codecs ------------------------------------------------------ */ 1334 1335 /* Decodes length bytes from a UTF-32 encoded buffer string and returns 1336 the corresponding Unicode object. 1337 1338 errors (if non-NULL) defines the error handling. It defaults 1339 to "strict". 1340 1341 If byteorder is non-NULL, the decoder starts decoding using the 1342 given byte order: 1343 1344 *byteorder == -1: little endian 1345 *byteorder == 0: native order 1346 *byteorder == 1: big endian 1347 1348 In native mode, the first four bytes of the stream are checked for a 1349 BOM mark. If found, the BOM mark is analysed, the byte order 1350 adjusted and the BOM skipped. In the other modes, no BOM mark 1351 interpretation is done. After completion, *byteorder is set to the 1352 current byte order at the end of input data. 
1353 1354 If byteorder is NULL, the codec starts in native order mode. 1355 1356 */ 1357 1358 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32( 1359 const char *string, /* UTF-32 encoded string */ 1360 Py_ssize_t length, /* size of string */ 1361 const char *errors, /* error handling */ 1362 int *byteorder /* pointer to byteorder to use 1363 0=native;-1=LE,1=BE; updated on 1364 exit */ 1365 ); 1366 1367 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful( 1368 const char *string, /* UTF-32 encoded string */ 1369 Py_ssize_t length, /* size of string */ 1370 const char *errors, /* error handling */ 1371 int *byteorder, /* pointer to byteorder to use 1372 0=native;-1=LE,1=BE; updated on 1373 exit */ 1374 Py_ssize_t *consumed /* bytes consumed */ 1375 ); 1376 1377 /* Returns a Python string using the UTF-32 encoding in native byte 1378 order. The string always starts with a BOM mark. */ 1379 1380 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String( 1381 PyObject *unicode /* Unicode object */ 1382 ); 1383 1384 /* Returns a Python string object holding the UTF-32 encoded value of 1385 the Unicode data. 1386 1387 If byteorder is not 0, output is written according to the following 1388 byte order: 1389 1390 byteorder == -1: little endian 1391 byteorder == 0: native byte order (writes a BOM mark) 1392 byteorder == 1: big endian 1393 1394 If byteorder is 0, the output string will always start with the 1395 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 1396 prepended. 
1397 1398 */ 1399 1400 #ifndef Py_LIMITED_API 1401 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32( 1402 const Py_UNICODE *data, /* Unicode char buffer */ 1403 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 1404 const char *errors, /* error handling */ 1405 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ 1406 ); 1407 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32( 1408 PyObject *object, /* Unicode object */ 1409 const char *errors, /* error handling */ 1410 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ 1411 ); 1412 #endif 1413 1414 /* --- UTF-16 Codecs ------------------------------------------------------ */ 1415 1416 /* Decodes length bytes from a UTF-16 encoded buffer string and returns 1417 the corresponding Unicode object. 1418 1419 errors (if non-NULL) defines the error handling. It defaults 1420 to "strict". 1421 1422 If byteorder is non-NULL, the decoder starts decoding using the 1423 given byte order: 1424 1425 *byteorder == -1: little endian 1426 *byteorder == 0: native order 1427 *byteorder == 1: big endian 1428 1429 In native mode, the first two bytes of the stream are checked for a 1430 BOM mark. If found, the BOM mark is analysed, the byte order 1431 adjusted and the BOM skipped. In the other modes, no BOM mark 1432 interpretation is done. After completion, *byteorder is set to the 1433 current byte order at the end of input data. 1434 1435 If byteorder is NULL, the codec starts in native order mode. 
1436 1437 */ 1438 1439 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16( 1440 const char *string, /* UTF-16 encoded string */ 1441 Py_ssize_t length, /* size of string */ 1442 const char *errors, /* error handling */ 1443 int *byteorder /* pointer to byteorder to use 1444 0=native;-1=LE,1=BE; updated on 1445 exit */ 1446 ); 1447 1448 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful( 1449 const char *string, /* UTF-16 encoded string */ 1450 Py_ssize_t length, /* size of string */ 1451 const char *errors, /* error handling */ 1452 int *byteorder, /* pointer to byteorder to use 1453 0=native;-1=LE,1=BE; updated on 1454 exit */ 1455 Py_ssize_t *consumed /* bytes consumed */ 1456 ); 1457 1458 /* Returns a Python string using the UTF-16 encoding in native byte 1459 order. The string always starts with a BOM mark. */ 1460 1461 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String( 1462 PyObject *unicode /* Unicode object */ 1463 ); 1464 1465 /* Returns a Python string object holding the UTF-16 encoded value of 1466 the Unicode data. 1467 1468 If byteorder is not 0, output is written according to the following 1469 byte order: 1470 1471 byteorder == -1: little endian 1472 byteorder == 0: native byte order (writes a BOM mark) 1473 byteorder == 1: big endian 1474 1475 If byteorder is 0, the output string will always start with the 1476 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is 1477 prepended. 1478 1479 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to 1480 UCS-2. This trick makes it possible to add full UTF-16 capabilities 1481 at a later point without compromising the APIs. 
1482 1483 */ 1484 1485 #ifndef Py_LIMITED_API 1486 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16( 1487 const Py_UNICODE *data, /* Unicode char buffer */ 1488 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 1489 const char *errors, /* error handling */ 1490 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ 1491 ); 1492 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16( 1493 PyObject* unicode, /* Unicode object */ 1494 const char *errors, /* error handling */ 1495 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ 1496 ); 1497 #endif 1498 1499 /* --- Unicode-Escape Codecs ---------------------------------------------- */ 1500 1501 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape( 1502 const char *string, /* Unicode-Escape encoded string */ 1503 Py_ssize_t length, /* size of string */ 1504 const char *errors /* error handling */ 1505 ); 1506 1507 #ifndef Py_LIMITED_API 1508 /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape 1509 chars. */ 1510 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape( 1511 const char *string, /* Unicode-Escape encoded string */ 1512 Py_ssize_t length, /* size of string */ 1513 const char *errors, /* error handling */ 1514 const char **first_invalid_escape /* on return, points to first 1515 invalid escaped char in 1516 string. 
*/ 1517 ); 1518 #endif 1519 1520 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( 1521 PyObject *unicode /* Unicode object */ 1522 ); 1523 1524 #ifndef Py_LIMITED_API 1525 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape( 1526 const Py_UNICODE *data, /* Unicode char buffer */ 1527 Py_ssize_t length /* Number of Py_UNICODE chars to encode */ 1528 ); 1529 #endif 1530 1531 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */ 1532 1533 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape( 1534 const char *string, /* Raw-Unicode-Escape encoded string */ 1535 Py_ssize_t length, /* size of string */ 1536 const char *errors /* error handling */ 1537 ); 1538 1539 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString( 1540 PyObject *unicode /* Unicode object */ 1541 ); 1542 1543 #ifndef Py_LIMITED_API 1544 PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape( 1545 const Py_UNICODE *data, /* Unicode char buffer */ 1546 Py_ssize_t length /* Number of Py_UNICODE chars to encode */ 1547 ); 1548 #endif 1549 1550 /* --- Unicode Internal Codec --------------------------------------------- 1551 1552 Only for internal use in _codecsmodule.c */ 1553 1554 #ifndef Py_LIMITED_API 1555 PyObject *_PyUnicode_DecodeUnicodeInternal( 1556 const char *string, 1557 Py_ssize_t length, 1558 const char *errors 1559 ); 1560 #endif 1561 1562 /* --- Latin-1 Codecs ----------------------------------------------------- 1563 1564 Note: Latin-1 corresponds to the first 256 Unicode ordinals. 
1565 1566 */ 1567 1568 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1( 1569 const char *string, /* Latin-1 encoded string */ 1570 Py_ssize_t length, /* size of string */ 1571 const char *errors /* error handling */ 1572 ); 1573 1574 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String( 1575 PyObject *unicode /* Unicode object */ 1576 ); 1577 1578 #ifndef Py_LIMITED_API 1579 PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String( 1580 PyObject* unicode, 1581 const char* errors); 1582 1583 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1( 1584 const Py_UNICODE *data, /* Unicode char buffer */ 1585 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1586 const char *errors /* error handling */ 1587 ); 1588 #endif 1589 1590 /* --- ASCII Codecs ------------------------------------------------------- 1591 1592 Only 7-bit ASCII data is excepted. All other codes generate errors. 1593 1594 */ 1595 1596 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII( 1597 const char *string, /* ASCII encoded string */ 1598 Py_ssize_t length, /* size of string */ 1599 const char *errors /* error handling */ 1600 ); 1601 1602 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString( 1603 PyObject *unicode /* Unicode object */ 1604 ); 1605 1606 #ifndef Py_LIMITED_API 1607 PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString( 1608 PyObject* unicode, 1609 const char* errors); 1610 1611 PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII( 1612 const Py_UNICODE *data, /* Unicode char buffer */ 1613 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1614 const char *errors /* error handling */ 1615 ); 1616 #endif 1617 1618 /* --- Character Map Codecs ----------------------------------------------- 1619 1620 This codec uses mappings to encode and decode characters. 1621 1622 Decoding mappings must map byte ordinals (integers in the range from 0 to 1623 255) to Unicode strings, integers (which are then interpreted as Unicode 1624 ordinals) or None. 
Unmapped data bytes (ones which cause a LookupError) 1625 as well as mapped to None, 0xFFFE or '\ufffe' are treated as "undefined 1626 mapping" and cause an error. 1627 1628 Encoding mappings must map Unicode ordinal integers to bytes objects, 1629 integers in the range from 0 to 255 or None. Unmapped character 1630 ordinals (ones which cause a LookupError) as well as mapped to 1631 None are treated as "undefined mapping" and cause an error. 1632 1633 */ 1634 1635 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap( 1636 const char *string, /* Encoded string */ 1637 Py_ssize_t length, /* size of string */ 1638 PyObject *mapping, /* decoding mapping */ 1639 const char *errors /* error handling */ 1640 ); 1641 1642 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString( 1643 PyObject *unicode, /* Unicode object */ 1644 PyObject *mapping /* encoding mapping */ 1645 ); 1646 1647 #ifndef Py_LIMITED_API 1648 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap( 1649 const Py_UNICODE *data, /* Unicode char buffer */ 1650 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1651 PyObject *mapping, /* encoding mapping */ 1652 const char *errors /* error handling */ 1653 ); 1654 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap( 1655 PyObject *unicode, /* Unicode object */ 1656 PyObject *mapping, /* encoding mapping */ 1657 const char *errors /* error handling */ 1658 ); 1659 #endif 1660 1661 /* Translate a Py_UNICODE buffer of the given length by applying a 1662 character mapping table to it and return the resulting Unicode 1663 object. 1664 1665 The mapping table must map Unicode ordinal integers to Unicode strings, 1666 Unicode ordinal integers or None (causing deletion of the character). 1667 1668 Mapping tables may be dictionaries or sequences. Unmapped character 1669 ordinals (ones which cause a LookupError) are left untouched and 1670 are copied as-is. 
1671 1672 */ 1673 1674 #ifndef Py_LIMITED_API 1675 PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap( 1676 const Py_UNICODE *data, /* Unicode char buffer */ 1677 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1678 PyObject *table, /* Translate table */ 1679 const char *errors /* error handling */ 1680 ); 1681 #endif 1682 1683 #ifdef MS_WINDOWS 1684 1685 /* --- MBCS codecs for Windows -------------------------------------------- */ 1686 1687 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( 1688 const char *string, /* MBCS encoded string */ 1689 Py_ssize_t length, /* size of string */ 1690 const char *errors /* error handling */ 1691 ); 1692 1693 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful( 1694 const char *string, /* MBCS encoded string */ 1695 Py_ssize_t length, /* size of string */ 1696 const char *errors, /* error handling */ 1697 Py_ssize_t *consumed /* bytes consumed */ 1698 ); 1699 1700 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 1701 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful( 1702 int code_page, /* code page number */ 1703 const char *string, /* encoded string */ 1704 Py_ssize_t length, /* size of string */ 1705 const char *errors, /* error handling */ 1706 Py_ssize_t *consumed /* bytes consumed */ 1707 ); 1708 #endif 1709 1710 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString( 1711 PyObject *unicode /* Unicode object */ 1712 ); 1713 1714 #ifndef Py_LIMITED_API 1715 PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS( 1716 const Py_UNICODE *data, /* Unicode char buffer */ 1717 Py_ssize_t length, /* number of Py_UNICODE chars to encode */ 1718 const char *errors /* error handling */ 1719 ); 1720 #endif 1721 1722 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 1723 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage( 1724 int code_page, /* code page number */ 1725 PyObject *unicode, /* Unicode object */ 1726 const char *errors /* error handling */ 1727 ); 1728 #endif 1729 1730 #endif /* MS_WINDOWS */ 1731 1732 /* --- 
Decimal Encoder ---------------------------------------------------- */ 1733 1734 /* Takes a Unicode string holding a decimal value and writes it into 1735 an output buffer using standard ASCII digit codes. 1736 1737 The output buffer has to provide at least length+1 bytes of storage 1738 area. The output string is 0-terminated. 1739 1740 The encoder converts whitespace to ' ', decimal characters to their 1741 corresponding ASCII digit and all other Latin-1 characters except 1742 \0 as-is. Characters outside this range (Unicode ordinals 1-256) 1743 are treated as errors. This includes embedded NULL bytes. 1744 1745 Error handling is defined by the errors argument: 1746 1747 NULL or "strict": raise a ValueError 1748 "ignore": ignore the wrong characters (these are not copied to the 1749 output buffer) 1750 "replace": replaces illegal characters with '?' 1751 1752 Returns 0 on success, -1 on failure. 1753 1754 */ 1755 1756 #ifndef Py_LIMITED_API 1757 PyAPI_FUNC(int) PyUnicode_EncodeDecimal( 1758 Py_UNICODE *s, /* Unicode buffer */ 1759 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ 1760 char *output, /* Output buffer; must have size >= length */ 1761 const char *errors /* error handling */ 1762 ); 1763 #endif 1764 1765 /* Transforms code points that have decimal digit property to the 1766 corresponding ASCII digit code points. 1767 1768 Returns a new Unicode string on success, NULL on failure. 1769 */ 1770 1771 #ifndef Py_LIMITED_API 1772 PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII( 1773 Py_UNICODE *s, /* Unicode buffer */ 1774 Py_ssize_t length /* Number of Py_UNICODE chars to transform */ 1775 ); 1776 #endif 1777 1778 /* Similar to PyUnicode_TransformDecimalToASCII(), but takes a PyObject 1779 as argument instead of a raw buffer and length. This function additionally 1780 transforms spaces to ASCII because this is what the callers in longobject, 1781 floatobject, and complexobject did anyways. 
*/ 1782 1783 #ifndef Py_LIMITED_API 1784 PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII( 1785 PyObject *unicode /* Unicode object */ 1786 ); 1787 #endif 1788 1789 /* --- Locale encoding --------------------------------------------------- */ 1790 1791 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 1792 /* Decode a string from the current locale encoding. The decoder is strict if 1793 *surrogateescape* is equal to zero, otherwise it uses the 'surrogateescape' 1794 error handler (PEP 383) to escape undecodable bytes. If a byte sequence can 1795 be decoded as a surrogate character and *surrogateescape* is not equal to 1796 zero, the byte sequence is escaped using the 'surrogateescape' error handler 1797 instead of being decoded. *str* must end with a null character but cannot 1798 contain embedded null characters. */ 1799 /* NOTE(review): the text above describes a *surrogateescape* flag, but the prototype takes an *errors* string -- presumably the name of the error handler; confirm the accepted values against the implementation. */ 1800 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize( 1801 const char *str, 1802 Py_ssize_t len, 1803 const char *errors); 1804 1805 /* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string 1806 length using strlen(). */ 1807 1808 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale( 1809 const char *str, 1810 const char *errors); 1811 1812 /* Encode a Unicode object to the current locale encoding. The encoder is 1813 strict if *surrogateescape* is equal to zero, otherwise the 1814 "surrogateescape" error handler is used. Return a bytes object. The string 1815 cannot contain embedded null characters. */ 1816 /* NOTE(review): as for the decoders, the prototype takes an *errors* string rather than a *surrogateescape* flag -- confirm the accepted values. */ 1817 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale( 1818 PyObject *unicode, 1819 const char *errors 1820 ); 1821 #endif 1822 1823 /* --- File system encoding ---------------------------------------------- */ 1824 1825 /* ParseTuple converter: encode str objects to bytes using 1826 PyUnicode_EncodeFSDefault(); bytes objects are output as-is.
*/ 1827 1828 PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*); 1829 1830 /* ParseTuple converter: decode bytes objects to unicode using 1831 PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */ 1832 1833 PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*); 1834 1835 /* Decode a null-terminated string using Py_FileSystemDefaultEncoding 1836 and the "surrogateescape" error handler. 1837 1838 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 1839 encoding. 1840 1841 Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known. 1842 */ 1843 1844 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault( 1845 const char *s /* encoded string */ 1846 ); 1847 1848 /* Decode a string using Py_FileSystemDefaultEncoding 1849 and the "surrogateescape" error handler. Unlike 1850 PyUnicode_DecodeFSDefault(), the length is passed explicitly in size. 1851 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 1852 encoding. 1853 */ 1854 1855 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize( 1856 const char *s, /* encoded string */ 1857 Py_ssize_t size /* length of s in bytes */ 1858 ); 1859 1860 /* Encode a Unicode object to Py_FileSystemDefaultEncoding with the 1861 "surrogateescape" error handler, and return bytes. 1862 1863 If Py_FileSystemDefaultEncoding is not set, fall back to the locale 1864 encoding. 1865 */ 1866 1867 PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault( 1868 PyObject *unicode /* Unicode object to encode */ 1869 ); 1870 1871 /* --- Methods & Slots ---------------------------------------------------- 1872 1873 These are capable of handling Unicode objects and strings on input 1874 (we refer to them as strings in the descriptions) and return 1875 Unicode objects or integers as appropriate. */ 1876 1877 /* Concat two strings giving a new Unicode string.
*/ 1878 1879 PyAPI_FUNC(PyObject*) PyUnicode_Concat( 1880 PyObject *left, /* Left string */ 1881 PyObject *right /* Right string */ 1882 ); 1883 1884 /* Concatenate two strings and put the result in *pleft 1885 (sets *pleft to NULL on error) */ 1886 1887 PyAPI_FUNC(void) PyUnicode_Append( 1888 PyObject **pleft, /* Pointer to left string */ 1889 PyObject *right /* Right string */ 1890 ); 1891 1892 /* Concatenate two strings, put the result in *pleft and drop the right object 1893 (sets *pleft to NULL on error) */ 1894 1895 PyAPI_FUNC(void) PyUnicode_AppendAndDel( 1896 PyObject **pleft, /* Pointer to left string */ 1897 PyObject *right /* Right string */ 1898 ); 1899 1900 /* Split a string giving a list of Unicode strings. 1901 1902 If sep is NULL, splitting will be done at all whitespace 1903 substrings. Otherwise, splits occur at the given separator. 1904 1905 At most maxsplit splits will be done. If maxsplit is negative, no limit is set. 1906 1907 Separators are not included in the resulting list. 1908 1909 */ 1910 1911 PyAPI_FUNC(PyObject*) PyUnicode_Split( 1912 PyObject *s, /* String to split */ 1913 PyObject *sep, /* String separator */ 1914 Py_ssize_t maxsplit /* Maxsplit count */ 1915 ); 1916 1917 /* Ditto, but split at line breaks. 1918 1919 CRLF is considered to be one line break. Line breaks are not 1920 included in the resulting list unless keepends is true. */ 1921 1922 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines( 1923 PyObject *s, /* String to split */ 1924 int keepends /* If true, line end markers are included */ 1925 ); 1926 1927 /* Partition a string using a given separator. */ 1928 1929 PyAPI_FUNC(PyObject*) PyUnicode_Partition( 1930 PyObject *s, /* String to partition */ 1931 PyObject *sep /* String separator */ 1932 ); 1933 1934 /* Partition a string using a given separator, searching from the end of the 1935 string.
*/ 1936 1937 PyAPI_FUNC(PyObject*) PyUnicode_RPartition( 1938 PyObject *s, /* String to partition */ 1939 PyObject *sep /* String separator */ 1940 ); 1941 1942 /* Split a string giving a list of Unicode strings. 1943 1944 If sep is NULL, splitting will be done at all whitespace 1945 substrings. Otherwise, splits occur at the given separator. 1946 1947 At most maxsplit splits will be done; if maxsplit is negative, no 1948 limit is set. Unlike PyUnicode_Split(), PyUnicode_RSplit() splits 1949 from the end of the string. 1950 1951 Separators are not included in the resulting list. 1952 1953 */ 1954 1955 PyAPI_FUNC(PyObject*) PyUnicode_RSplit( 1956 PyObject *s, /* String to split */ 1957 PyObject *sep, /* String separator */ 1958 Py_ssize_t maxsplit /* Maxsplit count */ 1959 ); 1960 1961 /* Translate a string by applying a character mapping table to it and 1962 return the resulting Unicode object. 1963 1964 The mapping table must map Unicode ordinal integers to Unicode strings, 1965 Unicode ordinal integers or None (causing deletion of the character). 1966 1967 Mapping tables may be dictionaries or sequences. Unmapped character 1968 ordinals (ones which cause a LookupError) are left untouched and 1969 are copied as-is. 1970 1971 */ 1972 1973 PyAPI_FUNC(PyObject *) PyUnicode_Translate( 1974 PyObject *str, /* String */ 1975 PyObject *table, /* Translate table */ 1976 const char *errors /* error handling */ 1977 ); 1978 1979 /* Join a sequence of strings using the given separator and return 1980 the resulting Unicode string. */ 1981 1982 PyAPI_FUNC(PyObject*) PyUnicode_Join( 1983 PyObject *separator, /* Separator string */ 1984 PyObject *seq /* Sequence object */ 1985 ); 1986 /* Only declared when Py_LIMITED_API is not defined. Presumably like PyUnicode_Join(), but the items come from a C array of seqlen object pointers instead of a sequence object -- TODO confirm against the implementation. */ 1987 #ifndef Py_LIMITED_API 1988 PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray( 1989 PyObject *separator, 1990 PyObject **items, 1991 Py_ssize_t seqlen 1992 ); 1993 #endif /* Py_LIMITED_API */ 1994 1995 /* Return 1 if substr matches str[start:end] at the given tail end, 0 1996 otherwise.
*/ 1997 1998 PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch( 1999 PyObject *str, /* String */ 2000 PyObject *substr, /* Prefix or Suffix string */ 2001 Py_ssize_t start, /* Start index */ 2002 Py_ssize_t end, /* Stop index */ 2003 int direction /* Tail end: -1 prefix, +1 suffix */ 2004 ); 2005 2006 /* Return the first position of substr in str[start:end] using the 2007 given search direction or -1 if not found. -2 is returned in case 2008 an error occurred and an exception is set. */ 2009 2010 PyAPI_FUNC(Py_ssize_t) PyUnicode_Find( 2011 PyObject *str, /* String */ 2012 PyObject *substr, /* Substring to find */ 2013 Py_ssize_t start, /* Start index */ 2014 Py_ssize_t end, /* Stop index */ 2015 int direction /* Find direction: +1 forward, -1 backward */ 2016 ); 2017 2018 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 2019 /* Like PyUnicode_Find(), but search for a single character only. */ 2020 PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar( 2021 PyObject *str, /* String */ 2022 Py_UCS4 ch, /* Character to find */ 2023 Py_ssize_t start, /* Start index */ 2024 Py_ssize_t end, /* Stop index */ 2025 int direction /* Find direction, as for PyUnicode_Find() */ 2026 ); 2027 #endif 2028 2029 /* Count the number of occurrences of substr in str[start:end]. */ 2030 2031 PyAPI_FUNC(Py_ssize_t) PyUnicode_Count( 2032 PyObject *str, /* String */ 2033 PyObject *substr, /* Substring to count */ 2034 Py_ssize_t start, /* Start index */ 2035 Py_ssize_t end /* Stop index */ 2036 ); 2037 2038 /* Replace at most maxcount occurrences of substr in str with replstr 2039 and return the resulting Unicode object. */ 2040 2041 PyAPI_FUNC(PyObject *) PyUnicode_Replace( 2042 PyObject *str, /* String */ 2043 PyObject *substr, /* Substring to find */ 2044 PyObject *replstr, /* Replacement string */ 2045 Py_ssize_t maxcount /* Max. number of replacements to apply; 2046 -1 = all */ 2047 ); 2048 2049 /* Compare two strings and return -1, 0, 1 for less than, equal, 2050 greater than resp. 2051 Raise an exception and return -1 on error.
*/ 2052 2053 PyAPI_FUNC(int) PyUnicode_Compare( 2054 PyObject *left, /* Left string */ 2055 PyObject *right /* Right string */ 2056 ); 2057 2058 #ifndef Py_LIMITED_API 2059 /* Test whether a Unicode object is equal to an ASCII identifier. Return 1 if 2060 true, 0 otherwise. The right argument must be an ASCII identifier. 2061 Any error that occurs inside is cleared before returning. */ 2062 2063 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId( 2064 PyObject *left, /* Left string */ 2065 _Py_Identifier *right /* Right identifier */ 2066 ); 2067 #endif 2068 2069 /* Compare a Unicode object with a C string and return -1, 0, 1 for less than, 2070 equal, and greater than, respectively. It is best to pass only 2071 ASCII-encoded strings, but the function interprets the input string as 2072 ISO-8859-1 if it contains non-ASCII characters. 2073 This function does not raise exceptions. */ 2074 2075 PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString( 2076 PyObject *left, 2077 const char *right /* ASCII-encoded string */ 2078 ); 2079 2080 #ifndef Py_LIMITED_API 2081 /* Test whether a Unicode object is equal to an ASCII string. Return 1 if 2082 true, 0 otherwise. The right argument must be an ASCII-encoded string. 2083 Any error that occurs inside is cleared before returning. */ 2084 2085 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString( 2086 PyObject *left, 2087 const char *right /* ASCII-encoded string */ 2088 ); 2089 #endif 2090 2091 /* Rich compare two strings and return one of the following: 2092 2093 - NULL in case an exception was raised 2094 - Py_True or Py_False for successful comparisons 2095 - Py_NotImplemented in case the type combination is unknown 2096 2097 Possible values for op: 2098 2099 Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE 2100 2101 */ 2102 2103 PyAPI_FUNC(PyObject *) PyUnicode_RichCompare( 2104 PyObject *left, /* Left string */ 2105 PyObject *right, /* Right string */ 2106 int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. 
*/ 2107 ); 2108 2109 /* Apply an argument tuple or dictionary to a format string and return 2110 the resulting Unicode string. */ 2111 2112 PyAPI_FUNC(PyObject *) PyUnicode_Format( 2113 PyObject *format, /* Format string */ 2114 PyObject *args /* Argument tuple or dictionary */ 2115 ); 2116 2117 /* Checks whether element is contained in container and returns 1/0 2118 accordingly. 2119 2120 element has to coerce to a one element Unicode string. -1 is 2121 returned in case of an error. */ 2122 2123 PyAPI_FUNC(int) PyUnicode_Contains( 2124 PyObject *container, /* Container string */ 2125 PyObject *element /* Element string */ 2126 ); 2127 2128 /* Checks whether argument is a valid identifier. */ 2129 2130 PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s); 2131 2132 #ifndef Py_LIMITED_API 2133 /* Externally visible for str.strip(unicode) */ 2134 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip( 2135 PyObject *self, 2136 int striptype, 2137 PyObject *sepobj 2138 ); 2139 #endif 2140 2141 /* Using explicit passed-in values, insert the thousands grouping 2142 into the string pointed to by buffer. For the argument descriptions, 2143 see Objects/stringlib/localeutil.h. NOTE(review): the prototype below has no buffer parameter (only writer and n_buffer); presumably the output goes through writer -- confirm. */ 2144 #ifndef Py_LIMITED_API 2145 PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping( 2146 _PyUnicodeWriter *writer, 2147 Py_ssize_t n_buffer, 2148 PyObject *digits, 2149 Py_ssize_t d_pos, 2150 Py_ssize_t n_digits, 2151 Py_ssize_t min_width, 2152 const char *grouping, 2153 PyObject *thousands_sep, 2154 Py_UCS4 *maxchar); 2155 #endif 2156 /* === Characters Type APIs =============================================== */ 2157 2158 /* Helper array used by Py_UNICODE_ISSPACE(). */ 2159 2160 #ifndef Py_LIMITED_API 2161 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; 2162 2163 /* These should not be used directly. Use the Py_UNICODE_IS* and 2164 Py_UNICODE_TO* macros instead. 2165 2166 These APIs are implemented in Objects/unicodectype.c. 
2167 2168 */ 2169 2170 PyAPI_FUNC(int) _PyUnicode_IsLowercase( 2171 Py_UCS4 ch /* Unicode character */ 2172 ); 2173 2174 PyAPI_FUNC(int) _PyUnicode_IsUppercase( 2175 Py_UCS4 ch /* Unicode character */ 2176 ); 2177 2178 PyAPI_FUNC(int) _PyUnicode_IsTitlecase( 2179 Py_UCS4 ch /* Unicode character */ 2180 ); 2181 2182 PyAPI_FUNC(int) _PyUnicode_IsXidStart( 2183 Py_UCS4 ch /* Unicode character */ 2184 ); 2185 2186 PyAPI_FUNC(int) _PyUnicode_IsXidContinue( 2187 Py_UCS4 ch /* Unicode character */ 2188 ); 2189 2190 PyAPI_FUNC(int) _PyUnicode_IsWhitespace( 2191 const Py_UCS4 ch /* Unicode character */ 2192 ); 2193 2194 PyAPI_FUNC(int) _PyUnicode_IsLinebreak( 2195 const Py_UCS4 ch /* Unicode character */ 2196 ); 2197 /* The To*case functions below take one code point and return one Py_UCS4. */ 2198 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase( 2199 Py_UCS4 ch /* Unicode character */ 2200 ); 2201 2202 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase( 2203 Py_UCS4 ch /* Unicode character */ 2204 ); 2205 2206 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase( 2207 Py_UCS4 ch /* Unicode character */ 2208 ); 2209 /* The To*Full functions below return an int and write through res. Presumably res receives the full (possibly multi-code-point) mapping and the return value is its length; the required capacity of res is not visible here -- TODO confirm in Objects/unicodectype.c. */ 2210 PyAPI_FUNC(int) _PyUnicode_ToLowerFull( 2211 Py_UCS4 ch, /* Unicode character */ 2212 Py_UCS4 *res 2213 ); 2214 2215 PyAPI_FUNC(int) _PyUnicode_ToTitleFull( 2216 Py_UCS4 ch, /* Unicode character */ 2217 Py_UCS4 *res 2218 ); 2219 2220 PyAPI_FUNC(int) _PyUnicode_ToUpperFull( 2221 Py_UCS4 ch, /* Unicode character */ 2222 Py_UCS4 *res 2223 ); 2224 2225 PyAPI_FUNC(int) _PyUnicode_ToFoldedFull( 2226 Py_UCS4 ch, /* Unicode character */ 2227 Py_UCS4 *res 2228 ); 2229 2230 PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable( 2231 Py_UCS4 ch /* Unicode character */ 2232 ); 2233 2234 PyAPI_FUNC(int) _PyUnicode_IsCased( 2235 Py_UCS4 ch /* Unicode character */ 2236 ); 2237 2238 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( 2239 Py_UCS4 ch /* Unicode character */ 2240 ); 2241 2242 PyAPI_FUNC(int) _PyUnicode_ToDigit( 2243 Py_UCS4 ch /* Unicode character */ 2244 ); 2245 2246 PyAPI_FUNC(double) _PyUnicode_ToNumeric( 2247 Py_UCS4 ch /* Unicode character */ 2248 ); 2249 2250 
PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( 2251 Py_UCS4 ch /* Unicode character */ 2252 ); 2253 2254 PyAPI_FUNC(int) _PyUnicode_IsDigit( 2255 Py_UCS4 ch /* Unicode character */ 2256 ); 2257 2258 PyAPI_FUNC(int) _PyUnicode_IsNumeric( 2259 Py_UCS4 ch /* Unicode character */ 2260 ); 2261 2262 PyAPI_FUNC(int) _PyUnicode_IsPrintable( 2263 Py_UCS4 ch /* Unicode character */ 2264 ); 2265 2266 PyAPI_FUNC(int) _PyUnicode_IsAlpha( 2267 Py_UCS4 ch /* Unicode character */ 2268 ); 2269 /* Py_UNICODE string helpers. The names and signatures parallel the C library strlen(), strcpy(), strcat(), strncpy(), strcmp(), strncmp(), strchr() and strrchr(), with Py_UNICODE in place of char; presumably the semantics match as well -- TODO confirm against the implementation. */ 2270 PyAPI_FUNC(size_t) Py_UNICODE_strlen( 2271 const Py_UNICODE *u 2272 ); 2273 2274 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy( 2275 Py_UNICODE *s1, 2276 const Py_UNICODE *s2); 2277 2278 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat( 2279 Py_UNICODE *s1, const Py_UNICODE *s2); 2280 2281 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy( 2282 Py_UNICODE *s1, 2283 const Py_UNICODE *s2, 2284 size_t n); 2285 2286 PyAPI_FUNC(int) Py_UNICODE_strcmp( 2287 const Py_UNICODE *s1, 2288 const Py_UNICODE *s2 2289 ); 2290 2291 PyAPI_FUNC(int) Py_UNICODE_strncmp( 2292 const Py_UNICODE *s1, 2293 const Py_UNICODE *s2, 2294 size_t n 2295 ); 2296 2297 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr( 2298 const Py_UNICODE *s, 2299 Py_UNICODE c 2300 ); 2301 2302 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr( 2303 const Py_UNICODE *s, 2304 Py_UNICODE c 2305 ); 2306 /* NOTE(review): the parameters are unnamed in this prototype; see the definition for the meaning of the three int arguments. */ 2307 PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int); 2308 2309 /* Create a copy of a unicode string ending with a nul character. Return NULL 2310 and raise a MemoryError exception on memory allocation failure, otherwise 2311 return a new allocated buffer (use PyMem_Free() to free the buffer).
*/ 2312 2313 PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy( 2314 PyObject *unicode 2315 ); 2316 #endif /* !Py_LIMITED_API */ 2317 /* The real consistency checker is declared only when Py_DEBUG is defined and Py_LIMITED_API is not. */ 2318 #if defined(Py_DEBUG) && !defined(Py_LIMITED_API) 2319 PyAPI_FUNC(int) _PyUnicode_CheckConsistency( 2320 PyObject *op, 2321 int check_content); 2322 #elif !defined(NDEBUG) 2323 /* For asserts that call _PyUnicode_CheckConsistency(), which would 2324 * otherwise be a problem when building with asserts but without Py_DEBUG. This fallback macro ignores check_content and only expands to PyUnicode_Check(op). */ 2325 #define _PyUnicode_CheckConsistency(op, check_content) PyUnicode_Check(op) 2326 #endif 2327 2328 #ifndef Py_LIMITED_API 2329 /* Return an interned Unicode object for an Identifier; may fail if there is no memory. */ 2330 PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*); 2331 /* Clear all static strings. */ 2332 PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void); 2333 2334 /* Fast equality check when the inputs are known to be exact unicode types 2335 and where the hash values are equal (i.e. a very probable match) */ 2336 PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *); 2337 #endif /* !Py_LIMITED_API */ 2338 2339 #ifdef __cplusplus 2340 } 2341 #endif 2342 #endif /* !Py_UNICODEOBJECT_H */ 2343