1 #ifndef Py_UNICODEOBJECT_H
2 #define Py_UNICODEOBJECT_H
3 
4 #include <stdarg.h>
5 
6 /*
7 
8 Unicode implementation based on original code by Fredrik Lundh,
9 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
10 Unicode Integration Proposal. (See
11 http://www.egenix.com/files/python/unicode-proposal.txt).
12 
13 Copyright (c) Corporation for National Research Initiatives.
14 
15 
16  Original header:
17  --------------------------------------------------------------------
18 
19  * Yet another Unicode string type for Python.  This type supports the
20  * 16-bit Basic Multilingual Plane (BMP) only.
21  *
22  * Written by Fredrik Lundh, January 1999.
23  *
24  * Copyright (c) 1999 by Secret Labs AB.
25  * Copyright (c) 1999 by Fredrik Lundh.
26  *
27  * fredrik@pythonware.com
28  * http://www.pythonware.com
29  *
30  * --------------------------------------------------------------------
31  * This Unicode String Type is
32  *
33  * Copyright (c) 1999 by Secret Labs AB
34  * Copyright (c) 1999 by Fredrik Lundh
35  *
36  * By obtaining, using, and/or copying this software and/or its
37  * associated documentation, you agree that you have read, understood,
38  * and will comply with the following terms and conditions:
39  *
40  * Permission to use, copy, modify, and distribute this software and its
41  * associated documentation for any purpose and without fee is hereby
42  * granted, provided that the above copyright notice appears in all
43  * copies, and that both that copyright notice and this permission notice
44  * appear in supporting documentation, and that the name of Secret Labs
45  * AB or the author not be used in advertising or publicity pertaining to
46  * distribution of the software without specific, written prior
47  * permission.
48  *
49  * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
50  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
51  * FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
52  * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
53  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
54  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
55  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
56  * -------------------------------------------------------------------- */
57 
58 #include <ctype.h>
59 
60 /* === Internal API ======================================================= */
61 
62 /* --- Internal Unicode Format -------------------------------------------- */
63 
/* Python 3.x requires unicode, so this feature macro is always defined. */
#define Py_USING_UNICODE

/* SIZEOF_WCHAR_T is not defined by this header; it has to be defined
   already when this header is processed, otherwise compilation stops. */
#ifndef SIZEOF_WCHAR_T
#error Must define SIZEOF_WCHAR_T
#endif

/* Size of Py_UNICODE (a typedef of wchar_t), taken from SIZEOF_WCHAR_T. */
#define Py_UNICODE_SIZE SIZEOF_WCHAR_T

/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
   Otherwise, Unicode strings are stored as UCS-2 (with limited support
   for UTF-16) */

#if Py_UNICODE_SIZE >= 4
#define Py_UNICODE_WIDE
#endif
80 
81 /* Set these flags if the platform has "wchar.h" and the
82    wchar_t type is a 16-bit unsigned type */
83 /* #define HAVE_WCHAR_H */
84 /* #define HAVE_USABLE_WCHAR_T */
85 
/* Py_UNICODE was the native Unicode storage format (code unit) used by
   Python and represents a single Unicode element in the Unicode type.
   With PEP 393, Py_UNICODE is deprecated and replaced with a
   typedef to wchar_t. */

/* Both names below are hidden when Py_LIMITED_API is defined. */
#ifndef Py_LIMITED_API
/* Spells the C type that backs Py_UNICODE. */
#define PY_UNICODE_TYPE wchar_t
typedef wchar_t Py_UNICODE;
#endif
95 
/* If the compiler provides a wchar_t type we try to support it
   through the interface functions PyUnicode_FromWideChar(),
   PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */

/* HAVE_USABLE_WCHAR_T forces HAVE_WCHAR_H on, so that <wchar.h> is included
   below. */
#ifdef HAVE_USABLE_WCHAR_T
# ifndef HAVE_WCHAR_H
#  define HAVE_WCHAR_H
# endif
#endif

#ifdef HAVE_WCHAR_H
/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
# ifdef _HAVE_BSDI
#  include <time.h>
# endif
/* <wchar.h> is included whenever HAVE_WCHAR_H is set, not only on BSDI;
   the extra indentation of the directive below is cosmetic. */
#  include <wchar.h>
#endif
113 
/* Py_UCS4, Py_UCS2 and Py_UCS1 are typedefs for the respective
   unicode representations (32-, 16- and 8-bit unsigned code units).
   NOTE(review): the fixed-width types come from <stdint.h>, which the
   visible part of this header does not include itself -- confirm that the
   including file provides it first. */
typedef uint32_t Py_UCS4;
typedef uint16_t Py_UCS2;
typedef uint8_t Py_UCS1;
119 
120 /* --- Internal Unicode Operations ---------------------------------------- */
121 
122 /* Since splitting on whitespace is an important use case, and
123    whitespace in most situations is solely ASCII whitespace, we
124    optimize for the common case by using a quick look-up table
125    _Py_ascii_whitespace (see below) with an inlined check.
126 
127  */
128 #ifndef Py_LIMITED_API
/* Test whether ch is whitespace.  Values below 128 are answered by indexing
   the _Py_ascii_whitespace table; everything else is delegated to
   _PyUnicode_IsWhitespace().  ch is expanded twice, so it must not have
   side effects. */
#define Py_UNICODE_ISSPACE(ch) \
    ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
131 
/* Case and line-break predicates; each one simply forwards ch to the
   matching _PyUnicode_Is*() helper. */
#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)

/* Case conversions, forwarded to the _PyUnicode_To*case() helpers. */
#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)

/* Numeric-category and printability predicates. */
#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)

/* Conversions from a character to its numeric value, forwarded to the
   _PyUnicode_To*() helpers. */
#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)

#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)

/* True if ch is alphabetic, a decimal digit, a digit or numeric.  The four
   tests short-circuit in that order, so ch is expanded up to four times and
   must not have side effects. */
#define Py_UNICODE_ISALNUM(ch) \
       (Py_UNICODE_ISALPHA(ch) || \
    Py_UNICODE_ISDECIMAL(ch) || \
    Py_UNICODE_ISDIGIT(ch) || \
    Py_UNICODE_ISNUMERIC(ch))
157 
/* Copy `length` Py_UNICODE code units from source to target with memcpy();
   the byte count is length * sizeof(Py_UNICODE).  As with memcpy(), the
   two regions must not overlap. */
#define Py_UNICODE_COPY(target, source, length)                         \
    memcpy((target), (source), sizeof(Py_UNICODE) * (length))
160 
/* Store `value` into the first `length` code units of `target`.

   target, value and length are each evaluated exactly once, in that order,
   before the loop starts.  (Previously `length` was re-evaluated on every
   loop iteration, which repeated any side effect or expensive computation
   in the argument.)  A length of zero or less writes nothing. */
#define Py_UNICODE_FILL(target, value, length) \
    do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
        const Py_ssize_t fill_len_ = (length);\
        for (i_ = 0; i_ < fill_len_; i_++) t_[i_] = v_;\
    } while (0)
165 
/* Surrogate helpers.  The three range tests expand ch twice, so ch must be
   free of side effects. */
#define Py_UNICODE_IS_SURROGATE(ch) ((ch) >= 0xD800 && (ch) <= 0xDFFF)
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) ((ch) >= 0xD800 && (ch) <= 0xDBFF)
#define Py_UNICODE_IS_LOW_SURROGATE(ch) ((ch) >= 0xDC00 && (ch) <= 0xDFFF)
/* Combine a high/low surrogate pair into one Py_UCS4 code point: the low
   10 bits of each half are concatenated and offset by 0x10000. */
#define Py_UNICODE_JOIN_SURROGATES(high, low)           \
    (0x10000 +                                          \
     ((((Py_UCS4)(high) & 0x03FF) << 10) |              \
      ((Py_UCS4)(low) & 0x03FF)))
/* High surrogate: top 10 bits of (ch - 0x10000) added to D800; the
   subtraction is folded into the constant term. */
#define Py_UNICODE_HIGH_SURROGATE(ch) \
    (((ch) >> 10) + (0xD800 - (0x10000 >> 10)))
/* Low surrogate: bottom 10 bits of ch added to DC00. */
#define Py_UNICODE_LOW_SURROGATE(ch) (((ch) & 0x3FF) + 0xDC00)
178 
/* Check if substring matches at given offset.  The offset must be
   valid, and the substring must not be empty.

   The first and the last code unit are compared before falling back to a
   memcmp() over the whole substring.  Every argument is expanded several
   times, so none of them may have side effects.

   NOTE(review): the macro dereferences ->wstr and ->wstr_length directly on
   both arguments, but in the struct definitions of this header wstr is a
   member of PyASCIIObject and wstr_length a member of
   PyCompactUnicodeObject; no single struct exposes both as direct members.
   Confirm which pointer type callers are expected to pass, or whether this
   macro is still usable at all. */

#define Py_UNICODE_MATCH(string, offset, substring) \
    ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
     ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
     !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
186 
187 #endif /* Py_LIMITED_API */
188 
189 #ifdef __cplusplus
190 extern "C" {
191 #endif
192 
193 /* --- Unicode Type ------------------------------------------------------- */
194 
195 #ifndef Py_LIMITED_API
196 
/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
   structure. state.ascii and state.compact are set, and the data
   immediately follow the structure. utf8_length and wstr_length can be found
   in the length field; the utf8 pointer is equal to the data pointer.

   This structure is also the first member (_base) of
   PyCompactUnicodeObject, so its fields are reachable through a
   PyASCIIObject* cast for every form of string described below. */
typedef struct {
    /* There are 4 forms of Unicode strings:

       - compact ascii:

         * structure = PyASCIIObject
         * test: PyUnicode_IS_COMPACT_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND
         * compact = 1
         * ascii = 1
         * ready = 1
         * (length is the length of the utf8 and wstr strings)
         * (data starts just after the structure)
         * (since ASCII is decoded from UTF-8, the utf8 string is the data)

       - compact:

         * structure = PyCompactUnicodeObject
         * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 1
         * ready = 1
         * ascii = 0
         * utf8 is not shared with data
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data and wstr_length=length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL
         * (data starts just after the structure)

       - legacy string, not ready:

         * structure = PyUnicodeObject
         * test: kind == PyUnicode_WCHAR_KIND
         * length = 0 (use wstr_length)
         * hash = -1
         * kind = PyUnicode_WCHAR_KIND
         * compact = 0
         * ascii = 0
         * ready = 0
         * interned = SSTATE_NOT_INTERNED
         * wstr is not NULL
         * data.any is NULL
         * utf8 is NULL
         * utf8_length = 0

       - legacy string, ready:

         * structure = PyUnicodeObject
         * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 0
         * ready = 1
         * data.any is not NULL
         * utf8 is shared and utf8_length = length with data.any if ascii = 1
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data.any and wstr_length = length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL

       Compact strings use only one memory block (structure + characters),
       whereas legacy strings use one block for the structure and one block
       for characters.

       Legacy strings are created by PyUnicode_FromUnicode() and
       PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
       when PyUnicode_READY() is called.

       See also _PyUnicode_CheckConsistency().
    */
    PyObject_HEAD
    Py_ssize_t length;          /* Number of code points in the string */
    Py_hash_t hash;             /* Hash value; -1 if not set */
    /* Bit-field flags; the named fields use 8 bits and the unnamed field
       below pads the group out to 32 bits. */
    struct {
        /*
           SSTATE_NOT_INTERNED (0)
           SSTATE_INTERNED_MORTAL (1)
           SSTATE_INTERNED_IMMORTAL (2)

           If interned != SSTATE_NOT_INTERNED, the two references from the
           dictionary to this object are *not* counted in ob_refcnt.
         */
        unsigned int interned:2;
        /* Character size:

           - PyUnicode_WCHAR_KIND (0):

             * character type = wchar_t (16 or 32 bits, depending on the
               platform)

           - PyUnicode_1BYTE_KIND (1):

             * character type = Py_UCS1 (8 bits, unsigned)
             * all characters are in the range U+0000-U+00FF (latin1)
             * if ascii is set, all characters are in the range U+0000-U+007F
               (ASCII), otherwise at least one character is in the range
               U+0080-U+00FF

           - PyUnicode_2BYTE_KIND (2):

             * character type = Py_UCS2 (16 bits, unsigned)
             * all characters are in the range U+0000-U+FFFF (BMP)
             * at least one character is in the range U+0100-U+FFFF

           - PyUnicode_4BYTE_KIND (4):

             * character type = Py_UCS4 (32 bits, unsigned)
             * all characters are in the range U+0000-U+10FFFF
             * at least one character is in the range U+10000-U+10FFFF
         */
        unsigned int kind:3;
        /* Compact is with respect to the allocation scheme. Compact unicode
           objects only require one memory block while non-compact objects use
           one block for the PyUnicodeObject struct and another for its data
           buffer. */
        unsigned int compact:1;
        /* The string only contains characters in the range U+0000-U+007F (ASCII)
           and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
           set, use the PyASCIIObject structure. */
        unsigned int ascii:1;
        /* The ready flag indicates whether the object layout is initialized
           completely. This means that this is either a compact object, or
           the data pointer is filled out. The bit is redundant, and helps
           to minimize the test in PyUnicode_IS_READY(). */
        unsigned int ready:1;
        /* Padding to ensure that PyUnicode_DATA() is always aligned to
           4 bytes (see issue #19537 on m68k). */
        unsigned int :24;
    } state;
    wchar_t *wstr;              /* wchar_t representation (null-terminated) */
} PyASCIIObject;
336 
/* Non-ASCII strings allocated through PyUnicode_New use the
   PyCompactUnicodeObject structure. state.compact is set, and the data
   immediately follow the structure. */
typedef struct {
    PyASCIIObject _base;        /* Common header; must stay the first member
                                 * so the object can be viewed through a
                                 * PyASCIIObject pointer. */
    Py_ssize_t utf8_length;     /* Number of bytes in utf8, excluding the
                                 * terminating \0. */
    char *utf8;                 /* UTF-8 representation (null-terminated) */
    Py_ssize_t wstr_length;     /* Number of code points in wstr, possible
                                 * surrogates count as two code points. */
} PyCompactUnicodeObject;
348 
/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
   PyUnicodeObject structure. The actual string data is initially in the wstr
   block, and copied into the data block using _PyUnicode_Ready. */
typedef struct {
    PyCompactUnicodeObject _base;   /* Common header shared with the compact
                                       forms; must stay the first member. */
    /* One pointer viewed through several element types; which member is
       meaningful follows from state.kind. */
    union {
        void *any;
        Py_UCS1 *latin1;
        Py_UCS2 *ucs2;
        Py_UCS4 *ucs4;
    } data;                     /* Canonical, smallest-form Unicode buffer */
} PyUnicodeObject;
361 #endif
362 
/* Type objects exported as data.  PyUnicode_Type is the one that
   PyUnicode_CheckExact() compares against. */
PyAPI_DATA(PyTypeObject) PyUnicode_Type;
PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
365 
/* PyUnicode_Check(op): true if the type of op has the
   Py_TPFLAGS_UNICODE_SUBCLASS flag set, as tested by PyType_FastSubclass().
   PyUnicode_CheckExact(op): true only if the type of op is exactly
   PyUnicode_Type. */
#define PyUnicode_Check(op) \
                 PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
369 
370 /* Fast access macros */
371 #ifndef Py_LIMITED_API
372 
/* Length of the wstr representation of op.

   Compact ASCII strings only have the PyASCIIObject header, so their length
   field is used; every other form stores the value in
   PyCompactUnicodeObject.wstr_length.  No type checks are performed.

   op is fully parenthesized inside each cast so that an argument that is
   not a primary expression (for instance a conditional expression) is cast
   as a whole.  op is expanded more than once and must not have side
   effects. */
#define PyUnicode_WSTR_LENGTH(op) \
    (PyUnicode_IS_COMPACT_ASCII(op) ?                  \
     ((PyASCIIObject*)(op))->length :                  \
     ((PyCompactUnicodeObject*)(op))->wstr_length)
377 
/* Returns the deprecated Py_UNICODE representation's size in code units
   (this includes surrogate pairs as 2 units).
   If the Py_UNICODE representation is not available, it will be computed
   on request.  Use PyUnicode_GET_LENGTH() for the length in code points.

   When wstr is NULL the macro calls PyUnicode_AsUnicode() only for its side
   effect of filling in wstr (the returned pointer is discarded) and then
   reads the length.  op is expanded several times and must not have side
   effects. */

#define PyUnicode_GET_SIZE(op)                       \
    (assert(PyUnicode_Check(op)),                    \
     (((PyASCIIObject *)(op))->wstr) ?               \
      PyUnicode_WSTR_LENGTH(op) :                    \
      ((void)PyUnicode_AsUnicode((PyObject *)(op)),  \
       assert(((PyASCIIObject *)(op))->wstr),        \
       PyUnicode_WSTR_LENGTH(op)))

/* Same quantity scaled by Py_UNICODE_SIZE. */
#define PyUnicode_GET_DATA_SIZE(op) \
    (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
393 
/* Alias for PyUnicode_AsUnicode().  This will create a wchar_t/Py_UNICODE
   representation on demand.  Using this macro is very inefficient now,
   try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
   use PyUnicode_WRITE() and PyUnicode_READ().

   If wstr is already set it is returned directly; otherwise the result of
   PyUnicode_AsUnicode() is returned.  op is expanded several times and must
   not have side effects. */

#define PyUnicode_AS_UNICODE(op) \
    (assert(PyUnicode_Check(op)), \
     (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
      PyUnicode_AsUnicode((PyObject *)(op)))

/* The same pointer as PyUnicode_AS_UNICODE(), viewed as const char *. */
#define PyUnicode_AS_DATA(op) \
    ((const char *)(PyUnicode_AS_UNICODE(op)))
406 
407 
408 /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
409 
/* Values for PyASCIIObject.state: */

/* Interning state, stored in the 2-bit state.interned field. */
#define SSTATE_NOT_INTERNED 0
#define SSTATE_INTERNED_MORTAL 1
#define SSTATE_INTERNED_IMMORTAL 2
416 
/* Return true if the string contains only ASCII characters, or 0 if not. The
   string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
   ready.

   op is parenthesized inside the cast so that an argument which is not a
   primary expression (for instance a conditional expression) is cast as a
   whole. */
#define PyUnicode_IS_ASCII(op)                   \
    (assert(PyUnicode_Check(op)),                \
     assert(PyUnicode_IS_READY(op)),             \
     ((PyASCIIObject*)(op))->state.ascii)

/* Return true if the string is compact or 0 if not.
   No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT(op) \
    (((PyASCIIObject*)(op))->state.compact)

/* Return true if the string is a compact ASCII string (use PyASCIIObject
   structure), or 0 if not.  No type checks or Ready calls are performed.
   op is expanded twice and must not have side effects. */
#define PyUnicode_IS_COMPACT_ASCII(op)                 \
    (((PyASCIIObject*)(op))->state.ascii && PyUnicode_IS_COMPACT(op))
434 
/* Values of the state.kind field.  Each of the three *BYTE_KIND constants
   equals the size in bytes of the matching character type
   (Py_UCS1 / Py_UCS2 / Py_UCS4). */
enum PyUnicode_Kind {
/* String contains only wstr byte characters.  This is only possible
   when the string was created with a legacy API and _PyUnicode_Ready()
   has not been called yet.  */
    PyUnicode_WCHAR_KIND = 0,
/* Return values of the PyUnicode_KIND() macro: */
    PyUnicode_1BYTE_KIND = 1,
    PyUnicode_2BYTE_KIND = 2,
    PyUnicode_4BYTE_KIND = 4
};
445 
/* Return pointers to the canonical representation cast to unsigned char,
   Py_UCS2, or Py_UCS4 for direct character access.
   No checks are performed, use PyUnicode_KIND() before to ensure
   these will work correctly.

   Each macro is PyUnicode_DATA(op) with a cast to the element type that
   matches the respective kind. */

#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
454 
/* Return one of the PyUnicode_*_KIND values defined above.
   Asserts that op is a unicode object and that it is ready, then reads
   state.kind. */
#define PyUnicode_KIND(op) \
    (assert(PyUnicode_Check(op)), \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->state.kind)
460 
/* Return a void pointer to the raw unicode buffer. */

/* Compact strings: the characters start directly after the header, which
   is a PyASCIIObject for ASCII strings and a PyCompactUnicodeObject
   otherwise.  PyUnicode_IS_ASCII() asserts that op is ready. */
#define _PyUnicode_COMPACT_DATA(op)                     \
    (PyUnicode_IS_ASCII(op) ?                   \
     ((void*)((PyASCIIObject*)(op) + 1)) :              \
     ((void*)((PyCompactUnicodeObject*)(op) + 1)))

/* Non-compact strings: the characters live in the separate block that
   data.any points to; asserts that the pointer is set. */
#define _PyUnicode_NONCOMPACT_DATA(op)                  \
    (assert(((PyUnicodeObject*)(op))->data.any),        \
     ((((PyUnicodeObject *)(op))->data.any)))

/* Dispatch on state.compact to one of the two helpers above.  op is
   expanded several times and must not have side effects. */
#define PyUnicode_DATA(op) \
    (assert(PyUnicode_Check(op)), \
     PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) :   \
     _PyUnicode_NONCOMPACT_DATA(op))
475 
476 /* In the access macros below, "kind" may be evaluated more than once.
477    All other macro parameters are evaluated exactly once, so it is safe
478    to put side effects into them (such as increasing the index). */
479 
/* Store code point `value` at position `index` (0-based) of the canonical
   buffer `data`, whose element width is selected by `kind`.

   No sanity checks are made: the macro is meant for tight loops in which
   the caller has already fetched and cached kind and data.  Any kind other
   than the 1-byte and 2-byte ones is handled as 4-byte, guarded only by an
   assert().  The value is narrowed with a plain cast to the element type. */
#define PyUnicode_WRITE(kind, data, index, value)                   \
    do {                                                            \
        switch ((kind)) {                                           \
        case PyUnicode_1BYTE_KIND:                                  \
            ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value);        \
            break;                                                  \
        case PyUnicode_2BYTE_KIND:                                  \
            ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value);        \
            break;                                                  \
        default:                                                    \
            assert((kind) == PyUnicode_4BYTE_KIND);                 \
            ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value);        \
            break;                                                  \
        }                                                           \
    } while (0)
502 
/* Fetch the code point at position `index` of the canonical buffer `data`
   and widen it to Py_UCS4.  `kind` selects the element type; anything that
   is neither the 1-byte nor the 2-byte kind is read as 4-byte.  No checks
   or ready calls are performed. */
#define PyUnicode_READ(kind, data, index)                       \
    ((Py_UCS4)(                                                 \
        (kind) == PyUnicode_1BYTE_KIND                          \
            ? ((const Py_UCS1 *)(data))[(index)]                \
            : ((kind) == PyUnicode_2BYTE_KIND                   \
                   ? ((const Py_UCS2 *)(data))[(index)]         \
                   : ((const Py_UCS4 *)(data))[(index)])))
514 
/* Read the code point at `index` straight from a unicode object.

   This is a convenience for one-off reads: it looks up the kind (possibly
   twice) and the data pointer on every use, which makes it slower than
   PyUnicode_READ().  Code that reads many characters in a row should cache
   kind and data once and call PyUnicode_READ() instead.

   The object must be a ready unicode object (checked with assert() only).
   `unicode` is expanded several times; `index` is evaluated once. */
#define PyUnicode_READ_CHAR(unicode, index)             \
    (assert(PyUnicode_Check(unicode)),                  \
     assert(PyUnicode_IS_READY(unicode)),               \
     PyUnicode_READ(PyUnicode_KIND((unicode)),          \
                    PyUnicode_DATA((unicode)),          \
                    (index)))
530 
/* Returns the length of the unicode string. The caller has to make sure that
   the string has its canonical representation set before calling
   this macro.  Call PyUnicode_(FAST_)Ready to ensure that.

   Readiness is only checked with assert(); the value is the length field
   of the PyASCIIObject header. */
#define PyUnicode_GET_LENGTH(op)                \
    (assert(PyUnicode_Check(op)),               \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->length)
538 
539 
/* Fast check to determine whether an object is ready. Equivalent to
   (PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any).

   Only the state.ready bit is read; no type check is performed.  op is
   parenthesized inside the cast so that an argument which is not a primary
   expression (for instance a conditional expression) is cast as a whole. */

#define PyUnicode_IS_READY(op) (((PyASCIIObject*)(op))->state.ready)
544 
/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
   case.  If the canonical representation is not yet set, it will still call
   _PyUnicode_Ready().
   Returns 0 on success and -1 on errors.

   When the ready bit is already set the macro yields 0 without any call;
   otherwise the result is whatever _PyUnicode_Ready() returns.  op is
   expanded several times and must not have side effects. */
#define PyUnicode_READY(op)                        \
    (assert(PyUnicode_Check(op)),                       \
     (PyUnicode_IS_READY(op) ?                          \
      0 : _PyUnicode_Ready((PyObject *)(op))))
553 
/* Return a maximum character value which is suitable for creating another
   string based on op.  This is always an approximation but more efficient
   than iterating over the string.

   The result depends only on the ascii flag and the kind:
   0x7f for ASCII strings, otherwise 0xff, 0xffff or 0x10ffff for the
   1-byte, 2-byte and remaining kinds.  op must be ready (assert() only). */
#define PyUnicode_MAX_CHAR_VALUE(op) \
    (assert(PyUnicode_IS_READY(op)),                                    \
     (PyUnicode_IS_ASCII(op) ?                                          \
      (0x7f) :                                                          \
      (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ?                     \
       (0xffU) :                                                        \
       (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ?                    \
        (0xffffU) :                                                     \
        (0x10ffffU)))))
566 
567 #endif
568 
569 /* --- Constants ---------------------------------------------------------- */
570 
/* This Unicode character will be used as replacement character during
   decoding if the errors argument is set to "replace". Note: the
   Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
   Unicode 3.0.  The constant has type Py_UCS4. */

#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD)
577 
578 /* === Public API ========================================================= */
579 
580 /* --- Plain Py_UNICODE --------------------------------------------------- */
581 
582 /* With PEP 393, this is the recommended way to allocate a new unicode object.
583    This function will allocate the object and its buffer in a single memory
584    block.  Objects created using this function are not resizable. */
585 #ifndef Py_LIMITED_API
586 PyAPI_FUNC(PyObject*) PyUnicode_New(
587     Py_ssize_t size,            /* Number of code points in the new string */
588     Py_UCS4 maxchar             /* maximum code point value in the string */
589     );
590 #endif
591 
592 /* Initializes the canonical string representation from the deprecated
593    wstr/Py_UNICODE representation. This function is used to convert Unicode
594    objects which were created using the old API to the new flexible format
595    introduced with PEP 393.
596 
597    Don't call this function directly, use the public PyUnicode_READY() macro
598    instead. */
599 #ifndef Py_LIMITED_API
600 PyAPI_FUNC(int) _PyUnicode_Ready(
601     PyObject *unicode           /* Unicode object */
602     );
603 #endif
604 
605 /* Get a copy of a Unicode string. */
606 #ifndef Py_LIMITED_API
607 PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
608     PyObject *unicode
609     );
610 #endif
611 
612 /* Copy character from one unicode object into another, this function performs
613    character conversion when necessary and falls back to memcpy() if possible.
614 
615    Fail if to is too small (smaller than *how_many* or smaller than
616    len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
617    kind(to), or if *to* has more than 1 reference.
618 
619    Return the number of written character, or return -1 and raise an exception
620    on error.
621 
622    Pseudo-code:
623 
624        how_many = min(how_many, len(from) - from_start)
625        to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
626        return how_many
627 
628    Note: The function doesn't write a terminating null character.
629    */
630 #ifndef Py_LIMITED_API
631 PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
632     PyObject *to,
633     Py_ssize_t to_start,
634     PyObject *from,
635     Py_ssize_t from_start,
636     Py_ssize_t how_many
637     );
638 
639 /* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
640    may crash if parameters are invalid (e.g. if the output string
641    is too short). */
642 PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
643     PyObject *to,
644     Py_ssize_t to_start,
645     PyObject *from,
646     Py_ssize_t from_start,
647     Py_ssize_t how_many
648     );
649 #endif
650 
651 #ifndef Py_LIMITED_API
652 /* Fill a string with a character: write fill_char into
653    unicode[start:start+length].
654 
655    Fail if fill_char is bigger than the string maximum character, or if the
656    string has more than 1 reference.
657 
658    Return the number of written character, or return -1 and raise an exception
659    on error. */
660 PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
661     PyObject *unicode,
662     Py_ssize_t start,
663     Py_ssize_t length,
664     Py_UCS4 fill_char
665     );
666 
667 /* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
668    if parameters are invalid (e.g. if length is longer than the string). */
669 PyAPI_FUNC(void) _PyUnicode_FastFill(
670     PyObject *unicode,
671     Py_ssize_t start,
672     Py_ssize_t length,
673     Py_UCS4 fill_char
674     );
675 #endif
676 
677 /* Create a Unicode Object from the Py_UNICODE buffer u of the given
678    size.
679 
680    u may be NULL which causes the contents to be undefined. It is the
681    user's responsibility to fill in the needed data afterwards. Note
682    that modifying the Unicode object contents after construction is
683    only allowed if u was set to NULL.
684 
685    The buffer is copied into the new object. */
686 
687 #ifndef Py_LIMITED_API
688 PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
689     const Py_UNICODE *u,        /* Unicode buffer */
690     Py_ssize_t size             /* size of buffer */
691     );
692 #endif
693 
694 /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
695 PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
696     const char *u,             /* UTF-8 encoded string */
697     Py_ssize_t size            /* size of buffer */
698     );
699 
700 /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
701    UTF-8 encoded bytes.  The size is determined with strlen(). */
702 PyAPI_FUNC(PyObject*) PyUnicode_FromString(
703     const char *u              /* UTF-8 encoded string */
704     );
705 
706 #ifndef Py_LIMITED_API
707 /* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
708    Scan the string to find the maximum character. */
709 PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
710     int kind,
711     const void *buffer,
712     Py_ssize_t size);
713 
714 /* Create a new string from a buffer of ASCII characters.
715    WARNING: Don't check if the string contains any non-ASCII character. */
716 PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
717     const char *buffer,
718     Py_ssize_t size);
719 #endif
720 
721 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
722 PyAPI_FUNC(PyObject*) PyUnicode_Substring(
723     PyObject *str,
724     Py_ssize_t start,
725     Py_ssize_t end);
726 #endif
727 
728 #ifndef Py_LIMITED_API
729 /* Compute the maximum character of the substring unicode[start:end].
730    Return 127 for an empty string. */
731 PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
732     PyObject *unicode,
733     Py_ssize_t start,
734     Py_ssize_t end);
735 #endif
736 
737 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
738 /* Copy the string into a UCS4 buffer including the null character if copy_null
739    is set. Return NULL and raise an exception on error. Raise a SystemError if
740    the buffer is smaller than the string. Return buffer on success.
741 
742    buflen is the length of the buffer in (Py_UCS4) characters. */
743 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
744     PyObject *unicode,
745     Py_UCS4* buffer,
746     Py_ssize_t buflen,
747     int copy_null);
748 
749 /* Copy the string into a UCS4 buffer. A new buffer is allocated using
750  * PyMem_Malloc; if this fails, NULL is returned with a memory error
751    exception set. */
752 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
753 #endif
754 
755 #ifndef Py_LIMITED_API
756 /* Return a read-only pointer to the Unicode object's internal
757    Py_UNICODE buffer.
758    If the wchar_t/Py_UNICODE representation is not yet available, this
759    function will calculate it. */
760 
761 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
762     PyObject *unicode           /* Unicode object */
763     );
764 
765 /* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
766    contains null characters. */
767 PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
768     PyObject *unicode           /* Unicode object */
769     );
770 
771 /* Return a read-only pointer to the Unicode object's internal
772    Py_UNICODE buffer and save the length at size.
773    If the wchar_t/Py_UNICODE representation is not yet available, this
774    function will calculate it. */
775 
776 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
777     PyObject *unicode,          /* Unicode object */
778     Py_ssize_t *size            /* location where to save the length */
779     );
780 #endif
781 
782 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
783 /* Get the length of the Unicode object. */
784 
785 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
786     PyObject *unicode
787 );
788 #endif
789 
790 /* Get the number of Py_UNICODE units in the
791    string representation. */
792 
793 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
794     PyObject *unicode           /* Unicode object */
795     );
796 
797 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
798 /* Read a character from the string. */
799 
800 PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
801     PyObject *unicode,
802     Py_ssize_t index
803     );
804 
805 /* Write a character to the string. The string must have been created through
806    PyUnicode_New, must not be shared, and must not have been hashed yet.
807 
808    Return 0 on success, -1 on error. */
809 
810 PyAPI_FUNC(int) PyUnicode_WriteChar(
811     PyObject *unicode,
812     Py_ssize_t index,
813     Py_UCS4 character
814     );
815 #endif
816 
817 #ifndef Py_LIMITED_API
818 /* Get the maximum ordinal for a Unicode character. */
819 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
820 #endif
821 
822 /* Resize a Unicode object. The length is the number of characters, except
823    if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length
824    is the number of Py_UNICODE characters.
825 
826    *unicode is modified to point to the new (resized) object and 0
827    returned on success.
828 
829    Try to resize the string in place (which is usually faster than allocating
830    a new string and copy characters), or create a new string.
831 
832    Error handling is implemented as follows: an exception is set, -1
833    is returned and *unicode left untouched.
834 
835    WARNING: The function doesn't check string content, the result may not be a
836             string in canonical representation. */
837 
838 PyAPI_FUNC(int) PyUnicode_Resize(
839     PyObject **unicode,         /* Pointer to the Unicode object */
840     Py_ssize_t length           /* New length */
841     );
842 
843 /* Decode obj to a Unicode object.
844 
845    bytes, bytearray and other bytes-like objects are decoded according to the
846    given encoding and error handler. The encoding and error handler can be
847    NULL to have the interface use UTF-8 and "strict".
848 
849    All other objects (including Unicode objects) raise an exception.
850 
851    The API returns NULL in case of an error. The caller is responsible
852    for decref'ing the returned objects.
853 
854 */
855 
856 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
857     PyObject *obj,              /* Object */
858     const char *encoding,       /* encoding */
859     const char *errors          /* error handling */
860     );
861 
862 /* Copy an instance of a Unicode subtype to a new true Unicode object if
863    necessary. If obj is already a true Unicode object (not a subtype), return
864    the reference with *incremented* refcount.
865 
866    The API returns NULL in case of an error. The caller is responsible
867    for decref'ing the returned objects.
868 
869 */
870 
871 PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
872     PyObject *obj      /* Object */
873     );
874 
875 PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
876     const char *format,   /* ASCII-encoded string  */
877     va_list vargs
878     );
879 PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
880     const char *format,   /* ASCII-encoded string  */
881     ...
882     );
883 
884 #ifndef Py_LIMITED_API
885 typedef struct {
886     PyObject *buffer;
887     void *data;
888     enum PyUnicode_Kind kind;
889     Py_UCS4 maxchar;
890     Py_ssize_t size;
891     Py_ssize_t pos;
892 
893     /* minimum number of allocated characters (default: 0) */
894     Py_ssize_t min_length;
895 
896     /* minimum character (default: 127, ASCII) */
897     Py_UCS4 min_char;
898 
899     /* If non-zero, overallocate the buffer (default: 0). */
900     unsigned char overallocate;
901 
902     /* If readonly is 1, buffer is a shared string (cannot be modified)
903        and size is set to 0. */
904     unsigned char readonly;
905 } _PyUnicodeWriter ;
906 
907 /* Initialize a Unicode writer.
908  *
909  * By default, the minimum buffer size is 0 character and overallocation is
910  * disabled. Set min_length, min_char and overallocate attributes to control
911  * the allocation of the buffer. */
912 PyAPI_FUNC(void)
913 _PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
914 
/* Prepare the buffer so that 'length' characters with the specified maximum
   character can be written.

   Nothing has to be done, and 0 is produced, when the writer already
   accepts MAXCHAR and has LENGTH characters of room left (size - pos), or
   when LENGTH is 0.  Every other case is delegated to
   _PyUnicodeWriter_PrepareInternal().

   Return 0 on success, raise an exception and return -1 on error.

   The arguments may be evaluated more than once. */
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR)                  \
    (((((MAXCHAR) <= (WRITER)->maxchar)                                    \
       && ((LENGTH) <= (WRITER)->size - (WRITER)->pos))                    \
      || ((LENGTH) == 0))                                                  \
     ? 0                                                                   \
     : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR)))
926 
927 /* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
928    instead. */
929 PyAPI_FUNC(int)
930 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
931                                  Py_ssize_t length, Py_UCS4 maxchar);
932 
/* Prepare the buffer so that it has at least the kind KIND.
   For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
   support characters in range U+0000-U+FFFF.

   KIND must not be PyUnicode_WCHAR_KIND (checked with assert()).  When the
   writer's kind is already at least KIND the result is 0; otherwise the
   work is delegated to _PyUnicodeWriter_PrepareKindInternal().

   Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_PrepareKind(WRITER, KIND)                         \
    (assert((KIND) != PyUnicode_WCHAR_KIND),                               \
     (((WRITER)->kind < (KIND))                                            \
      ? _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND))             \
      : 0))
943 
944 /* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
945    macro instead. */
946 PyAPI_FUNC(int)
947 _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
948                                      enum PyUnicode_Kind kind);
949 
950 /* Append a Unicode character.
951    Return 0 on success, raise an exception and return -1 on error. */
952 PyAPI_FUNC(int)
953 _PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
954     Py_UCS4 ch
955     );
956 
957 /* Append a Unicode string.
958    Return 0 on success, raise an exception and return -1 on error. */
959 PyAPI_FUNC(int)
960 _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
961     PyObject *str               /* Unicode string */
962     );
963 
964 /* Append a substring of a Unicode string.
965    Return 0 on success, raise an exception and return -1 on error. */
966 PyAPI_FUNC(int)
967 _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
968     PyObject *str,              /* Unicode string */
969     Py_ssize_t start,
970     Py_ssize_t end
971     );
972 
973 /* Append an ASCII-encoded byte string.
974    Return 0 on success, raise an exception and return -1 on error. */
975 PyAPI_FUNC(int)
976 _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
977     const char *str,           /* ASCII-encoded byte string */
978     Py_ssize_t len             /* number of bytes, or -1 if unknown */
979     );
980 
981 /* Append a latin1-encoded byte string.
982    Return 0 on success, raise an exception and return -1 on error. */
983 PyAPI_FUNC(int)
984 _PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
985     const char *str,           /* latin1-encoded byte string */
986     Py_ssize_t len             /* length in bytes */
987     );
988 
989 /* Get the value of the writer as a Unicode string. Clear the
990    buffer of the writer. Raise an exception and return NULL
991    on error. */
992 PyAPI_FUNC(PyObject *)
993 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
994 
995 /* Deallocate memory of a writer (clear its internal buffer). */
996 PyAPI_FUNC(void)
997 _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
998 #endif
999 
1000 #ifndef Py_LIMITED_API
1001 /* Format the object based on the format_spec, as defined in PEP 3101
1002    (Advanced String Formatting). */
1003 PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
1004     _PyUnicodeWriter *writer,
1005     PyObject *obj,
1006     PyObject *format_spec,
1007     Py_ssize_t start,
1008     Py_ssize_t end);
1009 #endif
1010 
1011 PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
1012 PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
1013 PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
1014     const char *u              /* UTF-8 encoded string */
1015     );
1016 #ifndef Py_LIMITED_API
1017 PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
1018 #endif
1019 
/* Read the `interned` state field of `op`.  No type check is performed:
   use this only when `op` is known to be a string. */
#define PyUnicode_CHECK_INTERNED(op) (((PyASCIIObject *)(op))->state.interned)
1023 
1024 /* --- wchar_t support for platforms which support it --------------------- */
1025 
1026 #ifdef HAVE_WCHAR_H
1027 
1028 /* Create a Unicode Object from the wchar_t buffer w of the given
1029    size.
1030 
1031    The buffer is copied into the new object. */
1032 
1033 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
1034     const wchar_t *w,           /* wchar_t buffer */
1035     Py_ssize_t size             /* size of buffer */
1036     );
1037 
1038 /* Copies the Unicode Object contents into the wchar_t buffer w.  At
1039    most size wchar_t characters are copied.
1040 
1041    Note that the resulting wchar_t string may or may not be
1042    0-terminated.  It is the responsibility of the caller to make sure
1043    that the wchar_t string is 0-terminated in case this is required by
1044    the application.
1045 
1046    Returns the number of wchar_t characters copied (excluding a
1047    possibly trailing 0-termination character) or -1 in case of an
1048    error. */
1049 
1050 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
1051     PyObject *unicode,          /* Unicode object */
1052     wchar_t *w,                 /* wchar_t buffer */
1053     Py_ssize_t size             /* size of buffer */
1054     );
1055 
1056 /* Convert the Unicode object to a wide character string. The output string
1057    always ends with a nul character. If size is not NULL, write the number of
1058    wide characters (excluding the null character) into *size.
1059 
1060    Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
1061    on success. On error, returns NULL, *size is undefined and raises a
1062    MemoryError. */
1063 
1064 PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
1065     PyObject *unicode,          /* Unicode object */
1066     Py_ssize_t *size            /* number of characters of the result */
1067     );
1068 
1069 #ifndef Py_LIMITED_API
1070 /* Similar to PyUnicode_AsWideCharString(unicode, NULL), but check if
1071    the string contains null characters. */
1072 PyAPI_FUNC(wchar_t*) _PyUnicode_AsWideCharString(
1073     PyObject *unicode           /* Unicode object */
1074     );
1075 
1076 PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
1077 #endif
1078 
1079 #endif
1080 
1081 /* --- Unicode ordinals --------------------------------------------------- */
1082 
1083 /* Create a Unicode Object from the given Unicode code point ordinal.
1084 
1085    The ordinal must be in range(0x110000). A ValueError is
1086    raised in case it is not.
1087 
1088 */
1089 
1090 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
1091 
1092 /* --- Free-list management ----------------------------------------------- */
1093 
1094 /* Clear the free list used by the Unicode implementation.
1095 
1096    This can be used to release memory used for objects on the free
1097    list back to the Python memory allocator.
1098 
1099 */
1100 
1101 PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
1102 
1103 /* === Builtin Codecs =====================================================
1104 
1105    Many of these APIs take two arguments encoding and errors. These
1106    parameters encoding and errors have the same semantics as the ones
1107    of the builtin str() API.
1108 
1109    Setting encoding to NULL causes the default encoding (UTF-8) to be used.
1110 
1111    Error handling is set by errors which may also be set to NULL
1112    meaning to use the default handling defined for the codec. Default
1113    error handling for all builtin codecs is "strict" (ValueErrors are
1114    raised).
1115 
   The codecs all use a similar interface. Only deviations from the
   generic ones are documented.
1118 
1119 */
1120 
1121 /* --- Manage the default encoding ---------------------------------------- */
1122 
1123 /* Returns a pointer to the default encoding (UTF-8) of the
1124    Unicode object unicode and the size of the encoded representation
1125    in bytes stored in *size.
1126 
1127    In case of an error, no *size is set.
1128 
1129    This function caches the UTF-8 encoded string in the unicodeobject
1130    and subsequent calls will return the same string.  The memory is released
1131    when the unicodeobject is deallocated.
1132 
1133    _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
1134    support the previous internal function with the same behaviour.
1135 
1136    *** This API is for interpreter INTERNAL USE ONLY and will likely
1137    *** be removed or changed in the future.
1138 
1139    *** If you need to access the Unicode object as UTF-8 bytes string,
1140    *** please use PyUnicode_AsUTF8String() instead.
1141 */
1142 
1143 #ifndef Py_LIMITED_API
1144 PyAPI_FUNC(char *) PyUnicode_AsUTF8AndSize(
1145     PyObject *unicode,
1146     Py_ssize_t *size);
1147 #define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
1148 #endif
1149 
1150 /* Returns a pointer to the default encoding (UTF-8) of the
1151    Unicode object unicode.
1152 
1153    Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
1154    in the unicodeobject.
1155 
1156    _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
1157    support the previous internal function with the same behaviour.
1158 
1159    Use of this API is DEPRECATED since no size information can be
1160    extracted from the returned data.
1161 
1162    *** This API is for interpreter INTERNAL USE ONLY and will likely
1163    *** be removed or changed for Python 3.1.
1164 
1165    *** If you need to access the Unicode object as UTF-8 bytes string,
1166    *** please use PyUnicode_AsUTF8String() instead.
1167 
1168 */
1169 
1170 #ifndef Py_LIMITED_API
1171 PyAPI_FUNC(char *) PyUnicode_AsUTF8(PyObject *unicode);
1172 #define _PyUnicode_AsString PyUnicode_AsUTF8
1173 #endif
1174 
1175 /* Returns "utf-8".  */
1176 
1177 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
1178 
1179 /* --- Generic Codecs ----------------------------------------------------- */
1180 
1181 /* Create a Unicode object by decoding the encoded string s of the
1182    given size. */
1183 
1184 PyAPI_FUNC(PyObject*) PyUnicode_Decode(
1185     const char *s,              /* encoded string */
1186     Py_ssize_t size,            /* size of buffer */
1187     const char *encoding,       /* encoding */
1188     const char *errors          /* error handling */
1189     );
1190 
1191 /* Decode a Unicode object unicode and return the result as Python
1192    object.
1193 
1194    This API is DEPRECATED. The only supported standard encoding is rot13.
1195    Use PyCodec_Decode() to decode with rot13 and non-standard codecs
1196    that decode from str. */
1197 
1198 PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
1199     PyObject *unicode,          /* Unicode object */
1200     const char *encoding,       /* encoding */
1201     const char *errors          /* error handling */
1202     ) Py_DEPRECATED(3.6);
1203 
1204 /* Decode a Unicode object unicode and return the result as Unicode
1205    object.
1206 
1207    This API is DEPRECATED. The only supported standard encoding is rot13.
1208    Use PyCodec_Decode() to decode with rot13 and non-standard codecs
1209    that decode from str to str. */
1210 
1211 PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
1212     PyObject *unicode,          /* Unicode object */
1213     const char *encoding,       /* encoding */
1214     const char *errors          /* error handling */
1215     ) Py_DEPRECATED(3.6);
1216 
1217 /* Encodes a Py_UNICODE buffer of the given size and returns a
1218    Python string object. */
1219 
1220 #ifndef Py_LIMITED_API
1221 PyAPI_FUNC(PyObject*) PyUnicode_Encode(
1222     const Py_UNICODE *s,        /* Unicode char buffer */
1223     Py_ssize_t size,            /* number of Py_UNICODE chars to encode */
1224     const char *encoding,       /* encoding */
1225     const char *errors          /* error handling */
1226     );
1227 #endif
1228 
1229 /* Encodes a Unicode object and returns the result as Python
1230    object.
1231 
1232    This API is DEPRECATED.  It is superceeded by PyUnicode_AsEncodedString()
1233    since all standard encodings (except rot13) encode str to bytes.
1234    Use PyCodec_Encode() for encoding with rot13 and non-standard codecs
1235    that encode form str to non-bytes. */
1236 
1237 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
1238     PyObject *unicode,          /* Unicode object */
1239     const char *encoding,       /* encoding */
1240     const char *errors          /* error handling */
1241     ) Py_DEPRECATED(3.6);
1242 
1243 /* Encodes a Unicode object and returns the result as Python string
1244    object. */
1245 
1246 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
1247     PyObject *unicode,          /* Unicode object */
1248     const char *encoding,       /* encoding */
1249     const char *errors          /* error handling */
1250     );
1251 
1252 /* Encodes a Unicode object and returns the result as Unicode
1253    object.
1254 
1255    This API is DEPRECATED.  The only supported standard encodings is rot13.
1256    Use PyCodec_Encode() to encode with rot13 and non-standard codecs
1257    that encode from str to str. */
1258 
1259 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
1260     PyObject *unicode,          /* Unicode object */
1261     const char *encoding,       /* encoding */
1262     const char *errors          /* error handling */
1263     ) Py_DEPRECATED(3.6);
1264 
1265 /* Build an encoding map. */
1266 
1267 PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
1268     PyObject* string            /* 256 character map */
1269    );
1270 
1271 /* --- UTF-7 Codecs ------------------------------------------------------- */
1272 
1273 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
1274     const char *string,         /* UTF-7 encoded string */
1275     Py_ssize_t length,          /* size of string */
1276     const char *errors          /* error handling */
1277     );
1278 
1279 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
1280     const char *string,         /* UTF-7 encoded string */
1281     Py_ssize_t length,          /* size of string */
1282     const char *errors,         /* error handling */
1283     Py_ssize_t *consumed        /* bytes consumed */
1284     );
1285 
1286 #ifndef Py_LIMITED_API
1287 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
1288     const Py_UNICODE *data,     /* Unicode char buffer */
1289     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1290     int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
1291     int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
1292     const char *errors          /* error handling */
1293     );
1294 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
1295     PyObject *unicode,          /* Unicode object */
1296     int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
1297     int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
1298     const char *errors          /* error handling */
1299     );
1300 #endif
1301 
1302 /* --- UTF-8 Codecs ------------------------------------------------------- */
1303 
1304 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
1305     const char *string,         /* UTF-8 encoded string */
1306     Py_ssize_t length,          /* size of string */
1307     const char *errors          /* error handling */
1308     );
1309 
1310 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
1311     const char *string,         /* UTF-8 encoded string */
1312     Py_ssize_t length,          /* size of string */
1313     const char *errors,         /* error handling */
1314     Py_ssize_t *consumed        /* bytes consumed */
1315     );
1316 
1317 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
1318     PyObject *unicode           /* Unicode object */
1319     );
1320 
1321 #ifndef Py_LIMITED_API
1322 PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
1323     PyObject *unicode,
1324     const char *errors);
1325 
1326 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
1327     const Py_UNICODE *data,     /* Unicode char buffer */
1328     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1329     const char *errors          /* error handling */
1330     );
1331 #endif
1332 
1333 /* --- UTF-32 Codecs ------------------------------------------------------ */
1334 
1335 /* Decodes length bytes from a UTF-32 encoded buffer string and returns
1336    the corresponding Unicode object.
1337 
1338    errors (if non-NULL) defines the error handling. It defaults
1339    to "strict".
1340 
1341    If byteorder is non-NULL, the decoder starts decoding using the
1342    given byte order:
1343 
1344     *byteorder == -1: little endian
1345     *byteorder == 0:  native order
1346     *byteorder == 1:  big endian
1347 
1348    In native mode, the first four bytes of the stream are checked for a
1349    BOM mark. If found, the BOM mark is analysed, the byte order
1350    adjusted and the BOM skipped.  In the other modes, no BOM mark
1351    interpretation is done. After completion, *byteorder is set to the
1352    current byte order at the end of input data.
1353 
1354    If byteorder is NULL, the codec starts in native order mode.
1355 
1356 */
1357 
1358 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
1359     const char *string,         /* UTF-32 encoded string */
1360     Py_ssize_t length,          /* size of string */
1361     const char *errors,         /* error handling */
1362     int *byteorder              /* pointer to byteorder to use
1363                                    0=native;-1=LE,1=BE; updated on
1364                                    exit */
1365     );
1366 
1367 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
1368     const char *string,         /* UTF-32 encoded string */
1369     Py_ssize_t length,          /* size of string */
1370     const char *errors,         /* error handling */
1371     int *byteorder,             /* pointer to byteorder to use
1372                                    0=native;-1=LE,1=BE; updated on
1373                                    exit */
1374     Py_ssize_t *consumed        /* bytes consumed */
1375     );
1376 
1377 /* Returns a Python string using the UTF-32 encoding in native byte
1378    order. The string always starts with a BOM mark.  */
1379 
1380 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
1381     PyObject *unicode           /* Unicode object */
1382     );
1383 
1384 /* Returns a Python string object holding the UTF-32 encoded value of
1385    the Unicode data.
1386 
1387    If byteorder is not 0, output is written according to the following
1388    byte order:
1389 
1390    byteorder == -1: little endian
1391    byteorder == 0:  native byte order (writes a BOM mark)
1392    byteorder == 1:  big endian
1393 
1394    If byteorder is 0, the output string will always start with the
1395    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
1396    prepended.
1397 
1398 */
1399 
1400 #ifndef Py_LIMITED_API
1401 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
1402     const Py_UNICODE *data,     /* Unicode char buffer */
1403     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1404     const char *errors,         /* error handling */
1405     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1406     );
1407 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
1408     PyObject *object,           /* Unicode object */
1409     const char *errors,         /* error handling */
1410     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1411     );
1412 #endif
1413 
1414 /* --- UTF-16 Codecs ------------------------------------------------------ */
1415 
1416 /* Decodes length bytes from a UTF-16 encoded buffer string and returns
1417    the corresponding Unicode object.
1418 
1419    errors (if non-NULL) defines the error handling. It defaults
1420    to "strict".
1421 
1422    If byteorder is non-NULL, the decoder starts decoding using the
1423    given byte order:
1424 
1425     *byteorder == -1: little endian
1426     *byteorder == 0:  native order
1427     *byteorder == 1:  big endian
1428 
1429    In native mode, the first two bytes of the stream are checked for a
1430    BOM mark. If found, the BOM mark is analysed, the byte order
1431    adjusted and the BOM skipped.  In the other modes, no BOM mark
1432    interpretation is done. After completion, *byteorder is set to the
1433    current byte order at the end of input data.
1434 
1435    If byteorder is NULL, the codec starts in native order mode.
1436 
1437 */
1438 
1439 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
1440     const char *string,         /* UTF-16 encoded string */
1441     Py_ssize_t length,          /* size of string */
1442     const char *errors,         /* error handling */
1443     int *byteorder              /* pointer to byteorder to use
1444                                    0=native;-1=LE,1=BE; updated on
1445                                    exit */
1446     );
1447 
1448 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
1449     const char *string,         /* UTF-16 encoded string */
1450     Py_ssize_t length,          /* size of string */
1451     const char *errors,         /* error handling */
1452     int *byteorder,             /* pointer to byteorder to use
1453                                    0=native;-1=LE,1=BE; updated on
1454                                    exit */
1455     Py_ssize_t *consumed        /* bytes consumed */
1456     );
1457 
1458 /* Returns a Python string using the UTF-16 encoding in native byte
1459    order. The string always starts with a BOM mark.  */
1460 
1461 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
1462     PyObject *unicode           /* Unicode object */
1463     );
1464 
1465 /* Returns a Python string object holding the UTF-16 encoded value of
1466    the Unicode data.
1467 
1468    If byteorder is not 0, output is written according to the following
1469    byte order:
1470 
1471    byteorder == -1: little endian
1472    byteorder == 0:  native byte order (writes a BOM mark)
1473    byteorder == 1:  big endian
1474 
1475    If byteorder is 0, the output string will always start with the
1476    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
1477    prepended.
1478 
1479    Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
1480    UCS-2. This trick makes it possible to add full UTF-16 capabilities
1481    at a later point without compromising the APIs.
1482 
1483 */
1484 
1485 #ifndef Py_LIMITED_API
1486 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
1487     const Py_UNICODE *data,     /* Unicode char buffer */
1488     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1489     const char *errors,         /* error handling */
1490     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1491     );
1492 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
1493     PyObject* unicode,          /* Unicode object */
1494     const char *errors,         /* error handling */
1495     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1496     );
1497 #endif
1498 
1499 /* --- Unicode-Escape Codecs ---------------------------------------------- */
1500 
1501 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
1502     const char *string,         /* Unicode-Escape encoded string */
1503     Py_ssize_t length,          /* size of string */
1504     const char *errors          /* error handling */
1505     );
1506 
1507 #ifndef Py_LIMITED_API
1508 /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
1509    chars. */
1510 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape(
1511         const char *string,     /* Unicode-Escape encoded string */
1512         Py_ssize_t length,      /* size of string */
1513         const char *errors,     /* error handling */
1514         const char **first_invalid_escape  /* on return, points to first
1515                                               invalid escaped char in
1516                                               string. */
1517 );
1518 #endif
1519 
1520 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
1521     PyObject *unicode           /* Unicode object */
1522     );
1523 
1524 #ifndef Py_LIMITED_API
1525 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
1526     const Py_UNICODE *data,     /* Unicode char buffer */
1527     Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
1528     );
1529 #endif
1530 
1531 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
1532 
1533 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
1534     const char *string,         /* Raw-Unicode-Escape encoded string */
1535     Py_ssize_t length,          /* size of string */
1536     const char *errors          /* error handling */
1537     );
1538 
1539 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
1540     PyObject *unicode           /* Unicode object */
1541     );
1542 
1543 #ifndef Py_LIMITED_API
1544 PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
1545     const Py_UNICODE *data,     /* Unicode char buffer */
1546     Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
1547     );
1548 #endif
1549 
1550 /* --- Unicode Internal Codec ---------------------------------------------
1551 
1552     Only for internal use in _codecsmodule.c */
1553 
1554 #ifndef Py_LIMITED_API
1555 PyObject *_PyUnicode_DecodeUnicodeInternal(
1556     const char *string,
1557     Py_ssize_t length,
1558     const char *errors
1559     );
1560 #endif
1561 
1562 /* --- Latin-1 Codecs -----------------------------------------------------
1563
1564    Note: Latin-1 corresponds to the first 256 Unicode ordinals.
1565
1566 */
1567
1568 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
1569     const char *string,         /* Latin-1 encoded string */
1570     Py_ssize_t length,          /* size of string */
1571     const char *errors          /* error handling */
1572     );
1573
1574 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
1575     PyObject *unicode           /* Unicode object */
1576     );
1577
1578 #ifndef Py_LIMITED_API
1579 PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
1580     PyObject* unicode,          /* Unicode object */
1581     const char* errors);        /* error handling */
1582
1583 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
1584     const Py_UNICODE *data,     /* Unicode char buffer */
1585     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1586     const char *errors          /* error handling */
1587     );
1588 #endif /* !Py_LIMITED_API */
1589 
1590 /* --- ASCII Codecs -------------------------------------------------------
1591
1592    Only 7-bit ASCII data is accepted. All other codes generate errors.
1593
1594 */
1595
1596 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
1597     const char *string,         /* ASCII encoded string */
1598     Py_ssize_t length,          /* size of string */
1599     const char *errors          /* error handling */
1600     );
1601
1602 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
1603     PyObject *unicode           /* Unicode object */
1604     );
1605
1606 #ifndef Py_LIMITED_API
1607 PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
1608     PyObject* unicode,          /* Unicode object */
1609     const char* errors);        /* error handling */
1610
1611 PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
1612     const Py_UNICODE *data,     /* Unicode char buffer */
1613     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1614     const char *errors          /* error handling */
1615     );
1616 #endif /* !Py_LIMITED_API */
1617 
1618 /* --- Character Map Codecs -----------------------------------------------
1619
1620    This codec uses mappings to encode and decode characters.
1621
1622    Decoding mappings must map byte ordinals (integers in the range from 0 to
1623    255) to Unicode strings, integers (which are then interpreted as Unicode
1624    ordinals) or None.  Unmapped data bytes (ones which cause a LookupError)
1625    as well as bytes mapped to None, 0xFFFE or '\ufffe' are treated as
1626    "undefined mapping" and cause an error.
1627
1628    Encoding mappings must map Unicode ordinal integers to bytes objects,
1629    integers in the range from 0 to 255 or None.  Unmapped character
1630    ordinals (ones which cause a LookupError) as well as ordinals mapped to
1631    None are treated as "undefined mapping" and cause an error.
1632
1633 */
1634
1635 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
1636     const char *string,         /* Encoded string */
1637     Py_ssize_t length,          /* size of string */
1638     PyObject *mapping,          /* decoding mapping */
1639     const char *errors          /* error handling */
1640     );
1641
1642 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
1643     PyObject *unicode,          /* Unicode object */
1644     PyObject *mapping           /* encoding mapping */
1645     );
1646
1647 #ifndef Py_LIMITED_API
1648 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
1649     const Py_UNICODE *data,     /* Unicode char buffer */
1650     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1651     PyObject *mapping,          /* encoding mapping */
1652     const char *errors          /* error handling */
1653     );
1654 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
1655     PyObject *unicode,          /* Unicode object */
1656     PyObject *mapping,          /* encoding mapping */
1657     const char *errors          /* error handling */
1658     );
1659 #endif /* !Py_LIMITED_API */
1660 
1661 /* Translate a Py_UNICODE buffer of the given length by applying a
1662    character mapping table to it and return the resulting Unicode
1663    object.
1664
1665    The mapping table must map Unicode ordinal integers to Unicode strings,
1666    Unicode ordinal integers or None (causing deletion of the character).
1667
1668    Mapping tables may be dictionaries or sequences. Unmapped character
1669    ordinals (ones which cause a LookupError) are left untouched and
1670    are copied as-is.
1671
1672 */
1673
1674 #ifndef Py_LIMITED_API
1675 PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
1676     const Py_UNICODE *data,     /* Unicode char buffer */
1677     Py_ssize_t length,          /* Number of Py_UNICODE chars to translate */
1678     PyObject *table,            /* Translate table */
1679     const char *errors          /* error handling */
1680     );
1681 #endif /* !Py_LIMITED_API */
1682 
1683 #ifdef MS_WINDOWS
1684
1685 /* --- MBCS codecs for Windows -------------------------------------------- */
1686
1687 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
1688     const char *string,         /* MBCS encoded string */
1689     Py_ssize_t length,          /* size of string */
1690     const char *errors          /* error handling */
1691     );
1692
1693 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
1694     const char *string,         /* MBCS encoded string */
1695     Py_ssize_t length,          /* size of string */
1696     const char *errors,         /* error handling */
1697     Py_ssize_t *consumed        /* bytes consumed */
1698     );
1699
1700 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
1701 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
1702     int code_page,              /* code page number */
1703     const char *string,         /* encoded string */
1704     Py_ssize_t length,          /* size of string */
1705     const char *errors,         /* error handling */
1706     Py_ssize_t *consumed        /* bytes consumed */
1707     );
1708 #endif /* !Py_LIMITED_API || Py_LIMITED_API >= 0x03030000 */
1709
1710 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
1711     PyObject *unicode           /* Unicode object */
1712     );
1713
1714 #ifndef Py_LIMITED_API
1715 PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
1716     const Py_UNICODE *data,     /* Unicode char buffer */
1717     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1718     const char *errors          /* error handling */
1719     );
1720 #endif /* !Py_LIMITED_API */
1721
1722 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
1723 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
1724     int code_page,              /* code page number */
1725     PyObject *unicode,          /* Unicode object */
1726     const char *errors          /* error handling */
1727     );
1728 #endif /* !Py_LIMITED_API || Py_LIMITED_API >= 0x03030000 */
1729
1730 #endif /* MS_WINDOWS */
1731 
1732 /* --- Decimal Encoder ---------------------------------------------------- */
1733
1734 /* Takes a Unicode string holding a decimal value and writes it into
1735    an output buffer using standard ASCII digit codes.
1736
1737    The output buffer has to provide at least length+1 bytes of storage
1738    area. The output string is 0-terminated.
1739
1740    The encoder converts whitespace to ' ', decimal characters to their
1741    corresponding ASCII digit and all other Latin-1 characters except
1742    \0 as-is. Characters outside this range (Unicode ordinals 1-255)
1743    are treated as errors. This includes embedded NUL characters.
1744
1745    Error handling is defined by the errors argument:
1746
1747       NULL or "strict": raise a ValueError
1748       "ignore": ignore the wrong characters (these are not copied to the
1749                 output buffer)
1750       "replace": replaces illegal characters with '?'
1751
1752    Returns 0 on success, -1 on failure.
1753
1754 */
1755
1756 #ifndef Py_LIMITED_API
1757 PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
1758     Py_UNICODE *s,              /* Unicode buffer */
1759     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1760     char *output,               /* Output buffer; must have size >= length+1 */
1761     const char *errors          /* error handling */
1762     );
1763 #endif /* !Py_LIMITED_API */
1764 
1765 /* Transforms code points that have decimal digit property to the
1766    corresponding ASCII digit code points.
1767
1768    Returns a new Unicode string on success, NULL on failure.
1769 */
1770
1771 #ifndef Py_LIMITED_API
1772 PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
1773     Py_UNICODE *s,              /* Unicode buffer */
1774     Py_ssize_t length           /* Number of Py_UNICODE chars to transform */
1775     );
1776 #endif /* !Py_LIMITED_API */
1777 
1778 /* Similar to PyUnicode_TransformDecimalToASCII(), but takes a PyObject
1779    as argument instead of a raw buffer and length.  This function additionally
1780    transforms spaces to ASCII because this is what the callers in longobject,
1781    floatobject, and complexobject did anyway. */
1782
1783 #ifndef Py_LIMITED_API
1784 PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
1785     PyObject *unicode           /* Unicode object */
1786     );
1787 #endif /* !Py_LIMITED_API */
1788 
1789 /* --- Locale encoding --------------------------------------------------- */
1790
1791 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
1792 /* Decode a string from the current locale encoding. The decoder is strict if
1793    *surrogateescape* is equal to zero, otherwise it uses the 'surrogateescape'
1794    error handler (PEP 383) to escape undecodable bytes. If a byte sequence can
1795    be decoded as a surrogate character and *surrogateescape* is not equal to
1796    zero, the byte sequence is escaped using the 'surrogateescape' error handler
1797    instead of being decoded. *str* must end with a null character but cannot
1798    contain embedded null characters. */
1799
1800 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
1801     const char *str,            /* locale-encoded string */
1802     Py_ssize_t len,             /* size of str */
1803     const char *errors);        /* error handling; NOTE(review): the comment above calls this *surrogateescape* -- confirm */
1804
1805 /* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
1806    length using strlen(). */
1807
1808 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
1809     const char *str,            /* locale-encoded string */
1810     const char *errors);        /* error handling */
1811
1812 /* Encode a Unicode object to the current locale encoding. The encoder is
1813    strict if *surrogateescape* is equal to zero, otherwise the
1814    "surrogateescape" error handler is used. Return a bytes object. The string
1815    cannot contain embedded null characters. */
1816
1817 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
1818     PyObject *unicode,          /* Unicode object */
1819     const char *errors          /* error handling; NOTE(review): the comment above calls this *surrogateescape* -- confirm */
1820     );
1821 #endif /* !Py_LIMITED_API || Py_LIMITED_API >= 0x03030000 */
1822 
1823 /* --- File system encoding ---------------------------------------------- */
1824
1825 /* ParseTuple converter: encode str objects to bytes using
1826    PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
1827
1828 PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
1829
1830 /* ParseTuple converter: decode bytes objects to unicode using
1831    PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
1832
1833 PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
1834
1835 /* Decode a null-terminated string using Py_FileSystemDefaultEncoding
1836    and the "surrogateescape" error handler.
1837
1838    If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1839    encoding.
1840
1841    Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
1842 */
1843
1844 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
1845     const char *s               /* encoded string */
1846     );
1847
1848 /* Decode a string using Py_FileSystemDefaultEncoding
1849    and the "surrogateescape" error handler.
1850
1851    If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1852    encoding.
1853 */
1854
1855 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
1856     const char *s,               /* encoded string */
1857     Py_ssize_t size              /* size of s */
1858     );
1859
1860 /* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
1861    "surrogateescape" error handler, and return bytes.
1862
1863    If Py_FileSystemDefaultEncoding is not set, fall back to the locale
1864    encoding.
1865 */
1866
1867 PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
1868     PyObject *unicode           /* Unicode object */
1869     );
1870 
1871 /* --- Methods & Slots ----------------------------------------------------
1872
1873    These are capable of handling Unicode objects and strings on input
1874    (we refer to them as strings in the descriptions) and return
1875    Unicode objects or integers as appropriate. */
1876
1877 /* Concatenate two strings giving a new Unicode string. */
1878
1879 PyAPI_FUNC(PyObject*) PyUnicode_Concat(
1880     PyObject *left,             /* Left string */
1881     PyObject *right             /* Right string */
1882     );
1883
1884 /* Concatenate two strings and put the result in *pleft
1885    (sets *pleft to NULL on error) */
1886
1887 PyAPI_FUNC(void) PyUnicode_Append(
1888     PyObject **pleft,           /* Pointer to left string */
1889     PyObject *right             /* Right string */
1890     );
1891
1892 /* Concatenate two strings, put the result in *pleft and drop the right object
1893    (sets *pleft to NULL on error) */
1894
1895 PyAPI_FUNC(void) PyUnicode_AppendAndDel(
1896     PyObject **pleft,           /* Pointer to left string */
1897     PyObject *right             /* Right string */
1898     );
1899 
1900 /* Split a string giving a list of Unicode strings.
1901
1902    If sep is NULL, splitting will be done at all whitespace
1903    substrings. Otherwise, splits occur at the given separator.
1904
1905    At most maxsplit splits will be done. If negative, no limit is set.
1906
1907    Separators are not included in the resulting list.
1908
1909 */
1910
1911 PyAPI_FUNC(PyObject*) PyUnicode_Split(
1912     PyObject *s,                /* String to split */
1913     PyObject *sep,              /* String separator */
1914     Py_ssize_t maxsplit         /* Maxsplit count */
1915     );
1916
1917 /* Ditto, but split at line breaks.
1918
1919    CRLF is considered to be one line break. Line breaks are not
1920    included in the resulting list unless keepends is true. */
1921
1922 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
1923     PyObject *s,                /* String to split */
1924     int keepends                /* If true, line end markers are included */
1925     );
1926
1927 /* Partition a string using a given separator. */
1928
1929 PyAPI_FUNC(PyObject*) PyUnicode_Partition(
1930     PyObject *s,                /* String to partition */
1931     PyObject *sep               /* String separator */
1932     );
1933
1934 /* Partition a string using a given separator, searching from the end of the
1935    string. */
1936
1937 PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
1938     PyObject *s,                /* String to partition */
1939     PyObject *sep               /* String separator */
1940     );
1941
1942 /* Split a string giving a list of Unicode strings.
1943
1944    If sep is NULL, splitting will be done at all whitespace
1945    substrings. Otherwise, splits occur at the given separator.
1946
1947    At most maxsplit splits will be done. Unlike PyUnicode_Split,
1948    PyUnicode_RSplit splits from the end of the string. If maxsplit is
1949    negative, no limit is set.
1950
1951    Separators are not included in the resulting list.
1952
1953 */
1954
1955 PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
1956     PyObject *s,                /* String to split */
1957     PyObject *sep,              /* String separator */
1958     Py_ssize_t maxsplit         /* Maxsplit count */
1959     );
1960 
1961 /* Translate a string by applying a character mapping table to it and
1962    return the resulting Unicode object.
1963
1964    The mapping table must map Unicode ordinal integers to Unicode strings,
1965    Unicode ordinal integers or None (causing deletion of the character).
1966
1967    Mapping tables may be dictionaries or sequences. Unmapped character
1968    ordinals (ones which cause a LookupError) are left untouched and
1969    are copied as-is.
1970
1971 */
1972
1973 PyAPI_FUNC(PyObject *) PyUnicode_Translate(
1974     PyObject *str,              /* String */
1975     PyObject *table,            /* Translate table */
1976     const char *errors          /* error handling */
1977     );
1978
1979 /* Join a sequence of strings using the given separator and return
1980    the resulting Unicode string. */
1981
1982 PyAPI_FUNC(PyObject*) PyUnicode_Join(
1983     PyObject *separator,        /* Separator string */
1984     PyObject *seq               /* Sequence object */
1985     );
1986
1987 #ifndef Py_LIMITED_API
1988 PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
1989     PyObject *separator,        /* Separator string */
1990     PyObject **items,           /* presumably the array of objects to join -- confirm */
1991     Py_ssize_t seqlen           /* presumably the number of entries in items -- confirm */
1992     );
1993 #endif /* !Py_LIMITED_API */
1994 
1995 /* Return 1 if substr matches str[start:end] at the given tail end, 0
1996    otherwise. */
1997
1998 PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
1999     PyObject *str,              /* String */
2000     PyObject *substr,           /* Prefix or Suffix string */
2001     Py_ssize_t start,           /* Start index */
2002     Py_ssize_t end,             /* Stop index */
2003     int direction               /* Tail end: -1 prefix, +1 suffix */
2004     );
2005
2006 /* Return the first position of substr in str[start:end] using the
2007    given search direction or -1 if not found. -2 is returned in case
2008    an error occurred and an exception is set. */
2009
2010 PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
2011     PyObject *str,              /* String */
2012     PyObject *substr,           /* Substring to find */
2013     Py_ssize_t start,           /* Start index */
2014     Py_ssize_t end,             /* Stop index */
2015     int direction               /* Find direction: +1 forward, -1 backward */
2016     );
2017
2018 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
2019 /* Like PyUnicode_Find, but search for single character only. */
2020 PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
2021     PyObject *str,              /* String */
2022     Py_UCS4 ch,                 /* Character to find */
2023     Py_ssize_t start,           /* Start index */
2024     Py_ssize_t end,             /* Stop index */
2025     int direction               /* Find direction, as for PyUnicode_Find */
2026     );
2027 #endif /* !Py_LIMITED_API || Py_LIMITED_API >= 0x03030000 */
2028
2029 /* Count the number of occurrences of substr in str[start:end]. */
2030
2031 PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
2032     PyObject *str,              /* String */
2033     PyObject *substr,           /* Substring to count */
2034     Py_ssize_t start,           /* Start index */
2035     Py_ssize_t end              /* Stop index */
2036     );
2037 
2038 /* Replace at most maxcount occurrences of substr in str with replstr
2039    and return the resulting Unicode object. */
2040
2041 PyAPI_FUNC(PyObject *) PyUnicode_Replace(
2042     PyObject *str,              /* String */
2043     PyObject *substr,           /* Substring to find */
2044     PyObject *replstr,          /* Replacement string */
2045     Py_ssize_t maxcount         /* Max. number of replacements to apply;
2046                                    -1 = all */
2047     );
2048
2049 /* Compare two strings and return -1, 0, 1 for less than, equal,
2050    greater than resp.
2051    Raise an exception and return -1 on error. */
2052
2053 PyAPI_FUNC(int) PyUnicode_Compare(
2054     PyObject *left,             /* Left string */
2055     PyObject *right             /* Right string */
2056     );
2057 
2058 #ifndef Py_LIMITED_API
2059 /* Test whether a unicode object is equal to an ASCII identifier.  Return 1
2060    if true, 0 otherwise.  The right argument must be an ASCII identifier.
2061    Any error that occurs inside is cleared before returning. */
2062
2063 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId(
2064     PyObject *left,             /* Left string */
2065     _Py_Identifier *right       /* Right identifier */
2066     );
2067 #endif /* !Py_LIMITED_API */
2068
2069 /* Compare a Unicode object with C string and return -1, 0, 1 for less than,
2070    equal, and greater than, respectively.  It is best to pass only
2071    ASCII-encoded strings, but the function interprets the input string as
2072    ISO-8859-1 if it contains non-ASCII characters.
2073    This function does not raise exceptions. */
2074
2075 PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
2076     PyObject *left,             /* Unicode object */
2077     const char *right           /* ASCII-encoded string */
2078     );
2079
2080 #ifndef Py_LIMITED_API
2081 /* Test whether a unicode object is equal to an ASCII string.  Return 1 if
2082    true, 0 otherwise.  The right argument must be an ASCII-encoded string.
2083    Any error that occurs inside is cleared before returning. */
2084
2085 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
2086     PyObject *left,             /* Unicode object */
2087     const char *right           /* ASCII-encoded string */
2088     );
2089 #endif /* !Py_LIMITED_API */
2090 
2091 /* Rich compare two strings and return one of the following:
2092
2093    - NULL in case an exception was raised
2094    - Py_True or Py_False for successful comparisons
2095    - Py_NotImplemented in case the type combination is unknown
2096
2097    Possible values for op:
2098
2099      Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
2100
2101 */
2102
2103 PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
2104     PyObject *left,             /* Left string */
2105     PyObject *right,            /* Right string */
2106     int op                      /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
2107     );
2108
2109 /* Apply an argument tuple or dictionary to a format string and return
2110    the resulting Unicode string. */
2111
2112 PyAPI_FUNC(PyObject *) PyUnicode_Format(
2113     PyObject *format,           /* Format string */
2114     PyObject *args              /* Argument tuple or dictionary */
2115     );
2116
2117 /* Check whether element is contained in container and return 1/0
2118    accordingly.
2119
2120    element has to coerce to a one element Unicode string. -1 is
2121    returned in case of an error. */
2122
2123 PyAPI_FUNC(int) PyUnicode_Contains(
2124     PyObject *container,        /* Container string */
2125     PyObject *element           /* Element string */
2126     );
2127
2128 /* Check whether argument is a valid identifier. */
2129
2130 PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
2131 
2132 #ifndef Py_LIMITED_API
2133 /* Externally visible for str.strip(unicode) */
2134 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
2135     PyObject *self,             /* presumably the string to strip -- confirm */
2136     int striptype,              /* NOTE(review): accepted values are not documented here; see the definition */
2137     PyObject *sepobj            /* presumably the characters to strip -- confirm */
2138     );
2139 #endif /* !Py_LIMITED_API */
2140 
2141 /* Using explicit passed-in values, insert the thousands grouping
2142    into the string pointed to by buffer.  For the argument descriptions,
2143    see Objects/stringlib/localeutil.h */
2144 #ifndef Py_LIMITED_API
2145 PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
2146     _PyUnicodeWriter *writer,   /* NOTE(review): the comment above says "buffer", which is not a parameter -- presumably this writer; confirm */
2147     Py_ssize_t n_buffer,
2148     PyObject *digits,
2149     Py_ssize_t d_pos,
2150     Py_ssize_t n_digits,
2151     Py_ssize_t min_width,
2152     const char *grouping,
2153     PyObject *thousands_sep,
2154     Py_UCS4 *maxchar);
2155 #endif /* !Py_LIMITED_API */
2156 /* === Characters Type APIs =============================================== */
2157
2158 /* Helper array used by Py_UNICODE_ISSPACE(). */
2159
2160 #ifndef Py_LIMITED_API
2161 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
2162
2163 /* These should not be used directly. Use the Py_UNICODE_IS* and
2164    Py_UNICODE_TO* macros instead.
2165
2166    These APIs are implemented in Objects/unicodectype.c.
2167
2168 */
2169
2170 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
2171     Py_UCS4 ch       /* Unicode character */
2172     );
2173
2174 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
2175     Py_UCS4 ch       /* Unicode character */
2176     );
2177
2178 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
2179     Py_UCS4 ch       /* Unicode character */
2180     );
2181
2182 PyAPI_FUNC(int) _PyUnicode_IsXidStart(
2183     Py_UCS4 ch       /* Unicode character */
2184     );
2185
2186 PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
2187     Py_UCS4 ch       /* Unicode character */
2188     );
2189
2190 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
2191     const Py_UCS4 ch         /* Unicode character */
2192     );
2193
2194 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
2195     const Py_UCS4 ch         /* Unicode character */
2196     );
2197
2198 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
2199     Py_UCS4 ch       /* Unicode character */
2200     );
2201
2202 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
2203     Py_UCS4 ch       /* Unicode character */
2204     );
2205
2206 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
2207     Py_UCS4 ch       /* Unicode character */
2208     );
2209
2210 PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
2211     Py_UCS4 ch,       /* Unicode character */
2212     Py_UCS4 *res      /* presumably the output buffer; required capacity not stated here -- confirm */
2213     );
2214
2215 PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
2216     Py_UCS4 ch,       /* Unicode character */
2217     Py_UCS4 *res      /* presumably the output buffer; required capacity not stated here -- confirm */
2218     );
2219
2220 PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
2221     Py_UCS4 ch,       /* Unicode character */
2222     Py_UCS4 *res      /* presumably the output buffer; required capacity not stated here -- confirm */
2223     );
2224
2225 PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
2226     Py_UCS4 ch,       /* Unicode character */
2227     Py_UCS4 *res      /* presumably the output buffer; required capacity not stated here -- confirm */
2228     );
2229
2230 PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
2231     Py_UCS4 ch         /* Unicode character */
2232     );
2233
2234 PyAPI_FUNC(int) _PyUnicode_IsCased(
2235     Py_UCS4 ch         /* Unicode character */
2236     );
2237
2238 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
2239     Py_UCS4 ch       /* Unicode character */
2240     );
2241
2242 PyAPI_FUNC(int) _PyUnicode_ToDigit(
2243     Py_UCS4 ch       /* Unicode character */
2244     );
2245
2246 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
2247     Py_UCS4 ch       /* Unicode character */
2248     );
2249
2250 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
2251     Py_UCS4 ch       /* Unicode character */
2252     );
2253
2254 PyAPI_FUNC(int) _PyUnicode_IsDigit(
2255     Py_UCS4 ch       /* Unicode character */
2256     );
2257
2258 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
2259     Py_UCS4 ch       /* Unicode character */
2260     );
2261
2262 PyAPI_FUNC(int) _PyUnicode_IsPrintable(
2263     Py_UCS4 ch       /* Unicode character */
2264     );
2265
2266 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
2267     Py_UCS4 ch       /* Unicode character */
2268     );
2269 
2270 PyAPI_FUNC(size_t) Py_UNICODE_strlen(
2271     const Py_UNICODE *u         /* NOTE(review): the Py_UNICODE_str* names mirror <string.h>; presumably same semantics on Py_UNICODE buffers -- confirm */
2272     );
2273
2274 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
2275     Py_UNICODE *s1,
2276     const Py_UNICODE *s2);
2277
2278 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
2279     Py_UNICODE *s1, const Py_UNICODE *s2);
2280
2281 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
2282     Py_UNICODE *s1,
2283     const Py_UNICODE *s2,
2284     size_t n);
2285
2286 PyAPI_FUNC(int) Py_UNICODE_strcmp(
2287     const Py_UNICODE *s1,
2288     const Py_UNICODE *s2
2289     );
2290
2291 PyAPI_FUNC(int) Py_UNICODE_strncmp(
2292     const Py_UNICODE *s1,
2293     const Py_UNICODE *s2,
2294     size_t n
2295     );
2296
2297 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
2298     const Py_UNICODE *s,
2299     Py_UNICODE c
2300     );
2301
2302 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
2303     const Py_UNICODE *s,
2304     Py_UNICODE c
2305     );
2306 
2307 PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);  /* NOTE(review): parameters are unnamed here; see the definition for their meaning */
2308
2309 /* Create a copy of a unicode string ending with a nul character. Return NULL
2310    and raise a MemoryError exception on memory allocation failure, otherwise
2311    return a newly allocated buffer (use PyMem_Free() to free the buffer). */
2312
2313 PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
2314     PyObject *unicode           /* Unicode object */
2315     );
2316 #endif /* !Py_LIMITED_API */
2317 
2318 #if defined(Py_DEBUG) && !defined(Py_LIMITED_API)
2319 PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
2320     PyObject *op,               /* object to check */
2321     int check_content);         /* NOTE(review): presumably non-zero also validates the string contents -- confirm */
2322 #elif !defined(NDEBUG)
2323 /* Fallback for asserts that call _PyUnicode_CheckConsistency() when building with
2324  * asserts but without Py_DEBUG: it only runs PyUnicode_Check(op); check_content is ignored. */
2325 #define _PyUnicode_CheckConsistency(op, check_content) PyUnicode_Check(op)
2326 #endif /* Py_DEBUG && !Py_LIMITED_API / !NDEBUG */
2327 
2328 #ifndef Py_LIMITED_API
2329 /* Return an interned Unicode object for an Identifier; may fail if there is no memory. */
2330 PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
2331 /* Clear all static strings. */
2332 PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
2333
2334 /* Fast equality check when the inputs are known to be exact unicode types
2335    and where the hash values are equal (i.e. a very probable match).
2336    The two operands are unnamed in this prototype. */ PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);
2337 #endif /* !Py_LIMITED_API */
2338 
2339 #ifdef __cplusplus
2340 }
2341 #endif
2342 #endif /* !Py_UNICODEOBJECT_H */
2343