1 #ifndef Py_CPYTHON_UNICODEOBJECT_H
2 #  error "this header file must not be included directly"
3 #endif
4 
5 /* Py_UNICODE was the native Unicode storage format (code unit) used by
6    Python and represents a single Unicode element in the Unicode type.
7    With PEP 393, Py_UNICODE is deprecated and replaced with a
8    typedef to wchar_t. */
/* PY_UNICODE_TYPE names the C type behind the legacy code unit; Py_UNICODE
   is the deprecated typedef for that same type (wchar_t). */
#define PY_UNICODE_TYPE wchar_t
/* Py_DEPRECATED(3.3) */ typedef wchar_t Py_UNICODE;
11 
12 /* --- Internal Unicode Operations ---------------------------------------- */
13 
/* Default USE_UNICODE_WCHAR_CACHE to 1 unless it was already defined before
   this header was processed. */
#ifndef USE_UNICODE_WCHAR_CACHE
#  define USE_UNICODE_WCHAR_CACHE 1
#endif /* USE_UNICODE_WCHAR_CACHE */
17 
/* Since splitting on whitespace is an important use case, and
   whitespace in most situations is solely ASCII whitespace, we
   optimize for the common case by using a quick look-up table
   _Py_ascii_whitespace with an inlined check: code points below 128
   index the table directly, everything else is delegated to
   _PyUnicode_IsWhitespace().

   Note: "ch" is evaluated twice, so it must not have side effects.
 */
#define Py_UNICODE_ISSPACE(ch) \
    ((Py_UCS4)(ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
26 
/* Case and line-break predicates: plain aliases for the corresponding
   _PyUnicode_Is*() functions. */
#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)

/* Case conversions: aliases for the _PyUnicode_To*case() functions. */
#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)

/* Numeric and printable predicates. */
#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)

/* Numeric value conversions. */
#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)

#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)

/* True if "ch" is alphabetic, a decimal digit, a digit or numeric.
   "ch" appears four times in the expansion and, because || short-circuits,
   is evaluated between one and four times: do not pass an expression with
   side effects. */
#define Py_UNICODE_ISALNUM(ch) \
       (Py_UNICODE_ISALPHA(ch) || \
    Py_UNICODE_ISDECIMAL(ch) || \
    Py_UNICODE_ISDIGIT(ch) || \
    Py_UNICODE_ISNUMERIC(ch))
52 
53 Py_DEPRECATED(3.3) static inline void
Py_UNICODE_COPY(Py_UNICODE * target,const Py_UNICODE * source,Py_ssize_t length)54 Py_UNICODE_COPY(Py_UNICODE *target, const Py_UNICODE *source, Py_ssize_t length) {
55     memcpy(target, source, (size_t)(length) * sizeof(Py_UNICODE));
56 }
57 
58 Py_DEPRECATED(3.3) static inline void
Py_UNICODE_FILL(Py_UNICODE * target,Py_UNICODE value,Py_ssize_t length)59 Py_UNICODE_FILL(Py_UNICODE *target, Py_UNICODE value, Py_ssize_t length) {
60     Py_ssize_t i;
61     for (i = 0; i < length; i++) {
62         target[i] = value;
63     }
64 }
65 
/* Macros to work with surrogates.

   The Py_UNICODE_IS_*SURROGATE() tests compare the bits above the 11-bit
   (any surrogate) or 10-bit (high/low surrogate) payload with the prefix of
   the surrogate block.  Unlike a two-sided range comparison this evaluates
   "ch" exactly once, so an argument with side effects (e.g. *p++) is safe.
   "ch" is expected to be a non-negative integer code unit or code point;
   right-shifting a negative value is implementation-defined in C. */
#define Py_UNICODE_IS_SURROGATE(ch) (((ch) >> 11) == (0xD800 >> 11))
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (((ch) >> 10) == (0xD800 >> 10))
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (((ch) >> 10) == (0xDC00 >> 10))
/* Join two surrogate characters and return a single Py_UCS4 value. */
#define Py_UNICODE_JOIN_SURROGATES(high, low)  \
    (((((Py_UCS4)(high) & 0x03FF) << 10) |      \
      ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
/* high surrogate = top 10 bits added to D800 */
#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10))
/* low surrogate = bottom 10 bits added to DC00 */
#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
78 
79 /* --- Unicode Type ------------------------------------------------------- */
80 
81 /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
82    structure. state.ascii and state.compact are set, and the data
83    immediately follow the structure. utf8_length and wstr_length can be found
84    in the length field; the utf8 pointer is equal to the data pointer. */
typedef struct {
    /* There are 4 forms of Unicode strings:

       - compact ascii:

         * structure = PyASCIIObject
         * test: PyUnicode_IS_COMPACT_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND
         * compact = 1
         * ascii = 1
         * ready = 1
         * (length is the length of the utf8 and wstr strings)
         * (data starts just after the structure)
         * (since ASCII is decoded from UTF-8, the utf8 string is the data)

       - compact:

         * structure = PyCompactUnicodeObject
         * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 1
         * ready = 1
         * ascii = 0
         * utf8 is not shared with data
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data and wstr_length=length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL
         * (data starts just after the structure)

       - legacy string, not ready:

         * structure = PyUnicodeObject
         * test: kind == PyUnicode_WCHAR_KIND
         * length = 0 (use wstr_length)
         * hash = -1
         * kind = PyUnicode_WCHAR_KIND
         * compact = 0
         * ascii = 0
         * ready = 0
         * interned = SSTATE_NOT_INTERNED
         * wstr is not NULL
         * data.any is NULL
         * utf8 is NULL
         * utf8_length = 0

       - legacy string, ready:

         * structure = PyUnicodeObject structure
         * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 0
         * ready = 1
         * data.any is not NULL
         * utf8 is shared and utf8_length = length with data.any if ascii = 1
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data.any and wstr_length = length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL

       Compact strings use only one memory block (structure + characters),
       whereas legacy strings use one block for the structure and one block
       for characters.

       Legacy strings are created by PyUnicode_FromUnicode() and
       PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
       when PyUnicode_READY() is called.

       See also _PyUnicode_CheckConsistency().
    */
    PyObject_HEAD
    Py_ssize_t length;          /* Number of code points in the string */
    Py_hash_t hash;             /* Hash value; -1 if not set */
    /* Bit-field flags describing the representation; read through the
       PyUnicode_KIND(), PyUnicode_IS_*() access macros of this header. */
    struct {
        /*
           SSTATE_NOT_INTERNED (0)
           SSTATE_INTERNED_MORTAL (1)
           SSTATE_INTERNED_IMMORTAL (2)

           If interned != SSTATE_NOT_INTERNED, the two references from the
           dictionary to this object are *not* counted in ob_refcnt.
         */
        unsigned int interned:2;
        /* Character size:

           - PyUnicode_WCHAR_KIND (0):

             * character type = wchar_t (16 or 32 bits, depending on the
               platform)

           - PyUnicode_1BYTE_KIND (1):

             * character type = Py_UCS1 (8 bits, unsigned)
             * all characters are in the range U+0000-U+00FF (latin1)
             * if ascii is set, all characters are in the range U+0000-U+007F
               (ASCII), otherwise at least one character is in the range
               U+0080-U+00FF

           - PyUnicode_2BYTE_KIND (2):

             * character type = Py_UCS2 (16 bits, unsigned)
             * all characters are in the range U+0000-U+FFFF (BMP)
             * at least one character is in the range U+0100-U+FFFF

           - PyUnicode_4BYTE_KIND (4):

             * character type = Py_UCS4 (32 bits, unsigned)
             * all characters are in the range U+0000-U+10FFFF
             * at least one character is in the range U+10000-U+10FFFF
         */
        unsigned int kind:3;
        /* Compact is with respect to the allocation scheme. Compact unicode
           objects only require one memory block while non-compact objects use
           one block for the PyUnicodeObject struct and another for its data
           buffer. */
        unsigned int compact:1;
        /* The string only contains characters in the range U+0000-U+007F (ASCII)
           and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
           set, use the PyASCIIObject structure. */
        unsigned int ascii:1;
        /* The ready flag indicates whether the object layout is initialized
           completely. This means that this is either a compact object, or
           the data pointer is filled out. The bit is redundant, and helps
           to minimize the test in PyUnicode_IS_READY(). */
        unsigned int ready:1;
        /* Padding to ensure that PyUnicode_DATA() is always aligned to
           4 bytes (see issue #19537 on m68k). */
        unsigned int :24;
    } state;
    wchar_t *wstr;              /* wchar_t representation (null-terminated) */
} PyASCIIObject;
220 
221 /* Non-ASCII strings allocated through PyUnicode_New use the
222    PyCompactUnicodeObject structure. state.compact is set, and the data
223    immediately follow the structure. */
typedef struct {
    /* Embedded first, so the object can also be accessed through a
       PyASCIIObject pointer (as the access macros in this header do). */
    PyASCIIObject _base;
    Py_ssize_t utf8_length;     /* Number of bytes in utf8, excluding the
                                 * terminating \0. */
    char *utf8;                 /* UTF-8 representation (null-terminated) */
    Py_ssize_t wstr_length;     /* Number of code points in wstr, possible
                                 * surrogates count as two code points. */
} PyCompactUnicodeObject;
232 
233 /* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
234    PyUnicodeObject structure. The actual string data is initially in the wstr
235    block, and copied into the data block using _PyUnicode_Ready. */
typedef struct {
    /* Embedded first, so the object can also be accessed through a
       PyCompactUnicodeObject or PyASCIIObject pointer. */
    PyCompactUnicodeObject _base;
    /* One pointer viewed under the type matching the string's kind;
       "any" is the untyped view read by _PyUnicode_NONCOMPACT_DATA(). */
    union {
        void *any;
        Py_UCS1 *latin1;
        Py_UCS2 *ucs2;
        Py_UCS4 *ucs4;
    } data;                     /* Canonical, smallest-form Unicode buffer */
} PyUnicodeObject;
245 
/* Declaration only; the implementation is outside this header.
   NOTE(review): presumably validates the per-form invariants listed in the
   PyASCIIObject comment, with check_content additionally enabling a scan of
   the character data -- confirm against the definition. */
PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
    PyObject *op,
    int check_content);
249 
250 /* Fast access macros */
251 
252 /* Returns the deprecated Py_UNICODE representation's size in code units
253    (this includes surrogate pairs as 2 units).
254    If the Py_UNICODE representation is not available, it will be computed
255    on request.  Use PyUnicode_GET_LENGTH() for the length in code points. */
256 
/* Py_DEPRECATED(3.3) */
/* Comma expression: asserts PyUnicode_Check(op), then yields
   PyUnicode_WSTR_LENGTH(op).  When ->wstr is still NULL,
   PyUnicode_AsUnicode() is called first to create it; its result is
   discarded and only an assert() checks that wstr was actually set.
   "op" is evaluated several times. */
#define PyUnicode_GET_SIZE(op)                       \
    (assert(PyUnicode_Check(op)),                    \
     (((PyASCIIObject *)(op))->wstr) ?               \
      PyUnicode_WSTR_LENGTH(op) :                    \
      ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\
       assert(((PyASCIIObject *)(op))->wstr),        \
       PyUnicode_WSTR_LENGTH(op)))

/* Py_DEPRECATED(3.3) */
/* PyUnicode_GET_SIZE(op) scaled by Py_UNICODE_SIZE, i.e. a size in bytes
   rather than in code units. */
#define PyUnicode_GET_DATA_SIZE(op) \
    (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)

/* Alias for PyUnicode_AsUnicode().  This will create a wchar_t/Py_UNICODE
   representation on demand.  Using this macro is very inefficient now,
   try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
   use PyUnicode_WRITE() and PyUnicode_READ(). */

/* Py_DEPRECATED(3.3) */
/* Yields the cached ->wstr pointer when it is set, otherwise the result of
   PyUnicode_AsUnicode().  "op" is evaluated several times. */
#define PyUnicode_AS_UNICODE(op) \
    (assert(PyUnicode_Check(op)), \
     (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
      PyUnicode_AsUnicode(_PyObject_CAST(op)))

/* Py_DEPRECATED(3.3) */
/* The PyUnicode_AS_UNICODE(op) buffer viewed as const char *. */
#define PyUnicode_AS_DATA(op) \
    ((const char *)(PyUnicode_AS_UNICODE(op)))
284 
285 
286 /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
287 
288 /* Values for PyASCIIObject.state: */
289 
290 /* Interning state. */
/* Values stored in the 2-bit PyASCIIObject.state.interned field. */
#define SSTATE_NOT_INTERNED 0
#define SSTATE_INTERNED_MORTAL 1
#define SSTATE_INTERNED_IMMORTAL 2
294 
/* Return true if the string contains only ASCII characters, or 0 if not. The
   string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
   ready.

   In debug builds the macro asserts that op is a Unicode object and that it
   is ready.  "op" is parenthesized before the cast so that an expression
   argument (e.g. pointer arithmetic) is cast as a whole instead of only its
   first operand. */
#define PyUnicode_IS_ASCII(op)                   \
    (assert(PyUnicode_Check(op)),                \
     assert(PyUnicode_IS_READY(op)),             \
     ((PyASCIIObject*)(op))->state.ascii)
302 
/* Return true if the string is compact or 0 if not.
   No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT(op) \
    (((PyASCIIObject*)(op))->state.compact)

/* Return true if the string is a compact ASCII string (use PyASCIIObject
   structure), or 0 if not.  No type checks or Ready calls are performed.
   "op" is parenthesized before the cast so that an expression argument is
   cast as a whole; it is evaluated once or twice (&& short-circuits). */
#define PyUnicode_IS_COMPACT_ASCII(op)                 \
    (((PyASCIIObject*)(op))->state.ascii && PyUnicode_IS_COMPACT(op))
312 
/* Values of the PyASCIIObject.state.kind field.  The three *BYTE* kinds are
   numerically equal to the size in bytes of one character (1, 2 or 4). */
enum PyUnicode_Kind {
/* String only has a wstr (wchar_t) representation.  This is only possible
   when the string was created with a legacy API and _PyUnicode_Ready()
   has not been called yet.  */
    PyUnicode_WCHAR_KIND = 0,
/* Return values of the PyUnicode_KIND() macro: */
    PyUnicode_1BYTE_KIND = 1,
    PyUnicode_2BYTE_KIND = 2,
    PyUnicode_4BYTE_KIND = 4
};
323 
324 /* Return pointers to the canonical representation cast to unsigned char,
325    Py_UCS2, or Py_UCS4 for direct character access.
326    No checks are performed, use PyUnicode_KIND() before to ensure
327    these will work correctly. */
328 
/* Each macro is PyUnicode_DATA(op) cast to the character type of one kind;
   the cast itself does not check that the string has that kind. */
#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
332 
/* Return one of the PyUnicode_*_KIND values defined above.
   Asserts (debug builds only) that op is a Unicode object and is ready;
   "op" is evaluated several times when assertions are enabled. */
#define PyUnicode_KIND(op) \
    (assert(PyUnicode_Check(op)), \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->state.kind)
338 
/* Return a void pointer to the raw unicode buffer. */

/* Compact strings: the characters start right after the object header,
   which is a PyASCIIObject for ASCII strings and a PyCompactUnicodeObject
   otherwise (hence the "+ 1" on the correspondingly typed pointer). */
#define _PyUnicode_COMPACT_DATA(op)                     \
    (PyUnicode_IS_ASCII(op) ?                   \
     ((void*)((PyASCIIObject*)(op) + 1)) :              \
     ((void*)((PyCompactUnicodeObject*)(op) + 1)))

/* Non-compact strings: the characters live in the separately stored
   data.any pointer, which is asserted to be non-NULL. */
#define _PyUnicode_NONCOMPACT_DATA(op)                  \
    (assert(((PyUnicodeObject*)(op))->data.any),        \
     ((((PyUnicodeObject *)(op))->data.any)))

/* Dispatch on the compact flag to one of the two helpers above.
   "op" is evaluated several times. */
#define PyUnicode_DATA(op) \
    (assert(PyUnicode_Check(op)), \
     PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) :   \
     _PyUnicode_NONCOMPACT_DATA(op))
353 
354 /* In the access macros below, "kind" may be evaluated more than once.
355    All other macro parameters are evaluated exactly once, so it is safe
356    to put side effects into them (such as increasing the index). */
357 
/* Store the code point "value" at position "index" (0-based) of the
   canonical buffer "data", whose character width is selected by "kind".
   No sanity checks are made beyond a debug-only assert on the kind, so the
   macro is suited to tight loops; callers should fetch kind and data once
   with the other access macros and reuse them.  The value is truncated by
   a cast to the character type of the selected kind. */
#define PyUnicode_WRITE(kind, data, index, value) \
    do { \
        switch ((kind)) { \
        case PyUnicode_1BYTE_KIND: \
            ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
            break; \
        case PyUnicode_2BYTE_KIND: \
            ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
            break; \
        default: \
            assert((kind) == PyUnicode_4BYTE_KIND); \
            ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
            break; \
        } \
    } while (0)
380 
/* Fetch the code point at position "index" of the canonical buffer "data",
   interpreting the buffer according to "kind"; any kind other than the
   1-byte and 2-byte kinds is read as Py_UCS4.  The result is a Py_UCS4.
   Performs no checks and no ready calls. */
#define PyUnicode_READ(kind, data, index) \
    ((Py_UCS4)(((kind) == PyUnicode_1BYTE_KIND) \
        ? ((const Py_UCS1 *)(data))[(index)] \
        : (((kind) == PyUnicode_2BYTE_KIND) \
            ? ((const Py_UCS2 *)(data))[(index)] \
            : ((const Py_UCS4 *)(data))[(index)])))
392 
/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
   calls PyUnicode_KIND() and might call it twice.  For single reads, use
   PyUnicode_READ_CHAR, for multiple consecutive reads callers should
   cache kind and use PyUnicode_READ instead.

   Asserts (debug builds only) that "unicode" is a ready Unicode object.
   "unicode" is expanded several times, so it must not have side effects;
   "index" is used in exactly one of the three mutually exclusive branches. */
#define PyUnicode_READ_CHAR(unicode, index) \
    (assert(PyUnicode_Check(unicode)),          \
     assert(PyUnicode_IS_READY(unicode)),       \
     (Py_UCS4)                                  \
        (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
            ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
            (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
                ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
                ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
            ) \
        ))
408 
/* Returns the length of the unicode string (the "length" field, i.e. the
   number of code points). The caller has to make sure that the string has
   its canonical representation set before calling this macro; use
   PyUnicode_READY() to ensure that.  Readiness is only checked by an
   assert(). */
#define PyUnicode_GET_LENGTH(op)                \
    (assert(PyUnicode_Check(op)),               \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->length)
416 
417 
/* Fast check to determine whether an object is ready. Equivalent to
   PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any

   Reads the state.ready bit without any type check.  "op" is parenthesized
   before the cast so that an expression argument (e.g. pointer arithmetic)
   is cast as a whole instead of only its first operand. */

#define PyUnicode_IS_READY(op) (((PyASCIIObject*)(op))->state.ready)
422 
/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
   case: when the ready bit is already set it evaluates to 0 without a
   function call.  If the canonical representation is not yet set, it will
   still call _PyUnicode_Ready().
   Returns 0 on success and -1 on errors.  "op" is evaluated several times. */
#define PyUnicode_READY(op)                        \
    (assert(PyUnicode_Check(op)),                       \
     (PyUnicode_IS_READY(op) ?                          \
      0 : _PyUnicode_Ready(_PyObject_CAST(op))))
431 
/* Return a maximum character value which is suitable for creating another
   string based on op.  This is always an approximation but more efficient
   than iterating over the string.

   The value is derived from the ascii flag and the kind only: 0x7f for
   ASCII strings, otherwise 0xff, 0xffff or 0x10ffff for the 1-byte, 2-byte
   and remaining kinds respectively.  The string must be ready (asserted). */
#define PyUnicode_MAX_CHAR_VALUE(op) \
    (assert(PyUnicode_IS_READY(op)),                                    \
     (PyUnicode_IS_ASCII(op) ?                                          \
      (0x7f) :                                                          \
      (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ?                     \
       (0xffU) :                                                        \
       (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ?                    \
        (0xffffU) :                                                     \
        (0x10ffffU)))))
444 
445 Py_DEPRECATED(3.3)
_PyUnicode_get_wstr_length(PyObject * op)446 static inline Py_ssize_t _PyUnicode_get_wstr_length(PyObject *op) {
447     return PyUnicode_IS_COMPACT_ASCII(op) ?
448             ((PyASCIIObject*)op)->length :
449             ((PyCompactUnicodeObject*)op)->wstr_length;
450 }
451 #define PyUnicode_WSTR_LENGTH(op) _PyUnicode_get_wstr_length((PyObject*)op)
452 
453 /* === Public API ========================================================= */
454 
455 /* --- Plain Py_UNICODE --------------------------------------------------- */
456 
/* With PEP 393, this is the recommended way to allocate a new unicode object.
   This function will allocate the object and its buffer in a single memory
   block.  Objects created using this function are not resizable. */
PyAPI_FUNC(PyObject*) PyUnicode_New(
    Py_ssize_t size,            /* Number of code points in the new string */
    Py_UCS4 maxchar             /* maximum code point value in the string */
    );

/* Initializes the canonical string representation from the deprecated
   wstr/Py_UNICODE representation. This function is used to convert Unicode
   objects which were created using the old API to the new flexible format
   introduced with PEP 393.

   Don't call this function directly, use the public PyUnicode_READY() macro
   instead.  As documented for that macro, the result is 0 on success and
   -1 on errors. */
PyAPI_FUNC(int) _PyUnicode_Ready(
    PyObject *unicode           /* Unicode object */
    );

/* Get a copy of a Unicode string. */
PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
    PyObject *unicode
    );
480 
/* Copy characters from one unicode object into another. This function
   performs character conversion when necessary and falls back to memcpy()
   if possible.

   Fail if to is too small (smaller than *how_many* or smaller than
   len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
   kind(to), or if *to* has more than 1 reference.

   Return the number of written characters, or return -1 and raise an
   exception on error.

   Pseudo-code:

       how_many = min(how_many, len(from) - from_start)
       to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
       return how_many

   Note: The function doesn't write a terminating null character.
   */
PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
    PyObject *to,
    Py_ssize_t to_start,
    PyObject *from,
    Py_ssize_t from_start,
    Py_ssize_t how_many
    );

/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
   may crash if parameters are invalid (e.g. if the output string
   is too short). */
PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
    PyObject *to,
    Py_ssize_t to_start,
    PyObject *from,
    Py_ssize_t from_start,
    Py_ssize_t how_many
    );

/* Fill a string with a character: write fill_char into
   unicode[start:start+length].

   Fail if fill_char is bigger than the string maximum character, or if the
   string has more than 1 reference.

   Return the number of written characters, or return -1 and raise an
   exception on error. */
PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
    PyObject *unicode,
    Py_ssize_t start,
    Py_ssize_t length,
    Py_UCS4 fill_char
    );

/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
   if parameters are invalid (e.g. if length is longer than the string). */
PyAPI_FUNC(void) _PyUnicode_FastFill(
    PyObject *unicode,
    Py_ssize_t start,
    Py_ssize_t length,
    Py_UCS4 fill_char
    );
541 
/* Create a Unicode Object from the Py_UNICODE buffer u of the given
   size.

   u may be NULL which causes the contents to be undefined. It is the
   user's responsibility to fill in the needed data afterwards. Note
   that modifying the Unicode object contents after construction is
   only allowed if u was set to NULL.

   The buffer is copied into the new object. */
Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
    const Py_UNICODE *u,        /* Unicode buffer */
    Py_ssize_t size             /* size of buffer */
    );

/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
   Scan the string to find the maximum character.
   NOTE(review): "kind" is presumably one of the PyUnicode_*BYTE_KIND values
   and "size" a character count rather than a byte count -- confirm against
   the implementation. */
PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
    int kind,
    const void *buffer,
    Py_ssize_t size);

/* Create a new string from a buffer of ASCII characters.
   WARNING: Don't check if the string contains any non-ASCII character. */
PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
    const char *buffer,
    Py_ssize_t size);

/* Compute the maximum character of the substring unicode[start:end].
   Return 127 for an empty string. */
PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
    PyObject *unicode,
    Py_ssize_t start,
    Py_ssize_t end);

/* Return a read-only pointer to the Unicode object's internal
   Py_UNICODE buffer.
   If the wchar_t/Py_UNICODE representation is not yet available, this
   function will calculate it. */
Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
    PyObject *unicode           /* Unicode object */
    );

/* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
   contains null characters. */
PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
    PyObject *unicode           /* Unicode object */
    );

/* Return a read-only pointer to the Unicode object's internal
   Py_UNICODE buffer and save the length at size.
   If the wchar_t/Py_UNICODE representation is not yet available, this
   function will calculate it. */

Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
    PyObject *unicode,          /* Unicode object */
    Py_ssize_t *size            /* location where to save the length */
    );
599 
600 
601 /* --- _PyUnicodeWriter API ----------------------------------------------- */
602 
/* State of an incremental string builder, driven through the
   _PyUnicodeWriter_*() functions and macros declared after this struct.
   NOTE(review): the notes on the first six fields are inferred from how the
   _PyUnicodeWriter_Prepare*() macros use them -- confirm against the
   implementation. */
typedef struct {
    PyObject *buffer;           /* presumably the string being built */
    void *data;                 /* presumably the character data of buffer */
    enum PyUnicode_Kind kind;   /* compared with the requested kind by
                                   _PyUnicodeWriter_PrepareKind() */
    Py_UCS4 maxchar;            /* compared with the requested maximum
                                   character by _PyUnicodeWriter_Prepare() */
    Py_ssize_t size;            /* size - pos is the room that
                                   _PyUnicodeWriter_Prepare() treats as
                                   available */
    Py_ssize_t pos;

    /* minimum number of allocated characters (default: 0) */
    Py_ssize_t min_length;

    /* minimum character (default: 127, ASCII) */
    Py_UCS4 min_char;

    /* If non-zero, overallocate the buffer (default: 0). */
    unsigned char overallocate;

    /* If readonly is 1, buffer is a shared string (cannot be modified)
       and size is set to 0. */
    unsigned char readonly;
} _PyUnicodeWriter ;
624 
/* Initialize a Unicode writer.
 *
 * By default, the minimum buffer size is 0 characters and overallocation is
 * disabled. Set min_length, min_char and overallocate attributes to control
 * the allocation of the buffer. */
PyAPI_FUNC(void)
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);

/* Prepare the buffer to write 'length' characters
   with the specified maximum character.

   The macro evaluates to 0 without a function call when MAXCHAR already
   fits the writer's maxchar and LENGTH fits in size - pos, or when LENGTH
   is 0; otherwise it calls _PyUnicodeWriter_PrepareInternal().  WRITER,
   LENGTH and MAXCHAR may each be evaluated more than once.

   Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR)             \
    (((MAXCHAR) <= (WRITER)->maxchar                                  \
      && (LENGTH) <= (WRITER)->size - (WRITER)->pos)                  \
     ? 0                                                              \
     : (((LENGTH) == 0)                                               \
        ? 0                                                           \
        : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))

/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
   instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
                                 Py_ssize_t length, Py_UCS4 maxchar);

/* Prepare the buffer to have at least the kind KIND.
   For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
   support characters in range U+0000-U+FFFF.

   KIND must not be PyUnicode_WCHAR_KIND (asserted).  The macro evaluates to
   0 without a function call when the writer's kind is already at least
   KIND.  WRITER and KIND may each be evaluated more than once.

   Return 0 on success, raise an exception and return -1 on error. */
#define _PyUnicodeWriter_PrepareKind(WRITER, KIND)                    \
    (assert((KIND) != PyUnicode_WCHAR_KIND),                          \
     (KIND) <= (WRITER)->kind                                         \
     ? 0                                                              \
     : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))

/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
   macro instead. */
PyAPI_FUNC(int)
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
                                     enum PyUnicode_Kind kind);

/* Append a Unicode character.
   Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
    Py_UCS4 ch
    );

/* Append a Unicode string.
   Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
    PyObject *str               /* Unicode string */
    );

/* Append a substring of a Unicode string.
   Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
    PyObject *str,              /* Unicode string */
    Py_ssize_t start,
    Py_ssize_t end
    );

/* Append an ASCII-encoded byte string.
   Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
    const char *str,           /* ASCII-encoded byte string */
    Py_ssize_t len             /* number of bytes, or -1 if unknown */
    );

/* Append a latin1-encoded byte string.
   Return 0 on success, raise an exception and return -1 on error. */
PyAPI_FUNC(int)
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
    const char *str,           /* latin1-encoded byte string */
    Py_ssize_t len             /* length in bytes */
    );

/* Get the value of the writer as a Unicode string. Clear the
   buffer of the writer. Raise an exception and return NULL
   on error. */
PyAPI_FUNC(PyObject *)
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);

/* Deallocate memory of a writer (clear its internal buffer). */
PyAPI_FUNC(void)
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);


/* Format the object based on the format_spec, as defined in PEP 3101
   (Advanced String Formatting).
   NOTE(review): start/end presumably delimit the part of format_spec to
   use, and the result is presumably appended to writer -- confirm against
   the implementation. */
PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
    _PyUnicodeWriter *writer,
    PyObject *obj,
    PyObject *format_spec,
    Py_ssize_t start,
    Py_ssize_t end);
726 
727 /* --- Manage the default encoding ---------------------------------------- */
728 
/* Returns a pointer to the default encoding (UTF-8) of the
   Unicode object unicode.

   Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
   in the unicodeobject.

   _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
   support the previous internal function with the same behaviour.

   Use of this API is DEPRECATED since no size information can be
   extracted from the returned data.
*/

PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);

/* Backward-compatibility alias; expands to the function name itself. */
#define _PyUnicode_AsString PyUnicode_AsUTF8
745 
746 /* --- Generic Codecs ----------------------------------------------------- */
747 
748 /* Encodes a Py_UNICODE buffer of the given size and returns a
749    Python string object. */
750 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_Encode(
751     const Py_UNICODE *s,        /* Unicode char buffer */
752     Py_ssize_t size,            /* number of Py_UNICODE chars to encode */
753     const char *encoding,       /* encoding */
754     const char *errors          /* error handling */
755     );
756 
757 /* --- UTF-7 Codecs ------------------------------------------------------- */
758 
759 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
760     const Py_UNICODE *data,     /* Unicode char buffer */
761     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
762     int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
763     int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
764     const char *errors          /* error handling */
765     );
766 
767 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
768     PyObject *unicode,          /* Unicode object */
769     int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
770     int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
771     const char *errors          /* error handling */
772     );
773 
774 /* --- UTF-8 Codecs ------------------------------------------------------- */
775 
776 PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
777     PyObject *unicode,
778     const char *errors);
779 
780 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
781     const Py_UNICODE *data,     /* Unicode char buffer */
782     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
783     const char *errors          /* error handling */
784     );
785 
786 /* --- UTF-32 Codecs ------------------------------------------------------ */
787 
788 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
789     const Py_UNICODE *data,     /* Unicode char buffer */
790     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
791     const char *errors,         /* error handling */
792     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
793     );
794 
795 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
796     PyObject *object,           /* Unicode object */
797     const char *errors,         /* error handling */
798     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
799     );
800 
801 /* --- UTF-16 Codecs ------------------------------------------------------ */
802 
803 /* Returns a Python string object holding the UTF-16 encoded value of
804    the Unicode data.
805 
806    If byteorder is not 0, output is written according to the following
807    byte order:
808 
809    byteorder == -1: little endian
810    byteorder == 0:  native byte order (writes a BOM mark)
811    byteorder == 1:  big endian
812 
813    If byteorder is 0, the output string will always start with the
814    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
815    prepended.
816 
817    Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
818    UCS-2. This trick makes it possible to add full UTF-16 capabilities
819    at a later point without compromising the APIs.
820 
821 */
822 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
823     const Py_UNICODE *data,     /* Unicode char buffer */
824     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
825     const char *errors,         /* error handling */
826     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
827     );
828 
829 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
830     PyObject* unicode,          /* Unicode object */
831     const char *errors,         /* error handling */
832     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
833     );
834 
835 /* --- Unicode-Escape Codecs ---------------------------------------------- */
836 
837 /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
838    chars. */
839 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape(
840         const char *string,     /* Unicode-Escape encoded string */
841         Py_ssize_t length,      /* size of string */
842         const char *errors,     /* error handling */
843         const char **first_invalid_escape  /* on return, points to first
844                                               invalid escaped char in
845                                               string. */
846 );
847 
848 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
849     const Py_UNICODE *data,     /* Unicode char buffer */
850     Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
851     );
852 
853 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
854 
855 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
856     const Py_UNICODE *data,     /* Unicode char buffer */
857     Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
858     );
859 
860 /* --- Latin-1 Codecs ----------------------------------------------------- */
861 
862 PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
863     PyObject* unicode,
864     const char* errors);
865 
866 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
867     const Py_UNICODE *data,     /* Unicode char buffer */
868     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
869     const char *errors          /* error handling */
870     );
871 
872 /* --- ASCII Codecs ------------------------------------------------------- */
873 
874 PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
875     PyObject* unicode,
876     const char* errors);
877 
878 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
879     const Py_UNICODE *data,     /* Unicode char buffer */
880     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
881     const char *errors          /* error handling */
882     );
883 
884 /* --- Character Map Codecs ----------------------------------------------- */
885 
886 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
887     const Py_UNICODE *data,     /* Unicode char buffer */
888     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
889     PyObject *mapping,          /* encoding mapping */
890     const char *errors          /* error handling */
891     );
892 
893 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
894     PyObject *unicode,          /* Unicode object */
895     PyObject *mapping,          /* encoding mapping */
896     const char *errors          /* error handling */
897     );
898 
899 /* Translate a Py_UNICODE buffer of the given length by applying a
900    character mapping table to it and return the resulting Unicode
901    object.
902 
903    The mapping table must map Unicode ordinal integers to Unicode strings,
904    Unicode ordinal integers or None (causing deletion of the character).
905 
906    Mapping tables may be dictionaries or sequences. Unmapped character
907    ordinals (ones which cause a LookupError) are left untouched and
908    are copied as-is.
909 
910 */
911 Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
912     const Py_UNICODE *data,     /* Unicode char buffer */
913     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
914     PyObject *table,            /* Translate table */
915     const char *errors          /* error handling */
916     );
917 
918 /* --- MBCS codecs for Windows -------------------------------------------- */
919 
#ifdef MS_WINDOWS
/* MBCS encoder working on a Py_UNICODE buffer; only declared when
   MS_WINDOWS is defined.

   data:   Unicode char buffer
   length: number of Py_UNICODE chars to encode
   errors: error handling */
Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject*)
PyUnicode_EncodeMBCS(const Py_UNICODE *data,
                     Py_ssize_t length,
                     const char *errors);
#endif
927 
928 /* --- Decimal Encoder ---------------------------------------------------- */
929 
930 /* Takes a Unicode string holding a decimal value and writes it into
931    an output buffer using standard ASCII digit codes.
932 
933    The output buffer has to provide at least length+1 bytes of storage
934    area. The output string is 0-terminated.
935 
936    The encoder converts whitespace to ' ', decimal characters to their
937    corresponding ASCII digit and all other Latin-1 characters except
938    \0 as-is. Characters outside this range (Unicode ordinals 1-256)
939    are treated as errors. This includes embedded NULL bytes.
940 
941    Error handling is defined by the errors argument:
942 
943       NULL or "strict": raise a ValueError
944       "ignore": ignore the wrong characters (these are not copied to the
945                 output buffer)
946       "replace": replaces illegal characters with '?'
947 
948    Returns 0 on success, -1 on failure.
949 
950 */
951 
952 Py_DEPRECATED(3.3) PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
953     Py_UNICODE *s,              /* Unicode buffer */
954     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
955     char *output,               /* Output buffer; must have size >= length */
956     const char *errors          /* error handling */
957     );
958 
959 /* Transforms code points that have decimal digit property to the
960    corresponding ASCII digit code points.
961 
962    Returns a new Unicode string on success, NULL on failure.
963 */
964 
965 Py_DEPRECATED(3.3)
966 PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
967     Py_UNICODE *s,              /* Unicode buffer */
968     Py_ssize_t length           /* Number of Py_UNICODE chars to transform */
969     );
970 
971 /* Coverts a Unicode object holding a decimal value to an ASCII string
972    for using in int, float and complex parsers.
973    Transforms code points that have decimal digit property to the
974    corresponding ASCII digit code points.  Transforms spaces to ASCII.
975    Transforms code points starting from the first non-ASCII code point that
976    is neither a decimal digit nor a space to the end into '?'. */
977 
978 PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
979     PyObject *unicode           /* Unicode object */
980     );
981 
982 /* --- Methods & Slots ---------------------------------------------------- */
983 
984 PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
985     PyObject *separator,
986     PyObject *const *items,
987     Py_ssize_t seqlen
988     );
989 
990 /* Test whether a unicode is equal to ASCII identifier.  Return 1 if true,
991    0 otherwise.  The right argument must be ASCII identifier.
992    Any error occurs inside will be cleared before return. */
993 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId(
994     PyObject *left,             /* Left string */
995     _Py_Identifier *right       /* Right identifier */
996     );
997 
998 /* Test whether a unicode is equal to ASCII string.  Return 1 if true,
999    0 otherwise.  The right argument must be ASCII-encoded string.
1000    Any error occurs inside will be cleared before return. */
1001 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
1002     PyObject *left,
1003     const char *right           /* ASCII-encoded string */
1004     );
1005 
1006 /* Externally visible for str.strip(unicode) */
1007 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
1008     PyObject *self,
1009     int striptype,
1010     PyObject *sepobj
1011     );
1012 
1013 /* Using explicit passed-in values, insert the thousands grouping
1014    into the string pointed to by buffer.  For the argument descriptions,
1015    see Objects/stringlib/localeutil.h */
1016 PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
1017     _PyUnicodeWriter *writer,
1018     Py_ssize_t n_buffer,
1019     PyObject *digits,
1020     Py_ssize_t d_pos,
1021     Py_ssize_t n_digits,
1022     Py_ssize_t min_width,
1023     const char *grouping,
1024     PyObject *thousands_sep,
1025     Py_UCS4 *maxchar);
1026 
1027 /* === Characters Type APIs =============================================== */
1028 
1029 /* Helper array used by Py_UNICODE_ISSPACE(). */
1030 
1031 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
1032 
1033 /* These should not be used directly. Use the Py_UNICODE_IS* and
1034    Py_UNICODE_TO* macros instead.
1035 
1036    These APIs are implemented in Objects/unicodectype.c.
1037 
1038 */
1039 
1040 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
1041     Py_UCS4 ch       /* Unicode character */
1042     );
1043 
1044 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
1045     Py_UCS4 ch       /* Unicode character */
1046     );
1047 
1048 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
1049     Py_UCS4 ch       /* Unicode character */
1050     );
1051 
1052 PyAPI_FUNC(int) _PyUnicode_IsXidStart(
1053     Py_UCS4 ch       /* Unicode character */
1054     );
1055 
1056 PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
1057     Py_UCS4 ch       /* Unicode character */
1058     );
1059 
1060 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
1061     const Py_UCS4 ch         /* Unicode character */
1062     );
1063 
1064 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
1065     const Py_UCS4 ch         /* Unicode character */
1066     );
1067 
1068 /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
1069     Py_UCS4 ch       /* Unicode character */
1070     );
1071 
1072 /* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
1073     Py_UCS4 ch       /* Unicode character */
1074     );
1075 
1076 Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
1077     Py_UCS4 ch       /* Unicode character */
1078     );
1079 
1080 PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
1081     Py_UCS4 ch,       /* Unicode character */
1082     Py_UCS4 *res
1083     );
1084 
1085 PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
1086     Py_UCS4 ch,       /* Unicode character */
1087     Py_UCS4 *res
1088     );
1089 
1090 PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
1091     Py_UCS4 ch,       /* Unicode character */
1092     Py_UCS4 *res
1093     );
1094 
1095 PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
1096     Py_UCS4 ch,       /* Unicode character */
1097     Py_UCS4 *res
1098     );
1099 
1100 PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
1101     Py_UCS4 ch         /* Unicode character */
1102     );
1103 
1104 PyAPI_FUNC(int) _PyUnicode_IsCased(
1105     Py_UCS4 ch         /* Unicode character */
1106     );
1107 
1108 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
1109     Py_UCS4 ch       /* Unicode character */
1110     );
1111 
1112 PyAPI_FUNC(int) _PyUnicode_ToDigit(
1113     Py_UCS4 ch       /* Unicode character */
1114     );
1115 
1116 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
1117     Py_UCS4 ch       /* Unicode character */
1118     );
1119 
1120 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
1121     Py_UCS4 ch       /* Unicode character */
1122     );
1123 
1124 PyAPI_FUNC(int) _PyUnicode_IsDigit(
1125     Py_UCS4 ch       /* Unicode character */
1126     );
1127 
1128 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
1129     Py_UCS4 ch       /* Unicode character */
1130     );
1131 
1132 PyAPI_FUNC(int) _PyUnicode_IsPrintable(
1133     Py_UCS4 ch       /* Unicode character */
1134     );
1135 
1136 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
1137     Py_UCS4 ch       /* Unicode character */
1138     );
1139 
1140 PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);
1141 
1142 /* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/
1143 PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
1144 
1145 /* Fast equality check when the inputs are known to be exact unicode types
1146    and where the hash values are equal (i.e. a very probable match) */
1147 PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);
1148 
1149 PyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject *, void *);
1150 PyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *);
1151 
1152 PyAPI_FUNC(Py_ssize_t) _PyUnicode_ScanIdentifier(PyObject *);
1153