1 /* ------------------------------------------------------------------------
2 
3    _codecs -- Provides access to the codec registry and the builtin
4               codecs.
5 
6    This module should never be imported directly. The standard library
7    module "codecs" wraps this builtin module for use within Python.
8 
9    The codec registry is accessible via:
10 
11      register(search_function) -> None
12 
13      lookup(encoding) -> CodecInfo object
14 
15    The builtin Unicode codecs use the following interface:
16 
     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (bytes object, characters consumed)
19 
20      <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21         (Unicode object, bytes consumed)
22 
23    These <encoding>s are available: utf_8, unicode_escape,
24    raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32).
25 
26 
27 Written by Marc-Andre Lemburg (mal@lemburg.com).
28 
29 Copyright (c) Corporation for National Research Initiatives.
30 
31    ------------------------------------------------------------------------ */
32 
33 #define PY_SSIZE_T_CLEAN
34 #include "Python.h"
35 
36 #ifdef MS_WINDOWS
37 #include <windows.h>
38 #endif
39 
40 /*[clinic input]
41 module _codecs
42 [clinic start generated code]*/
43 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
44 
45 #include "clinic/_codecsmodule.c.h"
46 
47 /* --- Registry ----------------------------------------------------------- */
48 
49 /*[clinic input]
50 _codecs.register
51     search_function: object
52     /
53 
54 Register a codec search function.
55 
56 Search functions are expected to take one argument, the encoding name in
57 all lower case letters, and either return None, or a tuple of functions
58 (encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
59 [clinic start generated code]*/
60 
61 static PyObject *
_codecs_register(PyObject * module,PyObject * search_function)62 _codecs_register(PyObject *module, PyObject *search_function)
63 /*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
64 {
65     if (PyCodec_Register(search_function))
66         return NULL;
67 
68     Py_RETURN_NONE;
69 }
70 
71 /*[clinic input]
72 _codecs.unregister
73     search_function: object
74     /
75 
76 Unregister a codec search function and clear the registry's cache.
77 
78 If the search function is not registered, do nothing.
79 [clinic start generated code]*/
80 
81 static PyObject *
_codecs_unregister(PyObject * module,PyObject * search_function)82 _codecs_unregister(PyObject *module, PyObject *search_function)
83 /*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/
84 {
85     if (PyCodec_Unregister(search_function) < 0) {
86         return NULL;
87     }
88 
89     Py_RETURN_NONE;
90 }
91 
92 /*[clinic input]
93 _codecs.lookup
94     encoding: str
95     /
96 
97 Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
98 [clinic start generated code]*/
99 
100 static PyObject *
_codecs_lookup_impl(PyObject * module,const char * encoding)101 _codecs_lookup_impl(PyObject *module, const char *encoding)
102 /*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/
103 {
104     return _PyCodec_Lookup(encoding);
105 }
106 
107 /*[clinic input]
108 _codecs.encode
109     obj: object
110     encoding: str(c_default="NULL") = "utf-8"
111     errors: str(c_default="NULL") = "strict"
112 
113 Encodes obj using the codec registered for encoding.
114 
115 The default encoding is 'utf-8'.  errors may be given to set a
116 different error handling scheme.  Default is 'strict' meaning that encoding
117 errors raise a ValueError.  Other possible values are 'ignore', 'replace'
118 and 'backslashreplace' as well as any other name registered with
119 codecs.register_error that can handle ValueErrors.
120 [clinic start generated code]*/
121 
122 static PyObject *
_codecs_encode_impl(PyObject * module,PyObject * obj,const char * encoding,const char * errors)123 _codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
124                     const char *errors)
125 /*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
126 {
127     if (encoding == NULL)
128         encoding = PyUnicode_GetDefaultEncoding();
129 
130     /* Encode via the codec registry */
131     return PyCodec_Encode(obj, encoding, errors);
132 }
133 
134 /*[clinic input]
135 _codecs.decode
136     obj: object
137     encoding: str(c_default="NULL") = "utf-8"
138     errors: str(c_default="NULL") = "strict"
139 
140 Decodes obj using the codec registered for encoding.
141 
142 Default encoding is 'utf-8'.  errors may be given to set a
143 different error handling scheme.  Default is 'strict' meaning that encoding
144 errors raise a ValueError.  Other possible values are 'ignore', 'replace'
145 and 'backslashreplace' as well as any other name registered with
146 codecs.register_error that can handle ValueErrors.
147 [clinic start generated code]*/
148 
149 static PyObject *
_codecs_decode_impl(PyObject * module,PyObject * obj,const char * encoding,const char * errors)150 _codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
151                     const char *errors)
152 /*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
153 {
154     if (encoding == NULL)
155         encoding = PyUnicode_GetDefaultEncoding();
156 
157     /* Decode via the codec registry */
158     return PyCodec_Decode(obj, encoding, errors);
159 }
160 
161 /* --- Helpers ------------------------------------------------------------ */
162 
163 static
codec_tuple(PyObject * decoded,Py_ssize_t len)164 PyObject *codec_tuple(PyObject *decoded,
165                       Py_ssize_t len)
166 {
167     if (decoded == NULL)
168         return NULL;
169     return Py_BuildValue("Nn", decoded, len);
170 }
171 
172 /* --- String codecs ------------------------------------------------------ */
173 /*[clinic input]
174 _codecs.escape_decode
175     data: Py_buffer(accept={str, buffer})
176     errors: str(accept={str, NoneType}) = None
177     /
178 [clinic start generated code]*/
179 
180 static PyObject *
_codecs_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors)181 _codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
182                            const char *errors)
183 /*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/
184 {
185     PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
186                                              errors, 0, NULL);
187     return codec_tuple(decoded, data->len);
188 }
189 
190 /*[clinic input]
191 _codecs.escape_encode
192     data: object(subclass_of='&PyBytes_Type')
193     errors: str(accept={str, NoneType}) = None
194     /
195 [clinic start generated code]*/
196 
197 static PyObject *
_codecs_escape_encode_impl(PyObject * module,PyObject * data,const char * errors)198 _codecs_escape_encode_impl(PyObject *module, PyObject *data,
199                            const char *errors)
200 /*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/
201 {
202     Py_ssize_t size;
203     Py_ssize_t newsize;
204     PyObject *v;
205 
206     size = PyBytes_GET_SIZE(data);
207     if (size > PY_SSIZE_T_MAX / 4) {
208         PyErr_SetString(PyExc_OverflowError,
209             "string is too large to encode");
210             return NULL;
211     }
212     newsize = 4*size;
213     v = PyBytes_FromStringAndSize(NULL, newsize);
214 
215     if (v == NULL) {
216         return NULL;
217     }
218     else {
219         Py_ssize_t i;
220         char c;
221         char *p = PyBytes_AS_STRING(v);
222 
223         for (i = 0; i < size; i++) {
224             /* There's at least enough room for a hex escape */
225             assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
226             c = PyBytes_AS_STRING(data)[i];
227             if (c == '\'' || c == '\\')
228                 *p++ = '\\', *p++ = c;
229             else if (c == '\t')
230                 *p++ = '\\', *p++ = 't';
231             else if (c == '\n')
232                 *p++ = '\\', *p++ = 'n';
233             else if (c == '\r')
234                 *p++ = '\\', *p++ = 'r';
235             else if (c < ' ' || c >= 0x7f) {
236                 *p++ = '\\';
237                 *p++ = 'x';
238                 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
239                 *p++ = Py_hexdigits[c & 0xf];
240             }
241             else
242                 *p++ = c;
243         }
244         *p = '\0';
245         if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
246             return NULL;
247         }
248     }
249 
250     return codec_tuple(v, size);
251 }
252 
253 /* --- Decoder ------------------------------------------------------------ */
254 /*[clinic input]
255 _codecs.utf_7_decode
256     data: Py_buffer
257     errors: str(accept={str, NoneType}) = None
258     final: bool(accept={int}) = False
259     /
260 [clinic start generated code]*/
261 
262 static PyObject *
_codecs_utf_7_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)263 _codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
264                           const char *errors, int final)
265 /*[clinic end generated code: output=0cd3a944a32a4089 input=22c395d357815d26]*/
266 {
267     Py_ssize_t consumed = data->len;
268     PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
269                                                      errors,
270                                                      final ? NULL : &consumed);
271     return codec_tuple(decoded, consumed);
272 }
273 
274 /*[clinic input]
275 _codecs.utf_8_decode
276     data: Py_buffer
277     errors: str(accept={str, NoneType}) = None
278     final: bool(accept={int}) = False
279     /
280 [clinic start generated code]*/
281 
282 static PyObject *
_codecs_utf_8_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)283 _codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
284                           const char *errors, int final)
285 /*[clinic end generated code: output=10f74dec8d9bb8bf input=f611b3867352ba59]*/
286 {
287     Py_ssize_t consumed = data->len;
288     PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
289                                                      errors,
290                                                      final ? NULL : &consumed);
291     return codec_tuple(decoded, consumed);
292 }
293 
294 /*[clinic input]
295 _codecs.utf_16_decode
296     data: Py_buffer
297     errors: str(accept={str, NoneType}) = None
298     final: bool(accept={int}) = False
299     /
300 [clinic start generated code]*/
301 
302 static PyObject *
_codecs_utf_16_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)303 _codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
304                            const char *errors, int final)
305 /*[clinic end generated code: output=783b442abcbcc2d0 input=191d360bd7309180]*/
306 {
307     int byteorder = 0;
308     /* This is overwritten unless final is true. */
309     Py_ssize_t consumed = data->len;
310     PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
311                                                       errors, &byteorder,
312                                                       final ? NULL : &consumed);
313     return codec_tuple(decoded, consumed);
314 }
315 
316 /*[clinic input]
317 _codecs.utf_16_le_decode
318     data: Py_buffer
319     errors: str(accept={str, NoneType}) = None
320     final: bool(accept={int}) = False
321     /
322 [clinic start generated code]*/
323 
324 static PyObject *
_codecs_utf_16_le_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)325 _codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
326                               const char *errors, int final)
327 /*[clinic end generated code: output=899b9e6364379dcd input=c6904fdc27fb4724]*/
328 {
329     int byteorder = -1;
330     /* This is overwritten unless final is true. */
331     Py_ssize_t consumed = data->len;
332     PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
333                                                       errors, &byteorder,
334                                                       final ? NULL : &consumed);
335     return codec_tuple(decoded, consumed);
336 }
337 
338 /*[clinic input]
339 _codecs.utf_16_be_decode
340     data: Py_buffer
341     errors: str(accept={str, NoneType}) = None
342     final: bool(accept={int}) = False
343     /
344 [clinic start generated code]*/
345 
346 static PyObject *
_codecs_utf_16_be_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)347 _codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
348                               const char *errors, int final)
349 /*[clinic end generated code: output=49f6465ea07669c8 input=e49012400974649b]*/
350 {
351     int byteorder = 1;
352     /* This is overwritten unless final is true. */
353     Py_ssize_t consumed = data->len;
354     PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
355                                                       errors, &byteorder,
356                                                       final ? NULL : &consumed);
357     return codec_tuple(decoded, consumed);
358 }
359 
360 /* This non-standard version also provides access to the byteorder
361    parameter of the builtin UTF-16 codec.
362 
363    It returns a tuple (unicode, bytesread, byteorder) with byteorder
364    being the value in effect at the end of data.
365 
366 */
367 /*[clinic input]
368 _codecs.utf_16_ex_decode
369     data: Py_buffer
370     errors: str(accept={str, NoneType}) = None
371     byteorder: int = 0
372     final: bool(accept={int}) = False
373     /
374 [clinic start generated code]*/
375 
376 static PyObject *
_codecs_utf_16_ex_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int byteorder,int final)377 _codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
378                               const char *errors, int byteorder, int final)
379 /*[clinic end generated code: output=0f385f251ecc1988 input=5a9c19f2e6b6cf0e]*/
380 {
381     /* This is overwritten unless final is true. */
382     Py_ssize_t consumed = data->len;
383 
384     PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
385                                                       errors, &byteorder,
386                                                       final ? NULL : &consumed);
387     if (decoded == NULL)
388         return NULL;
389     return Py_BuildValue("Nni", decoded, consumed, byteorder);
390 }
391 
392 /*[clinic input]
393 _codecs.utf_32_decode
394     data: Py_buffer
395     errors: str(accept={str, NoneType}) = None
396     final: bool(accept={int}) = False
397     /
398 [clinic start generated code]*/
399 
400 static PyObject *
_codecs_utf_32_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)401 _codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
402                            const char *errors, int final)
403 /*[clinic end generated code: output=2fc961807f7b145f input=fd7193965627eb58]*/
404 {
405     int byteorder = 0;
406     /* This is overwritten unless final is true. */
407     Py_ssize_t consumed = data->len;
408     PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
409                                                       errors, &byteorder,
410                                                       final ? NULL : &consumed);
411     return codec_tuple(decoded, consumed);
412 }
413 
414 /*[clinic input]
415 _codecs.utf_32_le_decode
416     data: Py_buffer
417     errors: str(accept={str, NoneType}) = None
418     final: bool(accept={int}) = False
419     /
420 [clinic start generated code]*/
421 
422 static PyObject *
_codecs_utf_32_le_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)423 _codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
424                               const char *errors, int final)
425 /*[clinic end generated code: output=ec8f46b67a94f3e6 input=9078ec70acfe7613]*/
426 {
427     int byteorder = -1;
428     /* This is overwritten unless final is true. */
429     Py_ssize_t consumed = data->len;
430     PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
431                                                       errors, &byteorder,
432                                                       final ? NULL : &consumed);
433     return codec_tuple(decoded, consumed);
434 }
435 
436 /*[clinic input]
437 _codecs.utf_32_be_decode
438     data: Py_buffer
439     errors: str(accept={str, NoneType}) = None
440     final: bool(accept={int}) = False
441     /
442 [clinic start generated code]*/
443 
444 static PyObject *
_codecs_utf_32_be_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)445 _codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
446                               const char *errors, int final)
447 /*[clinic end generated code: output=ff82bae862c92c4e input=f1ae1bbbb86648ff]*/
448 {
449     int byteorder = 1;
450     /* This is overwritten unless final is true. */
451     Py_ssize_t consumed = data->len;
452     PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
453                                                       errors, &byteorder,
454                                                       final ? NULL : &consumed);
455     return codec_tuple(decoded, consumed);
456 }
457 
458 /* This non-standard version also provides access to the byteorder
459    parameter of the builtin UTF-32 codec.
460 
461    It returns a tuple (unicode, bytesread, byteorder) with byteorder
462    being the value in effect at the end of data.
463 
464 */
465 /*[clinic input]
466 _codecs.utf_32_ex_decode
467     data: Py_buffer
468     errors: str(accept={str, NoneType}) = None
469     byteorder: int = 0
470     final: bool(accept={int}) = False
471     /
472 [clinic start generated code]*/
473 
474 static PyObject *
_codecs_utf_32_ex_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int byteorder,int final)475 _codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
476                               const char *errors, int byteorder, int final)
477 /*[clinic end generated code: output=6bfb177dceaf4848 input=e46a73bc859d0bd0]*/
478 {
479     Py_ssize_t consumed = data->len;
480     PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
481                                                       errors, &byteorder,
482                                                       final ? NULL : &consumed);
483     if (decoded == NULL)
484         return NULL;
485     return Py_BuildValue("Nni", decoded, consumed, byteorder);
486 }
487 
488 /*[clinic input]
489 _codecs.unicode_escape_decode
490     data: Py_buffer(accept={str, buffer})
491     errors: str(accept={str, NoneType}) = None
492     final: bool(accept={int}) = True
493     /
494 [clinic start generated code]*/
495 
496 static PyObject *
_codecs_unicode_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)497 _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
498                                    const char *errors, int final)
499 /*[clinic end generated code: output=b284f97b12c635ee input=6154f039a9f7c639]*/
500 {
501     Py_ssize_t consumed = data->len;
502     PyObject *decoded = _PyUnicode_DecodeUnicodeEscapeStateful(data->buf, data->len,
503                                                                errors,
504                                                                final ? NULL : &consumed);
505     return codec_tuple(decoded, consumed);
506 }
507 
508 /*[clinic input]
509 _codecs.raw_unicode_escape_decode
510     data: Py_buffer(accept={str, buffer})
511     errors: str(accept={str, NoneType}) = None
512     final: bool(accept={int}) = True
513     /
514 [clinic start generated code]*/
515 
516 static PyObject *
_codecs_raw_unicode_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)517 _codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
518                                        const char *errors, int final)
519 /*[clinic end generated code: output=11dbd96301e2879e input=2d166191beb3235a]*/
520 {
521     Py_ssize_t consumed = data->len;
522     PyObject *decoded = _PyUnicode_DecodeRawUnicodeEscapeStateful(data->buf, data->len,
523                                                                   errors,
524                                                                   final ? NULL : &consumed);
525     return codec_tuple(decoded, consumed);
526 }
527 
528 /*[clinic input]
529 _codecs.latin_1_decode
530     data: Py_buffer
531     errors: str(accept={str, NoneType}) = None
532     /
533 [clinic start generated code]*/
534 
535 static PyObject *
_codecs_latin_1_decode_impl(PyObject * module,Py_buffer * data,const char * errors)536 _codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
537                             const char *errors)
538 /*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/
539 {
540     PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
541     return codec_tuple(decoded, data->len);
542 }
543 
544 /*[clinic input]
545 _codecs.ascii_decode
546     data: Py_buffer
547     errors: str(accept={str, NoneType}) = None
548     /
549 [clinic start generated code]*/
550 
551 static PyObject *
_codecs_ascii_decode_impl(PyObject * module,Py_buffer * data,const char * errors)552 _codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
553                           const char *errors)
554 /*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/
555 {
556     PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
557     return codec_tuple(decoded, data->len);
558 }
559 
560 /*[clinic input]
561 _codecs.charmap_decode
562     data: Py_buffer
563     errors: str(accept={str, NoneType}) = None
564     mapping: object = None
565     /
566 [clinic start generated code]*/
567 
568 static PyObject *
_codecs_charmap_decode_impl(PyObject * module,Py_buffer * data,const char * errors,PyObject * mapping)569 _codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
570                             const char *errors, PyObject *mapping)
571 /*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/
572 {
573     PyObject *decoded;
574 
575     if (mapping == Py_None)
576         mapping = NULL;
577 
578     decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
579     return codec_tuple(decoded, data->len);
580 }
581 
582 #ifdef MS_WINDOWS
583 
584 /*[clinic input]
585 _codecs.mbcs_decode
586     data: Py_buffer
587     errors: str(accept={str, NoneType}) = None
588     final: bool(accept={int}) = False
589     /
590 [clinic start generated code]*/
591 
592 static PyObject *
_codecs_mbcs_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)593 _codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
594                          const char *errors, int final)
595 /*[clinic end generated code: output=39b65b8598938c4b input=1c1d50f08fa53789]*/
596 {
597     Py_ssize_t consumed = data->len;
598     PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
599             errors, final ? NULL : &consumed);
600     return codec_tuple(decoded, consumed);
601 }
602 
603 /*[clinic input]
604 _codecs.oem_decode
605     data: Py_buffer
606     errors: str(accept={str, NoneType}) = None
607     final: bool(accept={int}) = False
608     /
609 [clinic start generated code]*/
610 
611 static PyObject *
_codecs_oem_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)612 _codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
613                         const char *errors, int final)
614 /*[clinic end generated code: output=da1617612f3fcad8 input=81b67cba811022e5]*/
615 {
616     Py_ssize_t consumed = data->len;
617     PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
618         data->buf, data->len, errors, final ? NULL : &consumed);
619     return codec_tuple(decoded, consumed);
620 }
621 
622 /*[clinic input]
623 _codecs.code_page_decode
624     codepage: int
625     data: Py_buffer
626     errors: str(accept={str, NoneType}) = None
627     final: bool(accept={int}) = False
628     /
629 [clinic start generated code]*/
630 
631 static PyObject *
_codecs_code_page_decode_impl(PyObject * module,int codepage,Py_buffer * data,const char * errors,int final)632 _codecs_code_page_decode_impl(PyObject *module, int codepage,
633                               Py_buffer *data, const char *errors, int final)
634 /*[clinic end generated code: output=53008ea967da3fff input=c5f58d036cb63575]*/
635 {
636     Py_ssize_t consumed = data->len;
637     PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
638                                                          data->buf, data->len,
639                                                          errors,
640                                                          final ? NULL : &consumed);
641     return codec_tuple(decoded, consumed);
642 }
643 
644 #endif /* MS_WINDOWS */
645 
646 /* --- Encoder ------------------------------------------------------------ */
647 
648 /*[clinic input]
649 _codecs.readbuffer_encode
650     data: Py_buffer(accept={str, buffer})
651     errors: str(accept={str, NoneType}) = None
652     /
653 [clinic start generated code]*/
654 
655 static PyObject *
_codecs_readbuffer_encode_impl(PyObject * module,Py_buffer * data,const char * errors)656 _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
657                                const char *errors)
658 /*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/
659 {
660     PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
661     return codec_tuple(result, data->len);
662 }
663 
664 /*[clinic input]
665 _codecs.utf_7_encode
666     str: unicode
667     errors: str(accept={str, NoneType}) = None
668     /
669 [clinic start generated code]*/
670 
671 static PyObject *
_codecs_utf_7_encode_impl(PyObject * module,PyObject * str,const char * errors)672 _codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
673                           const char *errors)
674 /*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
675 {
676     return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
677                        PyUnicode_GET_LENGTH(str));
678 }
679 
680 /*[clinic input]
681 _codecs.utf_8_encode
682     str: unicode
683     errors: str(accept={str, NoneType}) = None
684     /
685 [clinic start generated code]*/
686 
687 static PyObject *
_codecs_utf_8_encode_impl(PyObject * module,PyObject * str,const char * errors)688 _codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
689                           const char *errors)
690 /*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/
691 {
692     return codec_tuple(_PyUnicode_AsUTF8String(str, errors),
693                        PyUnicode_GET_LENGTH(str));
694 }
695 
696 /* This version provides access to the byteorder parameter of the
697    builtin UTF-16 codecs as optional third argument. It defaults to 0
698    which means: use the native byte order and prepend the data with a
699    BOM mark.
700 
701 */
702 
703 /*[clinic input]
704 _codecs.utf_16_encode
705     str: unicode
706     errors: str(accept={str, NoneType}) = None
707     byteorder: int = 0
708     /
709 [clinic start generated code]*/
710 
711 static PyObject *
_codecs_utf_16_encode_impl(PyObject * module,PyObject * str,const char * errors,int byteorder)712 _codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
713                            const char *errors, int byteorder)
714 /*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/
715 {
716     return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
717                        PyUnicode_GET_LENGTH(str));
718 }
719 
720 /*[clinic input]
721 _codecs.utf_16_le_encode
722     str: unicode
723     errors: str(accept={str, NoneType}) = None
724     /
725 [clinic start generated code]*/
726 
727 static PyObject *
_codecs_utf_16_le_encode_impl(PyObject * module,PyObject * str,const char * errors)728 _codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
729                               const char *errors)
730 /*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/
731 {
732     return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
733                        PyUnicode_GET_LENGTH(str));
734 }
735 
736 /*[clinic input]
737 _codecs.utf_16_be_encode
738     str: unicode
739     errors: str(accept={str, NoneType}) = None
740     /
741 [clinic start generated code]*/
742 
743 static PyObject *
_codecs_utf_16_be_encode_impl(PyObject * module,PyObject * str,const char * errors)744 _codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
745                               const char *errors)
746 /*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/
747 {
748     return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
749                        PyUnicode_GET_LENGTH(str));
750 }
751 
752 /* This version provides access to the byteorder parameter of the
753    builtin UTF-32 codecs as optional third argument. It defaults to 0
754    which means: use the native byte order and prepend the data with a
755    BOM mark.
756 
757 */
758 
759 /*[clinic input]
760 _codecs.utf_32_encode
761     str: unicode
762     errors: str(accept={str, NoneType}) = None
763     byteorder: int = 0
764     /
765 [clinic start generated code]*/
766 
767 static PyObject *
_codecs_utf_32_encode_impl(PyObject * module,PyObject * str,const char * errors,int byteorder)768 _codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
769                            const char *errors, int byteorder)
770 /*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/
771 {
772     return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
773                        PyUnicode_GET_LENGTH(str));
774 }
775 
776 /*[clinic input]
777 _codecs.utf_32_le_encode
778     str: unicode
779     errors: str(accept={str, NoneType}) = None
780     /
781 [clinic start generated code]*/
782 
783 static PyObject *
_codecs_utf_32_le_encode_impl(PyObject * module,PyObject * str,const char * errors)784 _codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
785                               const char *errors)
786 /*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/
787 {
788     return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
789                        PyUnicode_GET_LENGTH(str));
790 }
791 
792 /*[clinic input]
793 _codecs.utf_32_be_encode
794     str: unicode
795     errors: str(accept={str, NoneType}) = None
796     /
797 [clinic start generated code]*/
798 
799 static PyObject *
_codecs_utf_32_be_encode_impl(PyObject * module,PyObject * str,const char * errors)800 _codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
801                               const char *errors)
802 /*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/
803 {
804     return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
805                        PyUnicode_GET_LENGTH(str));
806 }
807 
808 /*[clinic input]
809 _codecs.unicode_escape_encode
810     str: unicode
811     errors: str(accept={str, NoneType}) = None
812     /
813 [clinic start generated code]*/
814 
815 static PyObject *
_codecs_unicode_escape_encode_impl(PyObject * module,PyObject * str,const char * errors)816 _codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
817                                    const char *errors)
818 /*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/
819 {
820     return codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
821                        PyUnicode_GET_LENGTH(str));
822 }
823 
824 /*[clinic input]
825 _codecs.raw_unicode_escape_encode
826     str: unicode
827     errors: str(accept={str, NoneType}) = None
828     /
829 [clinic start generated code]*/
830 
831 static PyObject *
_codecs_raw_unicode_escape_encode_impl(PyObject * module,PyObject * str,const char * errors)832 _codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
833                                        const char *errors)
834 /*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/
835 {
836     return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
837                        PyUnicode_GET_LENGTH(str));
838 }
839 
840 /*[clinic input]
841 _codecs.latin_1_encode
842     str: unicode
843     errors: str(accept={str, NoneType}) = None
844     /
845 [clinic start generated code]*/
846 
847 static PyObject *
_codecs_latin_1_encode_impl(PyObject * module,PyObject * str,const char * errors)848 _codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
849                             const char *errors)
850 /*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/
851 {
852     return codec_tuple(_PyUnicode_AsLatin1String(str, errors),
853                        PyUnicode_GET_LENGTH(str));
854 }
855 
856 /*[clinic input]
857 _codecs.ascii_encode
858     str: unicode
859     errors: str(accept={str, NoneType}) = None
860     /
861 [clinic start generated code]*/
862 
863 static PyObject *
_codecs_ascii_encode_impl(PyObject * module,PyObject * str,const char * errors)864 _codecs_ascii_encode_impl(PyObject *module, PyObject *str,
865                           const char *errors)
866 /*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/
867 {
868     return codec_tuple(_PyUnicode_AsASCIIString(str, errors),
869                        PyUnicode_GET_LENGTH(str));
870 }
871 
872 /*[clinic input]
873 _codecs.charmap_encode
874     str: unicode
875     errors: str(accept={str, NoneType}) = None
876     mapping: object = None
877     /
878 [clinic start generated code]*/
879 
880 static PyObject *
_codecs_charmap_encode_impl(PyObject * module,PyObject * str,const char * errors,PyObject * mapping)881 _codecs_charmap_encode_impl(PyObject *module, PyObject *str,
882                             const char *errors, PyObject *mapping)
883 /*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/
884 {
885     if (mapping == Py_None)
886         mapping = NULL;
887 
888     return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
889                        PyUnicode_GET_LENGTH(str));
890 }
891 
892 /*[clinic input]
893 _codecs.charmap_build
894     map: unicode
895     /
896 [clinic start generated code]*/
897 
898 static PyObject *
_codecs_charmap_build_impl(PyObject * module,PyObject * map)899 _codecs_charmap_build_impl(PyObject *module, PyObject *map)
900 /*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
901 {
902     return PyUnicode_BuildEncodingMap(map);
903 }
904 
905 #ifdef MS_WINDOWS
906 
907 /*[clinic input]
908 _codecs.mbcs_encode
909     str: unicode
910     errors: str(accept={str, NoneType}) = None
911     /
912 [clinic start generated code]*/
913 
914 static PyObject *
_codecs_mbcs_encode_impl(PyObject * module,PyObject * str,const char * errors)915 _codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
916 /*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/
917 {
918     return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
919                        PyUnicode_GET_LENGTH(str));
920 }
921 
922 /*[clinic input]
923 _codecs.oem_encode
924     str: unicode
925     errors: str(accept={str, NoneType}) = None
926     /
927 [clinic start generated code]*/
928 
929 static PyObject *
_codecs_oem_encode_impl(PyObject * module,PyObject * str,const char * errors)930 _codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
931 /*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/
932 {
933     return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors),
934         PyUnicode_GET_LENGTH(str));
935 }
936 
937 /*[clinic input]
938 _codecs.code_page_encode
939     code_page: int
940     str: unicode
941     errors: str(accept={str, NoneType}) = None
942     /
943 [clinic start generated code]*/
944 
945 static PyObject *
_codecs_code_page_encode_impl(PyObject * module,int code_page,PyObject * str,const char * errors)946 _codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
947                               const char *errors)
948 /*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/
949 {
950     return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors),
951                        PyUnicode_GET_LENGTH(str));
952 }
953 
954 #endif /* MS_WINDOWS */
955 
956 /* --- Error handler registry --------------------------------------------- */
957 
958 /*[clinic input]
959 _codecs.register_error
960     errors: str
961     handler: object
962     /
963 
964 Register the specified error handler under the name errors.
965 
966 handler must be a callable object, that will be called with an exception
967 instance containing information about the location of the encoding/decoding
968 error and must return a (replacement, new position) tuple.
969 [clinic start generated code]*/
970 
971 static PyObject *
_codecs_register_error_impl(PyObject * module,const char * errors,PyObject * handler)972 _codecs_register_error_impl(PyObject *module, const char *errors,
973                             PyObject *handler)
974 /*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
975 {
976     if (PyCodec_RegisterError(errors, handler))
977         return NULL;
978     Py_RETURN_NONE;
979 }
980 
981 /*[clinic input]
982 _codecs.lookup_error
983     name: str
984     /
985 
986 lookup_error(errors) -> handler
987 
988 Return the error handler for the specified error handling name or raise a
989 LookupError, if no handler exists under this name.
990 [clinic start generated code]*/
991 
992 static PyObject *
_codecs_lookup_error_impl(PyObject * module,const char * name)993 _codecs_lookup_error_impl(PyObject *module, const char *name)
994 /*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
995 {
996     return PyCodec_LookupError(name);
997 }
998 
999 /* --- Module API --------------------------------------------------------- */
1000 
/* Method table for the _codecs module, referenced by the module definition
   below.  Each entry is a *_METHODDEF macro rather than a literal
   PyMethodDef initializer; these are presumably defined by the included
   "clinic/_codecsmodule.c.h" header -- confirm there.  The array ends with
   a {NULL, NULL} sentinel. */
static PyMethodDef _codecs_functions[] = {
    _CODECS_REGISTER_METHODDEF
    _CODECS_UNREGISTER_METHODDEF
    _CODECS_LOOKUP_METHODDEF
    _CODECS_ENCODE_METHODDEF
    _CODECS_DECODE_METHODDEF
    _CODECS_ESCAPE_ENCODE_METHODDEF
    _CODECS_ESCAPE_DECODE_METHODDEF
    _CODECS_UTF_8_ENCODE_METHODDEF
    _CODECS_UTF_8_DECODE_METHODDEF
    _CODECS_UTF_7_ENCODE_METHODDEF
    _CODECS_UTF_7_DECODE_METHODDEF
    _CODECS_UTF_16_ENCODE_METHODDEF
    _CODECS_UTF_16_LE_ENCODE_METHODDEF
    _CODECS_UTF_16_BE_ENCODE_METHODDEF
    _CODECS_UTF_16_DECODE_METHODDEF
    _CODECS_UTF_16_LE_DECODE_METHODDEF
    _CODECS_UTF_16_BE_DECODE_METHODDEF
    _CODECS_UTF_16_EX_DECODE_METHODDEF
    _CODECS_UTF_32_ENCODE_METHODDEF
    _CODECS_UTF_32_LE_ENCODE_METHODDEF
    _CODECS_UTF_32_BE_ENCODE_METHODDEF
    _CODECS_UTF_32_DECODE_METHODDEF
    _CODECS_UTF_32_LE_DECODE_METHODDEF
    _CODECS_UTF_32_BE_DECODE_METHODDEF
    _CODECS_UTF_32_EX_DECODE_METHODDEF
    _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
    _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
    _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
    _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
    _CODECS_LATIN_1_ENCODE_METHODDEF
    _CODECS_LATIN_1_DECODE_METHODDEF
    _CODECS_ASCII_ENCODE_METHODDEF
    _CODECS_ASCII_DECODE_METHODDEF
    _CODECS_CHARMAP_ENCODE_METHODDEF
    _CODECS_CHARMAP_DECODE_METHODDEF
    _CODECS_CHARMAP_BUILD_METHODDEF
    _CODECS_READBUFFER_ENCODE_METHODDEF
    /* NOTE(review): the mbcs/oem/code_page encoders in this file are only
       compiled under MS_WINDOWS, yet these entries are listed
       unconditionally; presumably the macros expand to nothing on other
       platforms -- confirm in the clinic header. */
    _CODECS_MBCS_ENCODE_METHODDEF
    _CODECS_MBCS_DECODE_METHODDEF
    _CODECS_OEM_ENCODE_METHODDEF
    _CODECS_OEM_DECODE_METHODDEF
    _CODECS_CODE_PAGE_ENCODE_METHODDEF
    _CODECS_CODE_PAGE_DECODE_METHODDEF
    _CODECS_REGISTER_ERROR_METHODDEF
    _CODECS_LOOKUP_ERROR_METHODDEF
    {NULL, NULL}                /* sentinel */
};
1049 
/* Slot array referenced by the module definition below.  It contains only
   the {0, NULL} entry, i.e. no slots are defined for this module. */
static PyModuleDef_Slot _codecs_slots[] = {
    {0, NULL}
};
1053 
1054 static struct PyModuleDef codecsmodule = {
1055         PyModuleDef_HEAD_INIT,
1056         "_codecs",
1057         NULL,
1058         0,
1059         _codecs_functions,
1060         _codecs_slots,
1061         NULL,
1062         NULL,
1063         NULL
1064 };
1065 
1066 PyMODINIT_FUNC
PyInit__codecs(void)1067 PyInit__codecs(void)
1068 {
1069     return PyModuleDef_Init(&codecsmodule);
1070 }
1071