1 /* ------------------------------------------------------------------------
2 
3    _codecs -- Provides access to the codec registry and the builtin
4               codecs.
5 
6    This module should never be imported directly. The standard library
7    module "codecs" wraps this builtin module for use within Python.
8 
9    The codec registry is accessible via:
10 
11      register(search_function) -> None
12 
13      lookup(encoding) -> CodecInfo object
14 
15    The builtin Unicode codecs use the following interface:
16 
     <encoding>_encode(Unicode_object[,errors='strict']) ->
        (bytes object, characters consumed)
19 
20      <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21         (Unicode object, bytes consumed)
22 
23    These <encoding>s are available: utf_8, unicode_escape,
24    raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32).
25 
26 
27 Written by Marc-Andre Lemburg (mal@lemburg.com).
28 
29 Copyright (c) Corporation for National Research Initiatives.
30 
31    ------------------------------------------------------------------------ */
32 
33 #define PY_SSIZE_T_CLEAN
34 #include "Python.h"
35 
36 #ifdef MS_WINDOWS
37 #include <windows.h>
38 #endif
39 
40 /*[clinic input]
41 module _codecs
42 [clinic start generated code]*/
43 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
44 
45 #include "clinic/_codecsmodule.c.h"
46 
47 /* --- Registry ----------------------------------------------------------- */
48 
49 /*[clinic input]
50 _codecs.register
51     search_function: object
52     /
53 
54 Register a codec search function.
55 
56 Search functions are expected to take one argument, the encoding name in
57 all lower case letters, and either return None, or a tuple of functions
58 (encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
59 [clinic start generated code]*/
60 
61 static PyObject *
_codecs_register(PyObject * module,PyObject * search_function)62 _codecs_register(PyObject *module, PyObject *search_function)
63 /*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
64 {
65     if (PyCodec_Register(search_function))
66         return NULL;
67 
68     Py_RETURN_NONE;
69 }
70 
71 /*[clinic input]
72 _codecs.lookup
73     encoding: str
74     /
75 
76 Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
77 [clinic start generated code]*/
78 
79 static PyObject *
_codecs_lookup_impl(PyObject * module,const char * encoding)80 _codecs_lookup_impl(PyObject *module, const char *encoding)
81 /*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/
82 {
83     return _PyCodec_Lookup(encoding);
84 }
85 
86 /*[clinic input]
87 _codecs.encode
88     obj: object
89     encoding: str(c_default="NULL") = "utf-8"
90     errors: str(c_default="NULL") = "strict"
91 
92 Encodes obj using the codec registered for encoding.
93 
94 The default encoding is 'utf-8'.  errors may be given to set a
95 different error handling scheme.  Default is 'strict' meaning that encoding
96 errors raise a ValueError.  Other possible values are 'ignore', 'replace'
97 and 'backslashreplace' as well as any other name registered with
98 codecs.register_error that can handle ValueErrors.
99 [clinic start generated code]*/
100 
101 static PyObject *
_codecs_encode_impl(PyObject * module,PyObject * obj,const char * encoding,const char * errors)102 _codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
103                     const char *errors)
104 /*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
105 {
106     if (encoding == NULL)
107         encoding = PyUnicode_GetDefaultEncoding();
108 
109     /* Encode via the codec registry */
110     return PyCodec_Encode(obj, encoding, errors);
111 }
112 
113 /*[clinic input]
114 _codecs.decode
115     obj: object
116     encoding: str(c_default="NULL") = "utf-8"
117     errors: str(c_default="NULL") = "strict"
118 
119 Decodes obj using the codec registered for encoding.
120 
121 Default encoding is 'utf-8'.  errors may be given to set a
122 different error handling scheme.  Default is 'strict' meaning that encoding
123 errors raise a ValueError.  Other possible values are 'ignore', 'replace'
124 and 'backslashreplace' as well as any other name registered with
125 codecs.register_error that can handle ValueErrors.
126 [clinic start generated code]*/
127 
128 static PyObject *
_codecs_decode_impl(PyObject * module,PyObject * obj,const char * encoding,const char * errors)129 _codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
130                     const char *errors)
131 /*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
132 {
133     if (encoding == NULL)
134         encoding = PyUnicode_GetDefaultEncoding();
135 
136     /* Decode via the codec registry */
137     return PyCodec_Decode(obj, encoding, errors);
138 }
139 
140 /* --- Helpers ------------------------------------------------------------ */
141 
142 /*[clinic input]
143 _codecs._forget_codec
144 
145     encoding: str
146     /
147 
148 Purge the named codec from the internal codec lookup cache
149 [clinic start generated code]*/
150 
151 static PyObject *
_codecs__forget_codec_impl(PyObject * module,const char * encoding)152 _codecs__forget_codec_impl(PyObject *module, const char *encoding)
153 /*[clinic end generated code: output=0bde9f0a5b084aa2 input=18d5d92d0e386c38]*/
154 {
155     if (_PyCodec_Forget(encoding) < 0) {
156         return NULL;
157     };
158     Py_RETURN_NONE;
159 }
160 
161 static
codec_tuple(PyObject * decoded,Py_ssize_t len)162 PyObject *codec_tuple(PyObject *decoded,
163                       Py_ssize_t len)
164 {
165     if (decoded == NULL)
166         return NULL;
167     return Py_BuildValue("Nn", decoded, len);
168 }
169 
170 /* --- String codecs ------------------------------------------------------ */
171 /*[clinic input]
172 _codecs.escape_decode
173     data: Py_buffer(accept={str, buffer})
174     errors: str(accept={str, NoneType}) = None
175     /
176 [clinic start generated code]*/
177 
178 static PyObject *
_codecs_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors)179 _codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
180                            const char *errors)
181 /*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/
182 {
183     PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
184                                              errors, 0, NULL);
185     return codec_tuple(decoded, data->len);
186 }
187 
188 /*[clinic input]
189 _codecs.escape_encode
190     data: object(subclass_of='&PyBytes_Type')
191     errors: str(accept={str, NoneType}) = None
192     /
193 [clinic start generated code]*/
194 
195 static PyObject *
_codecs_escape_encode_impl(PyObject * module,PyObject * data,const char * errors)196 _codecs_escape_encode_impl(PyObject *module, PyObject *data,
197                            const char *errors)
198 /*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/
199 {
200     Py_ssize_t size;
201     Py_ssize_t newsize;
202     PyObject *v;
203 
204     size = PyBytes_GET_SIZE(data);
205     if (size > PY_SSIZE_T_MAX / 4) {
206         PyErr_SetString(PyExc_OverflowError,
207             "string is too large to encode");
208             return NULL;
209     }
210     newsize = 4*size;
211     v = PyBytes_FromStringAndSize(NULL, newsize);
212 
213     if (v == NULL) {
214         return NULL;
215     }
216     else {
217         Py_ssize_t i;
218         char c;
219         char *p = PyBytes_AS_STRING(v);
220 
221         for (i = 0; i < size; i++) {
222             /* There's at least enough room for a hex escape */
223             assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
224             c = PyBytes_AS_STRING(data)[i];
225             if (c == '\'' || c == '\\')
226                 *p++ = '\\', *p++ = c;
227             else if (c == '\t')
228                 *p++ = '\\', *p++ = 't';
229             else if (c == '\n')
230                 *p++ = '\\', *p++ = 'n';
231             else if (c == '\r')
232                 *p++ = '\\', *p++ = 'r';
233             else if (c < ' ' || c >= 0x7f) {
234                 *p++ = '\\';
235                 *p++ = 'x';
236                 *p++ = Py_hexdigits[(c & 0xf0) >> 4];
237                 *p++ = Py_hexdigits[c & 0xf];
238             }
239             else
240                 *p++ = c;
241         }
242         *p = '\0';
243         if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
244             return NULL;
245         }
246     }
247 
248     return codec_tuple(v, size);
249 }
250 
251 /* --- Decoder ------------------------------------------------------------ */
252 /*[clinic input]
253 _codecs.utf_7_decode
254     data: Py_buffer
255     errors: str(accept={str, NoneType}) = None
256     final: bool(accept={int}) = False
257     /
258 [clinic start generated code]*/
259 
260 static PyObject *
_codecs_utf_7_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)261 _codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
262                           const char *errors, int final)
263 /*[clinic end generated code: output=0cd3a944a32a4089 input=22c395d357815d26]*/
264 {
265     Py_ssize_t consumed = data->len;
266     PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
267                                                      errors,
268                                                      final ? NULL : &consumed);
269     return codec_tuple(decoded, consumed);
270 }
271 
272 /*[clinic input]
273 _codecs.utf_8_decode
274     data: Py_buffer
275     errors: str(accept={str, NoneType}) = None
276     final: bool(accept={int}) = False
277     /
278 [clinic start generated code]*/
279 
280 static PyObject *
_codecs_utf_8_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)281 _codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
282                           const char *errors, int final)
283 /*[clinic end generated code: output=10f74dec8d9bb8bf input=f611b3867352ba59]*/
284 {
285     Py_ssize_t consumed = data->len;
286     PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
287                                                      errors,
288                                                      final ? NULL : &consumed);
289     return codec_tuple(decoded, consumed);
290 }
291 
292 /*[clinic input]
293 _codecs.utf_16_decode
294     data: Py_buffer
295     errors: str(accept={str, NoneType}) = None
296     final: bool(accept={int}) = False
297     /
298 [clinic start generated code]*/
299 
300 static PyObject *
_codecs_utf_16_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)301 _codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
302                            const char *errors, int final)
303 /*[clinic end generated code: output=783b442abcbcc2d0 input=191d360bd7309180]*/
304 {
305     int byteorder = 0;
306     /* This is overwritten unless final is true. */
307     Py_ssize_t consumed = data->len;
308     PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
309                                                       errors, &byteorder,
310                                                       final ? NULL : &consumed);
311     return codec_tuple(decoded, consumed);
312 }
313 
314 /*[clinic input]
315 _codecs.utf_16_le_decode
316     data: Py_buffer
317     errors: str(accept={str, NoneType}) = None
318     final: bool(accept={int}) = False
319     /
320 [clinic start generated code]*/
321 
322 static PyObject *
_codecs_utf_16_le_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)323 _codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
324                               const char *errors, int final)
325 /*[clinic end generated code: output=899b9e6364379dcd input=c6904fdc27fb4724]*/
326 {
327     int byteorder = -1;
328     /* This is overwritten unless final is true. */
329     Py_ssize_t consumed = data->len;
330     PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
331                                                       errors, &byteorder,
332                                                       final ? NULL : &consumed);
333     return codec_tuple(decoded, consumed);
334 }
335 
336 /*[clinic input]
337 _codecs.utf_16_be_decode
338     data: Py_buffer
339     errors: str(accept={str, NoneType}) = None
340     final: bool(accept={int}) = False
341     /
342 [clinic start generated code]*/
343 
344 static PyObject *
_codecs_utf_16_be_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)345 _codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
346                               const char *errors, int final)
347 /*[clinic end generated code: output=49f6465ea07669c8 input=e49012400974649b]*/
348 {
349     int byteorder = 1;
350     /* This is overwritten unless final is true. */
351     Py_ssize_t consumed = data->len;
352     PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
353                                                       errors, &byteorder,
354                                                       final ? NULL : &consumed);
355     return codec_tuple(decoded, consumed);
356 }
357 
358 /* This non-standard version also provides access to the byteorder
359    parameter of the builtin UTF-16 codec.
360 
361    It returns a tuple (unicode, bytesread, byteorder) with byteorder
362    being the value in effect at the end of data.
363 
364 */
365 /*[clinic input]
366 _codecs.utf_16_ex_decode
367     data: Py_buffer
368     errors: str(accept={str, NoneType}) = None
369     byteorder: int = 0
370     final: bool(accept={int}) = False
371     /
372 [clinic start generated code]*/
373 
374 static PyObject *
_codecs_utf_16_ex_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int byteorder,int final)375 _codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
376                               const char *errors, int byteorder, int final)
377 /*[clinic end generated code: output=0f385f251ecc1988 input=5a9c19f2e6b6cf0e]*/
378 {
379     /* This is overwritten unless final is true. */
380     Py_ssize_t consumed = data->len;
381 
382     PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
383                                                       errors, &byteorder,
384                                                       final ? NULL : &consumed);
385     if (decoded == NULL)
386         return NULL;
387     return Py_BuildValue("Nni", decoded, consumed, byteorder);
388 }
389 
390 /*[clinic input]
391 _codecs.utf_32_decode
392     data: Py_buffer
393     errors: str(accept={str, NoneType}) = None
394     final: bool(accept={int}) = False
395     /
396 [clinic start generated code]*/
397 
398 static PyObject *
_codecs_utf_32_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)399 _codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
400                            const char *errors, int final)
401 /*[clinic end generated code: output=2fc961807f7b145f input=fd7193965627eb58]*/
402 {
403     int byteorder = 0;
404     /* This is overwritten unless final is true. */
405     Py_ssize_t consumed = data->len;
406     PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
407                                                       errors, &byteorder,
408                                                       final ? NULL : &consumed);
409     return codec_tuple(decoded, consumed);
410 }
411 
412 /*[clinic input]
413 _codecs.utf_32_le_decode
414     data: Py_buffer
415     errors: str(accept={str, NoneType}) = None
416     final: bool(accept={int}) = False
417     /
418 [clinic start generated code]*/
419 
420 static PyObject *
_codecs_utf_32_le_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)421 _codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
422                               const char *errors, int final)
423 /*[clinic end generated code: output=ec8f46b67a94f3e6 input=9078ec70acfe7613]*/
424 {
425     int byteorder = -1;
426     /* This is overwritten unless final is true. */
427     Py_ssize_t consumed = data->len;
428     PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
429                                                       errors, &byteorder,
430                                                       final ? NULL : &consumed);
431     return codec_tuple(decoded, consumed);
432 }
433 
434 /*[clinic input]
435 _codecs.utf_32_be_decode
436     data: Py_buffer
437     errors: str(accept={str, NoneType}) = None
438     final: bool(accept={int}) = False
439     /
440 [clinic start generated code]*/
441 
442 static PyObject *
_codecs_utf_32_be_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)443 _codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
444                               const char *errors, int final)
445 /*[clinic end generated code: output=ff82bae862c92c4e input=f1ae1bbbb86648ff]*/
446 {
447     int byteorder = 1;
448     /* This is overwritten unless final is true. */
449     Py_ssize_t consumed = data->len;
450     PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
451                                                       errors, &byteorder,
452                                                       final ? NULL : &consumed);
453     return codec_tuple(decoded, consumed);
454 }
455 
456 /* This non-standard version also provides access to the byteorder
457    parameter of the builtin UTF-32 codec.
458 
459    It returns a tuple (unicode, bytesread, byteorder) with byteorder
460    being the value in effect at the end of data.
461 
462 */
463 /*[clinic input]
464 _codecs.utf_32_ex_decode
465     data: Py_buffer
466     errors: str(accept={str, NoneType}) = None
467     byteorder: int = 0
468     final: bool(accept={int}) = False
469     /
470 [clinic start generated code]*/
471 
472 static PyObject *
_codecs_utf_32_ex_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int byteorder,int final)473 _codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
474                               const char *errors, int byteorder, int final)
475 /*[clinic end generated code: output=6bfb177dceaf4848 input=e46a73bc859d0bd0]*/
476 {
477     Py_ssize_t consumed = data->len;
478     PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
479                                                       errors, &byteorder,
480                                                       final ? NULL : &consumed);
481     if (decoded == NULL)
482         return NULL;
483     return Py_BuildValue("Nni", decoded, consumed, byteorder);
484 }
485 
486 /*[clinic input]
487 _codecs.unicode_escape_decode
488     data: Py_buffer(accept={str, buffer})
489     errors: str(accept={str, NoneType}) = None
490     /
491 [clinic start generated code]*/
492 
493 static PyObject *
_codecs_unicode_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors)494 _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
495                                    const char *errors)
496 /*[clinic end generated code: output=3ca3c917176b82ab input=8328081a3a569bd6]*/
497 {
498     PyObject *decoded = PyUnicode_DecodeUnicodeEscape(data->buf, data->len,
499                                                       errors);
500     return codec_tuple(decoded, data->len);
501 }
502 
503 /*[clinic input]
504 _codecs.raw_unicode_escape_decode
505     data: Py_buffer(accept={str, buffer})
506     errors: str(accept={str, NoneType}) = None
507     /
508 [clinic start generated code]*/
509 
510 static PyObject *
_codecs_raw_unicode_escape_decode_impl(PyObject * module,Py_buffer * data,const char * errors)511 _codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
512                                        const char *errors)
513 /*[clinic end generated code: output=c98eeb56028070a6 input=d2f5159ce3b3392f]*/
514 {
515     PyObject *decoded = PyUnicode_DecodeRawUnicodeEscape(data->buf, data->len,
516                                                          errors);
517     return codec_tuple(decoded, data->len);
518 }
519 
520 /*[clinic input]
521 _codecs.latin_1_decode
522     data: Py_buffer
523     errors: str(accept={str, NoneType}) = None
524     /
525 [clinic start generated code]*/
526 
527 static PyObject *
_codecs_latin_1_decode_impl(PyObject * module,Py_buffer * data,const char * errors)528 _codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
529                             const char *errors)
530 /*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/
531 {
532     PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
533     return codec_tuple(decoded, data->len);
534 }
535 
536 /*[clinic input]
537 _codecs.ascii_decode
538     data: Py_buffer
539     errors: str(accept={str, NoneType}) = None
540     /
541 [clinic start generated code]*/
542 
543 static PyObject *
_codecs_ascii_decode_impl(PyObject * module,Py_buffer * data,const char * errors)544 _codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
545                           const char *errors)
546 /*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/
547 {
548     PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
549     return codec_tuple(decoded, data->len);
550 }
551 
552 /*[clinic input]
553 _codecs.charmap_decode
554     data: Py_buffer
555     errors: str(accept={str, NoneType}) = None
556     mapping: object = None
557     /
558 [clinic start generated code]*/
559 
560 static PyObject *
_codecs_charmap_decode_impl(PyObject * module,Py_buffer * data,const char * errors,PyObject * mapping)561 _codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
562                             const char *errors, PyObject *mapping)
563 /*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/
564 {
565     PyObject *decoded;
566 
567     if (mapping == Py_None)
568         mapping = NULL;
569 
570     decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
571     return codec_tuple(decoded, data->len);
572 }
573 
574 #ifdef MS_WINDOWS
575 
576 /*[clinic input]
577 _codecs.mbcs_decode
578     data: Py_buffer
579     errors: str(accept={str, NoneType}) = None
580     final: bool(accept={int}) = False
581     /
582 [clinic start generated code]*/
583 
584 static PyObject *
_codecs_mbcs_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)585 _codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
586                          const char *errors, int final)
587 /*[clinic end generated code: output=39b65b8598938c4b input=1c1d50f08fa53789]*/
588 {
589     Py_ssize_t consumed = data->len;
590     PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
591             errors, final ? NULL : &consumed);
592     return codec_tuple(decoded, consumed);
593 }
594 
595 /*[clinic input]
596 _codecs.oem_decode
597     data: Py_buffer
598     errors: str(accept={str, NoneType}) = None
599     final: bool(accept={int}) = False
600     /
601 [clinic start generated code]*/
602 
603 static PyObject *
_codecs_oem_decode_impl(PyObject * module,Py_buffer * data,const char * errors,int final)604 _codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
605                         const char *errors, int final)
606 /*[clinic end generated code: output=da1617612f3fcad8 input=81b67cba811022e5]*/
607 {
608     Py_ssize_t consumed = data->len;
609     PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
610         data->buf, data->len, errors, final ? NULL : &consumed);
611     return codec_tuple(decoded, consumed);
612 }
613 
614 /*[clinic input]
615 _codecs.code_page_decode
616     codepage: int
617     data: Py_buffer
618     errors: str(accept={str, NoneType}) = None
619     final: bool(accept={int}) = False
620     /
621 [clinic start generated code]*/
622 
623 static PyObject *
_codecs_code_page_decode_impl(PyObject * module,int codepage,Py_buffer * data,const char * errors,int final)624 _codecs_code_page_decode_impl(PyObject *module, int codepage,
625                               Py_buffer *data, const char *errors, int final)
626 /*[clinic end generated code: output=53008ea967da3fff input=c5f58d036cb63575]*/
627 {
628     Py_ssize_t consumed = data->len;
629     PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
630                                                          data->buf, data->len,
631                                                          errors,
632                                                          final ? NULL : &consumed);
633     return codec_tuple(decoded, consumed);
634 }
635 
636 #endif /* MS_WINDOWS */
637 
638 /* --- Encoder ------------------------------------------------------------ */
639 
640 /*[clinic input]
641 _codecs.readbuffer_encode
642     data: Py_buffer(accept={str, buffer})
643     errors: str(accept={str, NoneType}) = None
644     /
645 [clinic start generated code]*/
646 
647 static PyObject *
_codecs_readbuffer_encode_impl(PyObject * module,Py_buffer * data,const char * errors)648 _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
649                                const char *errors)
650 /*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/
651 {
652     PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
653     return codec_tuple(result, data->len);
654 }
655 
656 /*[clinic input]
657 _codecs.utf_7_encode
658     str: unicode
659     errors: str(accept={str, NoneType}) = None
660     /
661 [clinic start generated code]*/
662 
663 static PyObject *
_codecs_utf_7_encode_impl(PyObject * module,PyObject * str,const char * errors)664 _codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
665                           const char *errors)
666 /*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
667 {
668     return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
669                        PyUnicode_GET_LENGTH(str));
670 }
671 
672 /*[clinic input]
673 _codecs.utf_8_encode
674     str: unicode
675     errors: str(accept={str, NoneType}) = None
676     /
677 [clinic start generated code]*/
678 
679 static PyObject *
_codecs_utf_8_encode_impl(PyObject * module,PyObject * str,const char * errors)680 _codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
681                           const char *errors)
682 /*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/
683 {
684     return codec_tuple(_PyUnicode_AsUTF8String(str, errors),
685                        PyUnicode_GET_LENGTH(str));
686 }
687 
688 /* This version provides access to the byteorder parameter of the
689    builtin UTF-16 codecs as optional third argument. It defaults to 0
690    which means: use the native byte order and prepend the data with a
691    BOM mark.
692 
693 */
694 
695 /*[clinic input]
696 _codecs.utf_16_encode
697     str: unicode
698     errors: str(accept={str, NoneType}) = None
699     byteorder: int = 0
700     /
701 [clinic start generated code]*/
702 
703 static PyObject *
_codecs_utf_16_encode_impl(PyObject * module,PyObject * str,const char * errors,int byteorder)704 _codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
705                            const char *errors, int byteorder)
706 /*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/
707 {
708     return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
709                        PyUnicode_GET_LENGTH(str));
710 }
711 
712 /*[clinic input]
713 _codecs.utf_16_le_encode
714     str: unicode
715     errors: str(accept={str, NoneType}) = None
716     /
717 [clinic start generated code]*/
718 
719 static PyObject *
_codecs_utf_16_le_encode_impl(PyObject * module,PyObject * str,const char * errors)720 _codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
721                               const char *errors)
722 /*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/
723 {
724     return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
725                        PyUnicode_GET_LENGTH(str));
726 }
727 
728 /*[clinic input]
729 _codecs.utf_16_be_encode
730     str: unicode
731     errors: str(accept={str, NoneType}) = None
732     /
733 [clinic start generated code]*/
734 
735 static PyObject *
_codecs_utf_16_be_encode_impl(PyObject * module,PyObject * str,const char * errors)736 _codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
737                               const char *errors)
738 /*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/
739 {
740     return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
741                        PyUnicode_GET_LENGTH(str));
742 }
743 
744 /* This version provides access to the byteorder parameter of the
745    builtin UTF-32 codecs as optional third argument. It defaults to 0
746    which means: use the native byte order and prepend the data with a
747    BOM mark.
748 
749 */
750 
751 /*[clinic input]
752 _codecs.utf_32_encode
753     str: unicode
754     errors: str(accept={str, NoneType}) = None
755     byteorder: int = 0
756     /
757 [clinic start generated code]*/
758 
759 static PyObject *
_codecs_utf_32_encode_impl(PyObject * module,PyObject * str,const char * errors,int byteorder)760 _codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
761                            const char *errors, int byteorder)
762 /*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/
763 {
764     return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
765                        PyUnicode_GET_LENGTH(str));
766 }
767 
768 /*[clinic input]
769 _codecs.utf_32_le_encode
770     str: unicode
771     errors: str(accept={str, NoneType}) = None
772     /
773 [clinic start generated code]*/
774 
775 static PyObject *
_codecs_utf_32_le_encode_impl(PyObject * module,PyObject * str,const char * errors)776 _codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
777                               const char *errors)
778 /*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/
779 {
780     return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
781                        PyUnicode_GET_LENGTH(str));
782 }
783 
784 /*[clinic input]
785 _codecs.utf_32_be_encode
786     str: unicode
787     errors: str(accept={str, NoneType}) = None
788     /
789 [clinic start generated code]*/
790 
791 static PyObject *
_codecs_utf_32_be_encode_impl(PyObject * module,PyObject * str,const char * errors)792 _codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
793                               const char *errors)
794 /*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/
795 {
796     return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
797                        PyUnicode_GET_LENGTH(str));
798 }
799 
800 /*[clinic input]
801 _codecs.unicode_escape_encode
802     str: unicode
803     errors: str(accept={str, NoneType}) = None
804     /
805 [clinic start generated code]*/
806 
807 static PyObject *
_codecs_unicode_escape_encode_impl(PyObject * module,PyObject * str,const char * errors)808 _codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
809                                    const char *errors)
810 /*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/
811 {
812     return codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
813                        PyUnicode_GET_LENGTH(str));
814 }
815 
816 /*[clinic input]
817 _codecs.raw_unicode_escape_encode
818     str: unicode
819     errors: str(accept={str, NoneType}) = None
820     /
821 [clinic start generated code]*/
822 
823 static PyObject *
_codecs_raw_unicode_escape_encode_impl(PyObject * module,PyObject * str,const char * errors)824 _codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
825                                        const char *errors)
826 /*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/
827 {
828     return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
829                        PyUnicode_GET_LENGTH(str));
830 }
831 
832 /*[clinic input]
833 _codecs.latin_1_encode
834     str: unicode
835     errors: str(accept={str, NoneType}) = None
836     /
837 [clinic start generated code]*/
838 
839 static PyObject *
_codecs_latin_1_encode_impl(PyObject * module,PyObject * str,const char * errors)840 _codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
841                             const char *errors)
842 /*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/
843 {
844     return codec_tuple(_PyUnicode_AsLatin1String(str, errors),
845                        PyUnicode_GET_LENGTH(str));
846 }
847 
848 /*[clinic input]
849 _codecs.ascii_encode
850     str: unicode
851     errors: str(accept={str, NoneType}) = None
852     /
853 [clinic start generated code]*/
854 
855 static PyObject *
_codecs_ascii_encode_impl(PyObject * module,PyObject * str,const char * errors)856 _codecs_ascii_encode_impl(PyObject *module, PyObject *str,
857                           const char *errors)
858 /*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/
859 {
860     return codec_tuple(_PyUnicode_AsASCIIString(str, errors),
861                        PyUnicode_GET_LENGTH(str));
862 }
863 
864 /*[clinic input]
865 _codecs.charmap_encode
866     str: unicode
867     errors: str(accept={str, NoneType}) = None
868     mapping: object = None
869     /
870 [clinic start generated code]*/
871 
872 static PyObject *
_codecs_charmap_encode_impl(PyObject * module,PyObject * str,const char * errors,PyObject * mapping)873 _codecs_charmap_encode_impl(PyObject *module, PyObject *str,
874                             const char *errors, PyObject *mapping)
875 /*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/
876 {
877     if (mapping == Py_None)
878         mapping = NULL;
879 
880     return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
881                        PyUnicode_GET_LENGTH(str));
882 }
883 
884 /*[clinic input]
885 _codecs.charmap_build
886     map: unicode
887     /
888 [clinic start generated code]*/
889 
890 static PyObject *
_codecs_charmap_build_impl(PyObject * module,PyObject * map)891 _codecs_charmap_build_impl(PyObject *module, PyObject *map)
892 /*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
893 {
894     return PyUnicode_BuildEncodingMap(map);
895 }
896 
897 #ifdef MS_WINDOWS
898 
899 /*[clinic input]
900 _codecs.mbcs_encode
901     str: unicode
902     errors: str(accept={str, NoneType}) = None
903     /
904 [clinic start generated code]*/
905 
906 static PyObject *
_codecs_mbcs_encode_impl(PyObject * module,PyObject * str,const char * errors)907 _codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
908 /*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/
909 {
910     return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
911                        PyUnicode_GET_LENGTH(str));
912 }
913 
914 /*[clinic input]
915 _codecs.oem_encode
916     str: unicode
917     errors: str(accept={str, NoneType}) = None
918     /
919 [clinic start generated code]*/
920 
921 static PyObject *
_codecs_oem_encode_impl(PyObject * module,PyObject * str,const char * errors)922 _codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
923 /*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/
924 {
925     return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors),
926         PyUnicode_GET_LENGTH(str));
927 }
928 
929 /*[clinic input]
930 _codecs.code_page_encode
931     code_page: int
932     str: unicode
933     errors: str(accept={str, NoneType}) = None
934     /
935 [clinic start generated code]*/
936 
937 static PyObject *
_codecs_code_page_encode_impl(PyObject * module,int code_page,PyObject * str,const char * errors)938 _codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
939                               const char *errors)
940 /*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/
941 {
942     return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors),
943                        PyUnicode_GET_LENGTH(str));
944 }
945 
946 #endif /* MS_WINDOWS */
947 
948 /* --- Error handler registry --------------------------------------------- */
949 
950 /*[clinic input]
951 _codecs.register_error
952     errors: str
953     handler: object
954     /
955 
956 Register the specified error handler under the name errors.
957 
958 handler must be a callable object, that will be called with an exception
959 instance containing information about the location of the encoding/decoding
960 error and must return a (replacement, new position) tuple.
961 [clinic start generated code]*/
962 
963 static PyObject *
_codecs_register_error_impl(PyObject * module,const char * errors,PyObject * handler)964 _codecs_register_error_impl(PyObject *module, const char *errors,
965                             PyObject *handler)
966 /*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
967 {
968     if (PyCodec_RegisterError(errors, handler))
969         return NULL;
970     Py_RETURN_NONE;
971 }
972 
973 /*[clinic input]
974 _codecs.lookup_error
975     name: str
976     /
977 
978 lookup_error(errors) -> handler
979 
980 Return the error handler for the specified error handling name or raise a
981 LookupError, if no handler exists under this name.
982 [clinic start generated code]*/
983 
984 static PyObject *
_codecs_lookup_error_impl(PyObject * module,const char * name)985 _codecs_lookup_error_impl(PyObject *module, const char *name)
986 /*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
987 {
988     return PyCodec_LookupError(name);
989 }
990 
991 /* --- Module API --------------------------------------------------------- */
992 
993 static PyMethodDef _codecs_functions[] = {
994     _CODECS_REGISTER_METHODDEF
995     _CODECS_LOOKUP_METHODDEF
996     _CODECS_ENCODE_METHODDEF
997     _CODECS_DECODE_METHODDEF
998     _CODECS_ESCAPE_ENCODE_METHODDEF
999     _CODECS_ESCAPE_DECODE_METHODDEF
1000     _CODECS_UTF_8_ENCODE_METHODDEF
1001     _CODECS_UTF_8_DECODE_METHODDEF
1002     _CODECS_UTF_7_ENCODE_METHODDEF
1003     _CODECS_UTF_7_DECODE_METHODDEF
1004     _CODECS_UTF_16_ENCODE_METHODDEF
1005     _CODECS_UTF_16_LE_ENCODE_METHODDEF
1006     _CODECS_UTF_16_BE_ENCODE_METHODDEF
1007     _CODECS_UTF_16_DECODE_METHODDEF
1008     _CODECS_UTF_16_LE_DECODE_METHODDEF
1009     _CODECS_UTF_16_BE_DECODE_METHODDEF
1010     _CODECS_UTF_16_EX_DECODE_METHODDEF
1011     _CODECS_UTF_32_ENCODE_METHODDEF
1012     _CODECS_UTF_32_LE_ENCODE_METHODDEF
1013     _CODECS_UTF_32_BE_ENCODE_METHODDEF
1014     _CODECS_UTF_32_DECODE_METHODDEF
1015     _CODECS_UTF_32_LE_DECODE_METHODDEF
1016     _CODECS_UTF_32_BE_DECODE_METHODDEF
1017     _CODECS_UTF_32_EX_DECODE_METHODDEF
1018     _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
1019     _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
1020     _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
1021     _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
1022     _CODECS_LATIN_1_ENCODE_METHODDEF
1023     _CODECS_LATIN_1_DECODE_METHODDEF
1024     _CODECS_ASCII_ENCODE_METHODDEF
1025     _CODECS_ASCII_DECODE_METHODDEF
1026     _CODECS_CHARMAP_ENCODE_METHODDEF
1027     _CODECS_CHARMAP_DECODE_METHODDEF
1028     _CODECS_CHARMAP_BUILD_METHODDEF
1029     _CODECS_READBUFFER_ENCODE_METHODDEF
1030     _CODECS_MBCS_ENCODE_METHODDEF
1031     _CODECS_MBCS_DECODE_METHODDEF
1032     _CODECS_OEM_ENCODE_METHODDEF
1033     _CODECS_OEM_DECODE_METHODDEF
1034     _CODECS_CODE_PAGE_ENCODE_METHODDEF
1035     _CODECS_CODE_PAGE_DECODE_METHODDEF
1036     _CODECS_REGISTER_ERROR_METHODDEF
1037     _CODECS_LOOKUP_ERROR_METHODDEF
1038     _CODECS__FORGET_CODEC_METHODDEF
1039     {NULL, NULL}                /* sentinel */
1040 };
1041 
1042 static struct PyModuleDef codecsmodule = {
1043         PyModuleDef_HEAD_INIT,
1044         "_codecs",
1045         NULL,
1046         -1,
1047         _codecs_functions,
1048         NULL,
1049         NULL,
1050         NULL,
1051         NULL
1052 };
1053 
1054 PyMODINIT_FUNC
PyInit__codecs(void)1055 PyInit__codecs(void)
1056 {
1057         return PyModule_Create(&codecsmodule);
1058 }
1059