/* JSON accelerator C extension: the _json module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN defined) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE defined) on other
 * platforms. */
6 
7 #if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8 #  error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
9 #endif
10 
11 #include "Python.h"
12 #include "structmember.h"
13 #include "pycore_accu.h"
14 
/* UNUSED marks a declaration (here: function parameters) as intentionally
   unused for GCC-compatible compilers; it expands to nothing elsewhere. */
#if defined(__GNUC__)
#  define UNUSED __attribute__((__unused__))
#else
#  define UNUSED
#endif
20 
21 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
22 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
23 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
24 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
25 
26 static PyTypeObject PyScannerType;
27 static PyTypeObject PyEncoderType;
28 
29 typedef struct _PyScannerObject {
30     PyObject_HEAD
31     signed char strict;
32     PyObject *object_hook;
33     PyObject *object_pairs_hook;
34     PyObject *parse_float;
35     PyObject *parse_int;
36     PyObject *parse_constant;
37     PyObject *memo;
38 } PyScannerObject;
39 
40 static PyMemberDef scanner_members[] = {
41     {"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
42     {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
43     {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
44     {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
45     {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
46     {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
47     {NULL}
48 };
49 
50 typedef struct _PyEncoderObject {
51     PyObject_HEAD
52     PyObject *markers;
53     PyObject *defaultfn;
54     PyObject *encoder;
55     PyObject *indent;
56     PyObject *key_separator;
57     PyObject *item_separator;
58     char sort_keys;
59     char skipkeys;
60     int allow_nan;
61     PyCFunction fast_encode;
62 } PyEncoderObject;
63 
64 static PyMemberDef encoder_members[] = {
65     {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
66     {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
67     {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
68     {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
69     {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
70     {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
71     {"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
72     {"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
73     {NULL}
74 };
75 
76 static PyObject *
join_list_unicode(PyObject * lst)77 join_list_unicode(PyObject *lst)
78 {
79     /* return u''.join(lst) */
80     static PyObject *sep = NULL;
81     if (sep == NULL) {
82         sep = PyUnicode_FromStringAndSize("", 0);
83         if (sep == NULL)
84             return NULL;
85     }
86     return PyUnicode_Join(sep, lst);
87 }
88 
89 /* Forward decls */
90 
91 static PyObject *
92 ascii_escape_unicode(PyObject *pystr);
93 static PyObject *
94 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
95 void init_json(void);
96 static PyObject *
97 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
98 static PyObject *
99 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
100 static PyObject *
101 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
102 static void
103 scanner_dealloc(PyObject *self);
104 static int
105 scanner_clear(PyObject *self);
106 static PyObject *
107 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
108 static void
109 encoder_dealloc(PyObject *self);
110 static int
111 encoder_clear(PyObject *self);
112 static int
113 encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc, PyObject *seq, Py_ssize_t indent_level);
114 static int
115 encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc, PyObject *obj, Py_ssize_t indent_level);
116 static int
117 encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc, PyObject *dct, Py_ssize_t indent_level);
118 static PyObject *
119 _encoded_const(PyObject *obj);
120 static void
121 raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
122 static PyObject *
123 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
124 static PyObject *
125 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
126 
/* S_CHAR(c): true when c is a printable ASCII character (' ' through '~')
   that can be copied into a JSON string unescaped, i.e. neither a backslash
   nor a double quote.  The argument is parenthesised so that an expression
   argument (for example a conditional expression) is evaluated as a unit. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE(c): true for the four JSON whitespace characters. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
129 
130 static Py_ssize_t
ascii_escape_unichar(Py_UCS4 c,unsigned char * output,Py_ssize_t chars)131 ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
132 {
133     /* Escape unicode code point c to ASCII escape sequences
134     in char *output. output must have at least 12 bytes unused to
135     accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
136     output[chars++] = '\\';
137     switch (c) {
138         case '\\': output[chars++] = c; break;
139         case '"': output[chars++] = c; break;
140         case '\b': output[chars++] = 'b'; break;
141         case '\f': output[chars++] = 'f'; break;
142         case '\n': output[chars++] = 'n'; break;
143         case '\r': output[chars++] = 'r'; break;
144         case '\t': output[chars++] = 't'; break;
145         default:
146             if (c >= 0x10000) {
147                 /* UTF-16 surrogate pair */
148                 Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
149                 output[chars++] = 'u';
150                 output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
151                 output[chars++] = Py_hexdigits[(v >>  8) & 0xf];
152                 output[chars++] = Py_hexdigits[(v >>  4) & 0xf];
153                 output[chars++] = Py_hexdigits[(v      ) & 0xf];
154                 c = Py_UNICODE_LOW_SURROGATE(c);
155                 output[chars++] = '\\';
156             }
157             output[chars++] = 'u';
158             output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
159             output[chars++] = Py_hexdigits[(c >>  8) & 0xf];
160             output[chars++] = Py_hexdigits[(c >>  4) & 0xf];
161             output[chars++] = Py_hexdigits[(c      ) & 0xf];
162     }
163     return chars;
164 }
165 
166 static PyObject *
ascii_escape_unicode(PyObject * pystr)167 ascii_escape_unicode(PyObject *pystr)
168 {
169     /* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
170     Py_ssize_t i;
171     Py_ssize_t input_chars;
172     Py_ssize_t output_size;
173     Py_ssize_t chars;
174     PyObject *rval;
175     void *input;
176     unsigned char *output;
177     int kind;
178 
179     if (PyUnicode_READY(pystr) == -1)
180         return NULL;
181 
182     input_chars = PyUnicode_GET_LENGTH(pystr);
183     input = PyUnicode_DATA(pystr);
184     kind = PyUnicode_KIND(pystr);
185 
186     /* Compute the output size */
187     for (i = 0, output_size = 2; i < input_chars; i++) {
188         Py_UCS4 c = PyUnicode_READ(kind, input, i);
189         Py_ssize_t d;
190         if (S_CHAR(c)) {
191             d = 1;
192         }
193         else {
194             switch(c) {
195             case '\\': case '"': case '\b': case '\f':
196             case '\n': case '\r': case '\t':
197                 d = 2; break;
198             default:
199                 d = c >= 0x10000 ? 12 : 6;
200             }
201         }
202         if (output_size > PY_SSIZE_T_MAX - d) {
203             PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
204             return NULL;
205         }
206         output_size += d;
207     }
208 
209     rval = PyUnicode_New(output_size, 127);
210     if (rval == NULL) {
211         return NULL;
212     }
213     output = PyUnicode_1BYTE_DATA(rval);
214     chars = 0;
215     output[chars++] = '"';
216     for (i = 0; i < input_chars; i++) {
217         Py_UCS4 c = PyUnicode_READ(kind, input, i);
218         if (S_CHAR(c)) {
219             output[chars++] = c;
220         }
221         else {
222             chars = ascii_escape_unichar(c, output, chars);
223         }
224     }
225     output[chars++] = '"';
226 #ifdef Py_DEBUG
227     assert(_PyUnicode_CheckConsistency(rval, 1));
228 #endif
229     return rval;
230 }
231 
232 static PyObject *
escape_unicode(PyObject * pystr)233 escape_unicode(PyObject *pystr)
234 {
235     /* Take a PyUnicode pystr and return a new escaped PyUnicode */
236     Py_ssize_t i;
237     Py_ssize_t input_chars;
238     Py_ssize_t output_size;
239     Py_ssize_t chars;
240     PyObject *rval;
241     void *input;
242     int kind;
243     Py_UCS4 maxchar;
244 
245     if (PyUnicode_READY(pystr) == -1)
246         return NULL;
247 
248     maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
249     input_chars = PyUnicode_GET_LENGTH(pystr);
250     input = PyUnicode_DATA(pystr);
251     kind = PyUnicode_KIND(pystr);
252 
253     /* Compute the output size */
254     for (i = 0, output_size = 2; i < input_chars; i++) {
255         Py_UCS4 c = PyUnicode_READ(kind, input, i);
256         Py_ssize_t d;
257         switch (c) {
258         case '\\': case '"': case '\b': case '\f':
259         case '\n': case '\r': case '\t':
260             d = 2;
261             break;
262         default:
263             if (c <= 0x1f)
264                 d = 6;
265             else
266                 d = 1;
267         }
268         if (output_size > PY_SSIZE_T_MAX - d) {
269             PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
270             return NULL;
271         }
272         output_size += d;
273     }
274 
275     rval = PyUnicode_New(output_size, maxchar);
276     if (rval == NULL)
277         return NULL;
278 
279     kind = PyUnicode_KIND(rval);
280 
281 #define ENCODE_OUTPUT do { \
282         chars = 0; \
283         output[chars++] = '"'; \
284         for (i = 0; i < input_chars; i++) { \
285             Py_UCS4 c = PyUnicode_READ(kind, input, i); \
286             switch (c) { \
287             case '\\': output[chars++] = '\\'; output[chars++] = c; break; \
288             case '"':  output[chars++] = '\\'; output[chars++] = c; break; \
289             case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \
290             case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \
291             case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \
292             case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \
293             case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \
294             default: \
295                 if (c <= 0x1f) { \
296                     output[chars++] = '\\'; \
297                     output[chars++] = 'u'; \
298                     output[chars++] = '0'; \
299                     output[chars++] = '0'; \
300                     output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \
301                     output[chars++] = Py_hexdigits[(c     ) & 0xf]; \
302                 } else { \
303                     output[chars++] = c; \
304                 } \
305             } \
306         } \
307         output[chars++] = '"'; \
308     } while (0)
309 
310     if (kind == PyUnicode_1BYTE_KIND) {
311         Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval);
312         ENCODE_OUTPUT;
313     } else if (kind == PyUnicode_2BYTE_KIND) {
314         Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval);
315         ENCODE_OUTPUT;
316     } else {
317         Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval);
318         assert(kind == PyUnicode_4BYTE_KIND);
319         ENCODE_OUTPUT;
320     }
321 #undef ENCODE_OUTPUT
322 
323 #ifdef Py_DEBUG
324     assert(_PyUnicode_CheckConsistency(rval, 1));
325 #endif
326     return rval;
327 }
328 
329 static void
raise_errmsg(const char * msg,PyObject * s,Py_ssize_t end)330 raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
331 {
332     /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
333     static PyObject *JSONDecodeError = NULL;
334     PyObject *exc;
335     if (JSONDecodeError == NULL) {
336         PyObject *decoder = PyImport_ImportModule("json.decoder");
337         if (decoder == NULL)
338             return;
339         JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
340         Py_DECREF(decoder);
341         if (JSONDecodeError == NULL)
342             return;
343     }
344     exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
345     if (exc) {
346         PyErr_SetObject(JSONDecodeError, exc);
347         Py_DECREF(exc);
348     }
349 }
350 
351 static void
raise_stop_iteration(Py_ssize_t idx)352 raise_stop_iteration(Py_ssize_t idx)
353 {
354     PyObject *value = PyLong_FromSsize_t(idx);
355     if (value != NULL) {
356         PyErr_SetObject(PyExc_StopIteration, value);
357         Py_DECREF(value);
358     }
359 }
360 
361 static PyObject *
_build_rval_index_tuple(PyObject * rval,Py_ssize_t idx)362 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
363     /* return (rval, idx) tuple, stealing reference to rval */
364     PyObject *tpl;
365     PyObject *pyidx;
366     /*
367     steal a reference to rval, returns (rval, idx)
368     */
369     if (rval == NULL) {
370         return NULL;
371     }
372     pyidx = PyLong_FromSsize_t(idx);
373     if (pyidx == NULL) {
374         Py_DECREF(rval);
375         return NULL;
376     }
377     tpl = PyTuple_New(2);
378     if (tpl == NULL) {
379         Py_DECREF(pyidx);
380         Py_DECREF(rval);
381         return NULL;
382     }
383     PyTuple_SET_ITEM(tpl, 0, rval);
384     PyTuple_SET_ITEM(tpl, 1, pyidx);
385     return tpl;
386 }
387 
/* Move the pending `chunk` (if any) onto the `chunks` list, creating the
   list on first use, and reset `chunk` to NULL.  Jumps to the enclosing
   function's `bail` label on failure.  Expands to a bare `if` statement:
   it is used without a trailing semicolon. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        int append_failed_; \
        if (chunks == NULL && (chunks = PyList_New(0)) == NULL) { \
            goto bail; \
        } \
        append_failed_ = PyList_Append(chunks, chunk); \
        Py_CLEAR(chunk); \
        if (append_failed_) { \
            goto bail; \
        } \
    }
402 
403 static PyObject *
scanstring_unicode(PyObject * pystr,Py_ssize_t end,int strict,Py_ssize_t * next_end_ptr)404 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
405 {
406     /* Read the JSON string from PyUnicode pystr.
407     end is the index of the first character after the quote.
408     if strict is zero then literal control characters are allowed
409     *next_end_ptr is a return-by-reference index of the character
410         after the end quote
411 
412     Return value is a new PyUnicode
413     */
414     PyObject *rval = NULL;
415     Py_ssize_t len;
416     Py_ssize_t begin = end - 1;
417     Py_ssize_t next /* = begin */;
418     const void *buf;
419     int kind;
420     PyObject *chunks = NULL;
421     PyObject *chunk = NULL;
422 
423     if (PyUnicode_READY(pystr) == -1)
424         return 0;
425 
426     len = PyUnicode_GET_LENGTH(pystr);
427     buf = PyUnicode_DATA(pystr);
428     kind = PyUnicode_KIND(pystr);
429 
430     if (end < 0 || len < end) {
431         PyErr_SetString(PyExc_ValueError, "end is out of bounds");
432         goto bail;
433     }
434     while (1) {
435         /* Find the end of the string or the next escape */
436         Py_UCS4 c = 0;
437         for (next = end; next < len; next++) {
438             c = PyUnicode_READ(kind, buf, next);
439             if (c == '"' || c == '\\') {
440                 break;
441             }
442             else if (c <= 0x1f && strict) {
443                 raise_errmsg("Invalid control character at", pystr, next);
444                 goto bail;
445             }
446         }
447         if (!(c == '"' || c == '\\')) {
448             raise_errmsg("Unterminated string starting at", pystr, begin);
449             goto bail;
450         }
451         /* Pick up this chunk if it's not zero length */
452         if (next != end) {
453             APPEND_OLD_CHUNK
454                 chunk = PyUnicode_FromKindAndData(
455                     kind,
456                     (char*)buf + kind * end,
457                     next - end);
458             if (chunk == NULL) {
459                 goto bail;
460             }
461         }
462         next++;
463         if (c == '"') {
464             end = next;
465             break;
466         }
467         if (next == len) {
468             raise_errmsg("Unterminated string starting at", pystr, begin);
469             goto bail;
470         }
471         c = PyUnicode_READ(kind, buf, next);
472         if (c != 'u') {
473             /* Non-unicode backslash escapes */
474             end = next + 1;
475             switch (c) {
476                 case '"': break;
477                 case '\\': break;
478                 case '/': break;
479                 case 'b': c = '\b'; break;
480                 case 'f': c = '\f'; break;
481                 case 'n': c = '\n'; break;
482                 case 'r': c = '\r'; break;
483                 case 't': c = '\t'; break;
484                 default: c = 0;
485             }
486             if (c == 0) {
487                 raise_errmsg("Invalid \\escape", pystr, end - 2);
488                 goto bail;
489             }
490         }
491         else {
492             c = 0;
493             next++;
494             end = next + 4;
495             if (end >= len) {
496                 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
497                 goto bail;
498             }
499             /* Decode 4 hex digits */
500             for (; next < end; next++) {
501                 Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
502                 c <<= 4;
503                 switch (digit) {
504                     case '0': case '1': case '2': case '3': case '4':
505                     case '5': case '6': case '7': case '8': case '9':
506                         c |= (digit - '0'); break;
507                     case 'a': case 'b': case 'c': case 'd': case 'e':
508                     case 'f':
509                         c |= (digit - 'a' + 10); break;
510                     case 'A': case 'B': case 'C': case 'D': case 'E':
511                     case 'F':
512                         c |= (digit - 'A' + 10); break;
513                     default:
514                         raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
515                         goto bail;
516                 }
517             }
518             /* Surrogate pair */
519             if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
520                 PyUnicode_READ(kind, buf, next++) == '\\' &&
521                 PyUnicode_READ(kind, buf, next++) == 'u') {
522                 Py_UCS4 c2 = 0;
523                 end += 6;
524                 /* Decode 4 hex digits */
525                 for (; next < end; next++) {
526                     Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
527                     c2 <<= 4;
528                     switch (digit) {
529                         case '0': case '1': case '2': case '3': case '4':
530                         case '5': case '6': case '7': case '8': case '9':
531                             c2 |= (digit - '0'); break;
532                         case 'a': case 'b': case 'c': case 'd': case 'e':
533                         case 'f':
534                             c2 |= (digit - 'a' + 10); break;
535                         case 'A': case 'B': case 'C': case 'D': case 'E':
536                         case 'F':
537                             c2 |= (digit - 'A' + 10); break;
538                         default:
539                             raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
540                             goto bail;
541                     }
542                 }
543                 if (Py_UNICODE_IS_LOW_SURROGATE(c2))
544                     c = Py_UNICODE_JOIN_SURROGATES(c, c2);
545                 else
546                     end -= 6;
547             }
548         }
549         APPEND_OLD_CHUNK
550         chunk = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &c, 1);
551         if (chunk == NULL) {
552             goto bail;
553         }
554     }
555 
556     if (chunks == NULL) {
557         if (chunk != NULL)
558             rval = chunk;
559         else
560             rval = PyUnicode_FromStringAndSize("", 0);
561     }
562     else {
563         APPEND_OLD_CHUNK
564         rval = join_list_unicode(chunks);
565         if (rval == NULL) {
566             goto bail;
567         }
568         Py_CLEAR(chunks);
569     }
570 
571     *next_end_ptr = end;
572     return rval;
573 bail:
574     *next_end_ptr = -1;
575     Py_XDECREF(chunks);
576     Py_XDECREF(chunk);
577     return NULL;
578 }
579 
580 PyDoc_STRVAR(pydoc_scanstring,
581     "scanstring(string, end, strict=True) -> (string, end)\n"
582     "\n"
583     "Scan the string s for a JSON string. End is the index of the\n"
584     "character in s after the quote that started the JSON string.\n"
585     "Unescapes all valid JSON string escape sequences and raises ValueError\n"
586     "on attempt to decode an invalid string. If strict is False then literal\n"
587     "control characters are allowed in the string.\n"
588     "\n"
589     "Returns a tuple of the decoded string and the index of the character in s\n"
590     "after the end quote."
591 );
592 
593 static PyObject *
py_scanstring(PyObject * self UNUSED,PyObject * args)594 py_scanstring(PyObject* self UNUSED, PyObject *args)
595 {
596     PyObject *pystr;
597     PyObject *rval;
598     Py_ssize_t end;
599     Py_ssize_t next_end = -1;
600     int strict = 1;
601     if (!PyArg_ParseTuple(args, "On|i:scanstring", &pystr, &end, &strict)) {
602         return NULL;
603     }
604     if (PyUnicode_Check(pystr)) {
605         rval = scanstring_unicode(pystr, end, strict, &next_end);
606     }
607     else {
608         PyErr_Format(PyExc_TypeError,
609                      "first argument must be a string, not %.80s",
610                      Py_TYPE(pystr)->tp_name);
611         return NULL;
612     }
613     return _build_rval_index_tuple(rval, next_end);
614 }
615 
616 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
617     "encode_basestring_ascii(string) -> string\n"
618     "\n"
619     "Return an ASCII-only JSON representation of a Python string"
620 );
621 
622 static PyObject *
py_encode_basestring_ascii(PyObject * self UNUSED,PyObject * pystr)623 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
624 {
625     PyObject *rval;
626     /* Return an ASCII-only JSON representation of a Python string */
627     /* METH_O */
628     if (PyUnicode_Check(pystr)) {
629         rval = ascii_escape_unicode(pystr);
630     }
631     else {
632         PyErr_Format(PyExc_TypeError,
633                      "first argument must be a string, not %.80s",
634                      Py_TYPE(pystr)->tp_name);
635         return NULL;
636     }
637     return rval;
638 }
639 
640 
641 PyDoc_STRVAR(pydoc_encode_basestring,
642     "encode_basestring(string) -> string\n"
643     "\n"
644     "Return a JSON representation of a Python string"
645 );
646 
647 static PyObject *
py_encode_basestring(PyObject * self UNUSED,PyObject * pystr)648 py_encode_basestring(PyObject* self UNUSED, PyObject *pystr)
649 {
650     PyObject *rval;
651     /* Return a JSON representation of a Python string */
652     /* METH_O */
653     if (PyUnicode_Check(pystr)) {
654         rval = escape_unicode(pystr);
655     }
656     else {
657         PyErr_Format(PyExc_TypeError,
658                      "first argument must be a string, not %.80s",
659                      Py_TYPE(pystr)->tp_name);
660         return NULL;
661     }
662     return rval;
663 }
664 
665 static void
scanner_dealloc(PyObject * self)666 scanner_dealloc(PyObject *self)
667 {
668     /* bpo-31095: UnTrack is needed before calling any callbacks */
669     PyObject_GC_UnTrack(self);
670     scanner_clear(self);
671     Py_TYPE(self)->tp_free(self);
672 }
673 
674 static int
scanner_traverse(PyObject * self,visitproc visit,void * arg)675 scanner_traverse(PyObject *self, visitproc visit, void *arg)
676 {
677     PyScannerObject *s;
678     assert(PyScanner_Check(self));
679     s = (PyScannerObject *)self;
680     Py_VISIT(s->object_hook);
681     Py_VISIT(s->object_pairs_hook);
682     Py_VISIT(s->parse_float);
683     Py_VISIT(s->parse_int);
684     Py_VISIT(s->parse_constant);
685     return 0;
686 }
687 
688 static int
scanner_clear(PyObject * self)689 scanner_clear(PyObject *self)
690 {
691     PyScannerObject *s;
692     assert(PyScanner_Check(self));
693     s = (PyScannerObject *)self;
694     Py_CLEAR(s->object_hook);
695     Py_CLEAR(s->object_pairs_hook);
696     Py_CLEAR(s->parse_float);
697     Py_CLEAR(s->parse_int);
698     Py_CLEAR(s->parse_constant);
699     Py_CLEAR(s->memo);
700     return 0;
701 }
702 
703 static PyObject *
_parse_object_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)704 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
705 {
706     /* Read a JSON object from PyUnicode pystr.
707     idx is the index of the first character after the opening curly brace.
708     *next_idx_ptr is a return-by-reference index to the first character after
709         the closing curly brace.
710 
711     Returns a new PyObject (usually a dict, but object_hook can change that)
712     */
713     void *str;
714     int kind;
715     Py_ssize_t end_idx;
716     PyObject *val = NULL;
717     PyObject *rval = NULL;
718     PyObject *key = NULL;
719     int has_pairs_hook = (s->object_pairs_hook != Py_None);
720     Py_ssize_t next_idx;
721 
722     if (PyUnicode_READY(pystr) == -1)
723         return NULL;
724 
725     str = PyUnicode_DATA(pystr);
726     kind = PyUnicode_KIND(pystr);
727     end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
728 
729     if (has_pairs_hook)
730         rval = PyList_New(0);
731     else
732         rval = PyDict_New();
733     if (rval == NULL)
734         return NULL;
735 
736     /* skip whitespace after { */
737     while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
738 
739     /* only loop if the object is non-empty */
740     if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
741         while (1) {
742             PyObject *memokey;
743 
744             /* read key */
745             if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
746                 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
747                 goto bail;
748             }
749             key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
750             if (key == NULL)
751                 goto bail;
752             memokey = PyDict_GetItemWithError(s->memo, key);
753             if (memokey != NULL) {
754                 Py_INCREF(memokey);
755                 Py_DECREF(key);
756                 key = memokey;
757             }
758             else if (PyErr_Occurred()) {
759                 goto bail;
760             }
761             else {
762                 if (PyDict_SetItem(s->memo, key, key) < 0)
763                     goto bail;
764             }
765             idx = next_idx;
766 
767             /* skip whitespace between key and : delimiter, read :, skip whitespace */
768             while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
769             if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
770                 raise_errmsg("Expecting ':' delimiter", pystr, idx);
771                 goto bail;
772             }
773             idx++;
774             while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
775 
776             /* read any JSON term */
777             val = scan_once_unicode(s, pystr, idx, &next_idx);
778             if (val == NULL)
779                 goto bail;
780 
781             if (has_pairs_hook) {
782                 PyObject *item = PyTuple_Pack(2, key, val);
783                 if (item == NULL)
784                     goto bail;
785                 Py_CLEAR(key);
786                 Py_CLEAR(val);
787                 if (PyList_Append(rval, item) == -1) {
788                     Py_DECREF(item);
789                     goto bail;
790                 }
791                 Py_DECREF(item);
792             }
793             else {
794                 if (PyDict_SetItem(rval, key, val) < 0)
795                     goto bail;
796                 Py_CLEAR(key);
797                 Py_CLEAR(val);
798             }
799             idx = next_idx;
800 
801             /* skip whitespace before } or , */
802             while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
803 
804             /* bail if the object is closed or we didn't get the , delimiter */
805             if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
806                 break;
807             if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
808                 raise_errmsg("Expecting ',' delimiter", pystr, idx);
809                 goto bail;
810             }
811             idx++;
812 
813             /* skip whitespace after , delimiter */
814             while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
815         }
816     }
817 
818     *next_idx_ptr = idx + 1;
819 
820     if (has_pairs_hook) {
821         val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
822         Py_DECREF(rval);
823         return val;
824     }
825 
826     /* if object_hook is not None: rval = object_hook(rval) */
827     if (s->object_hook != Py_None) {
828         val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
829         Py_DECREF(rval);
830         return val;
831     }
832     return rval;
833 bail:
834     Py_XDECREF(key);
835     Py_XDECREF(val);
836     Py_XDECREF(rval);
837     return NULL;
838 }
839 
840 static PyObject *
_parse_array_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)841 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
842     /* Read a JSON array from PyUnicode pystr.
843     idx is the index of the first character after the opening brace.
844     *next_idx_ptr is a return-by-reference index to the first character after
845         the closing brace.
846 
847     Returns a new PyList
848     */
849     void *str;
850     int kind;
851     Py_ssize_t end_idx;
852     PyObject *val = NULL;
853     PyObject *rval;
854     Py_ssize_t next_idx;
855 
856     if (PyUnicode_READY(pystr) == -1)
857         return NULL;
858 
859     rval = PyList_New(0);
860     if (rval == NULL)
861         return NULL;
862 
863     str = PyUnicode_DATA(pystr);
864     kind = PyUnicode_KIND(pystr);
865     end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
866 
867     /* skip whitespace after [ */
868     while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
869 
870     /* only loop if the array is non-empty */
871     if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
872         while (1) {
873 
874             /* read any JSON term  */
875             val = scan_once_unicode(s, pystr, idx, &next_idx);
876             if (val == NULL)
877                 goto bail;
878 
879             if (PyList_Append(rval, val) == -1)
880                 goto bail;
881 
882             Py_CLEAR(val);
883             idx = next_idx;
884 
885             /* skip whitespace between term and , */
886             while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
887 
888             /* bail if the array is closed or we didn't get the , delimiter */
889             if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
890                 break;
891             if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
892                 raise_errmsg("Expecting ',' delimiter", pystr, idx);
893                 goto bail;
894             }
895             idx++;
896 
897             /* skip whitespace after , */
898             while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
899         }
900     }
901 
902     /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
903     if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
904         raise_errmsg("Expecting value", pystr, end_idx);
905         goto bail;
906     }
907     *next_idx_ptr = idx + 1;
908     return rval;
909 bail:
910     Py_XDECREF(val);
911     Py_DECREF(rval);
912     return NULL;
913 }
914 
915 static PyObject *
_parse_constant(PyScannerObject * s,const char * constant,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)916 _parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
917     /* Read a JSON constant.
918     constant is the constant string that was found
919         ("NaN", "Infinity", "-Infinity").
920     idx is the index of the first character of the constant
921     *next_idx_ptr is a return-by-reference index to the first character after
922         the constant.
923 
924     Returns the result of parse_constant
925     */
926     PyObject *cstr;
927     PyObject *rval;
928     /* constant is "NaN", "Infinity", or "-Infinity" */
929     cstr = PyUnicode_InternFromString(constant);
930     if (cstr == NULL)
931         return NULL;
932 
933     /* rval = parse_constant(constant) */
934     rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
935     idx += PyUnicode_GET_LENGTH(cstr);
936     Py_DECREF(cstr);
937     *next_idx_ptr = idx;
938     return rval;
939 }
940 
941 static PyObject *
_match_number_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t start,Py_ssize_t * next_idx_ptr)942 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
943     /* Read a JSON number from PyUnicode pystr.
944     idx is the index of the first character of the number
945     *next_idx_ptr is a return-by-reference index to the first character after
946         the number.
947 
948     Returns a new PyObject representation of that number:
949         PyLong, or PyFloat.
950         May return other types if parse_int or parse_float are set
951     */
952     void *str;
953     int kind;
954     Py_ssize_t end_idx;
955     Py_ssize_t idx = start;
956     int is_float = 0;
957     PyObject *rval;
958     PyObject *numstr = NULL;
959     PyObject *custom_func;
960 
961     if (PyUnicode_READY(pystr) == -1)
962         return NULL;
963 
964     str = PyUnicode_DATA(pystr);
965     kind = PyUnicode_KIND(pystr);
966     end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
967 
968     /* read a sign if it's there, make sure it's not the end of the string */
969     if (PyUnicode_READ(kind, str, idx) == '-') {
970         idx++;
971         if (idx > end_idx) {
972             raise_stop_iteration(start);
973             return NULL;
974         }
975     }
976 
977     /* read as many integer digits as we find as long as it doesn't start with 0 */
978     if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
979         idx++;
980         while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
981     }
982     /* if it starts with 0 we only expect one integer digit */
983     else if (PyUnicode_READ(kind, str, idx) == '0') {
984         idx++;
985     }
986     /* no integer digits, error */
987     else {
988         raise_stop_iteration(start);
989         return NULL;
990     }
991 
992     /* if the next char is '.' followed by a digit then read all float digits */
993     if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
994         is_float = 1;
995         idx += 2;
996         while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
997     }
998 
999     /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1000     if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
1001         Py_ssize_t e_start = idx;
1002         idx++;
1003 
1004         /* read an exponent sign if present */
1005         if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
1006 
1007         /* read all digits */
1008         while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
1009 
1010         /* if we got a digit, then parse as float. if not, backtrack */
1011         if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
1012             is_float = 1;
1013         }
1014         else {
1015             idx = e_start;
1016         }
1017     }
1018 
1019     if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
1020         custom_func = s->parse_float;
1021     else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
1022         custom_func = s->parse_int;
1023     else
1024         custom_func = NULL;
1025 
1026     if (custom_func) {
1027         /* copy the section we determined to be a number */
1028         numstr = PyUnicode_FromKindAndData(kind,
1029                                            (char*)str + kind * start,
1030                                            idx - start);
1031         if (numstr == NULL)
1032             return NULL;
1033         rval = PyObject_CallFunctionObjArgs(custom_func, numstr, NULL);
1034     }
1035     else {
1036         Py_ssize_t i, n;
1037         char *buf;
1038         /* Straight conversion to ASCII, to avoid costly conversion of
1039            decimal unicode digits (which cannot appear here) */
1040         n = idx - start;
1041         numstr = PyBytes_FromStringAndSize(NULL, n);
1042         if (numstr == NULL)
1043             return NULL;
1044         buf = PyBytes_AS_STRING(numstr);
1045         for (i = 0; i < n; i++) {
1046             buf[i] = (char) PyUnicode_READ(kind, str, i + start);
1047         }
1048         if (is_float)
1049             rval = PyFloat_FromString(numstr);
1050         else
1051             rval = PyLong_FromString(buf, NULL, 10);
1052     }
1053     Py_DECREF(numstr);
1054     *next_idx_ptr = idx;
1055     return rval;
1056 }
1057 
1058 static PyObject *
scan_once_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1059 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1060 {
1061     /* Read one JSON term (of any kind) from PyUnicode pystr.
1062     idx is the index of the first character of the term
1063     *next_idx_ptr is a return-by-reference index to the first character after
1064         the number.
1065 
1066     Returns a new PyObject representation of the term.
1067     */
1068     PyObject *res;
1069     void *str;
1070     int kind;
1071     Py_ssize_t length;
1072 
1073     if (PyUnicode_READY(pystr) == -1)
1074         return NULL;
1075 
1076     str = PyUnicode_DATA(pystr);
1077     kind = PyUnicode_KIND(pystr);
1078     length = PyUnicode_GET_LENGTH(pystr);
1079 
1080     if (idx < 0) {
1081         PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1082         return NULL;
1083     }
1084     if (idx >= length) {
1085         raise_stop_iteration(idx);
1086         return NULL;
1087     }
1088 
1089     switch (PyUnicode_READ(kind, str, idx)) {
1090         case '"':
1091             /* string */
1092             return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
1093         case '{':
1094             /* object */
1095             if (Py_EnterRecursiveCall(" while decoding a JSON object "
1096                                       "from a unicode string"))
1097                 return NULL;
1098             res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1099             Py_LeaveRecursiveCall();
1100             return res;
1101         case '[':
1102             /* array */
1103             if (Py_EnterRecursiveCall(" while decoding a JSON array "
1104                                       "from a unicode string"))
1105                 return NULL;
1106             res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1107             Py_LeaveRecursiveCall();
1108             return res;
1109         case 'n':
1110             /* null */
1111             if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
1112                 *next_idx_ptr = idx + 4;
1113                 Py_RETURN_NONE;
1114             }
1115             break;
1116         case 't':
1117             /* true */
1118             if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
1119                 *next_idx_ptr = idx + 4;
1120                 Py_RETURN_TRUE;
1121             }
1122             break;
1123         case 'f':
1124             /* false */
1125             if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1126                 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1127                 PyUnicode_READ(kind, str, idx + 3) == 's' &&
1128                 PyUnicode_READ(kind, str, idx + 4) == 'e') {
1129                 *next_idx_ptr = idx + 5;
1130                 Py_RETURN_FALSE;
1131             }
1132             break;
1133         case 'N':
1134             /* NaN */
1135             if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1136                 PyUnicode_READ(kind, str, idx + 2) == 'N') {
1137                 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1138             }
1139             break;
1140         case 'I':
1141             /* Infinity */
1142             if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1143                 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1144                 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
1145                 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
1146                 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1147                 PyUnicode_READ(kind, str, idx + 6) == 't' &&
1148                 PyUnicode_READ(kind, str, idx + 7) == 'y') {
1149                 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1150             }
1151             break;
1152         case '-':
1153             /* -Infinity */
1154             if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
1155                 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1156                 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
1157                 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
1158                 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
1159                 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1160                 PyUnicode_READ(kind, str, idx + 7) == 't' &&
1161                 PyUnicode_READ(kind, str, idx + 8) == 'y') {
1162                 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1163             }
1164             break;
1165     }
1166     /* Didn't find a string, object, array, or named constant. Look for a number. */
1167     return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1168 }
1169 
1170 static PyObject *
scanner_call(PyObject * self,PyObject * args,PyObject * kwds)1171 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1172 {
1173     /* Python callable interface to scan_once_{str,unicode} */
1174     PyObject *pystr;
1175     PyObject *rval;
1176     Py_ssize_t idx;
1177     Py_ssize_t next_idx = -1;
1178     static char *kwlist[] = {"string", "idx", NULL};
1179     PyScannerObject *s;
1180     assert(PyScanner_Check(self));
1181     s = (PyScannerObject *)self;
1182     if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
1183         return NULL;
1184 
1185     if (PyUnicode_Check(pystr)) {
1186         rval = scan_once_unicode(s, pystr, idx, &next_idx);
1187     }
1188     else {
1189         PyErr_Format(PyExc_TypeError,
1190                  "first argument must be a string, not %.80s",
1191                  Py_TYPE(pystr)->tp_name);
1192         return NULL;
1193     }
1194     PyDict_Clear(s->memo);
1195     if (rval == NULL)
1196         return NULL;
1197     return _build_rval_index_tuple(rval, next_idx);
1198 }
1199 
1200 static PyObject *
scanner_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1201 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1202 {
1203     PyScannerObject *s;
1204     PyObject *ctx;
1205     PyObject *strict;
1206     static char *kwlist[] = {"context", NULL};
1207 
1208     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1209         return NULL;
1210 
1211     s = (PyScannerObject *)type->tp_alloc(type, 0);
1212     if (s == NULL) {
1213         return NULL;
1214     }
1215 
1216     s->memo = PyDict_New();
1217     if (s->memo == NULL)
1218         goto bail;
1219 
1220     /* All of these will fail "gracefully" so we don't need to verify them */
1221     strict = PyObject_GetAttrString(ctx, "strict");
1222     if (strict == NULL)
1223         goto bail;
1224     s->strict = PyObject_IsTrue(strict);
1225     Py_DECREF(strict);
1226     if (s->strict < 0)
1227         goto bail;
1228     s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1229     if (s->object_hook == NULL)
1230         goto bail;
1231     s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1232     if (s->object_pairs_hook == NULL)
1233         goto bail;
1234     s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1235     if (s->parse_float == NULL)
1236         goto bail;
1237     s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1238     if (s->parse_int == NULL)
1239         goto bail;
1240     s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1241     if (s->parse_constant == NULL)
1242         goto bail;
1243 
1244     return (PyObject *)s;
1245 
1246 bail:
1247     Py_DECREF(s);
1248     return NULL;
1249 }
1250 
1251 PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1252 
1253 static
1254 PyTypeObject PyScannerType = {
1255     PyVarObject_HEAD_INIT(NULL, 0)
1256     "_json.Scanner",       /* tp_name */
1257     sizeof(PyScannerObject), /* tp_basicsize */
1258     0,                    /* tp_itemsize */
1259     scanner_dealloc, /* tp_dealloc */
1260     0,                    /* tp_vectorcall_offset */
1261     0,                    /* tp_getattr */
1262     0,                    /* tp_setattr */
1263     0,                    /* tp_as_async */
1264     0,                    /* tp_repr */
1265     0,                    /* tp_as_number */
1266     0,                    /* tp_as_sequence */
1267     0,                    /* tp_as_mapping */
1268     0,                    /* tp_hash */
1269     scanner_call,         /* tp_call */
1270     0,                    /* tp_str */
1271     0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */
1272     0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */
1273     0,                    /* tp_as_buffer */
1274     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
1275     scanner_doc,          /* tp_doc */
1276     scanner_traverse,                    /* tp_traverse */
1277     scanner_clear,                    /* tp_clear */
1278     0,                    /* tp_richcompare */
1279     0,                    /* tp_weaklistoffset */
1280     0,                    /* tp_iter */
1281     0,                    /* tp_iternext */
1282     0,                    /* tp_methods */
1283     scanner_members,                    /* tp_members */
1284     0,                    /* tp_getset */
1285     0,                    /* tp_base */
1286     0,                    /* tp_dict */
1287     0,                    /* tp_descr_get */
1288     0,                    /* tp_descr_set */
1289     0,                    /* tp_dictoffset */
1290     0,                    /* tp_init */
1291     0,/* PyType_GenericAlloc, */        /* tp_alloc */
1292     scanner_new,          /* tp_new */
1293     0,/* PyObject_GC_Del, */              /* tp_free */
1294 };
1295 
1296 static PyObject *
encoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1297 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1298 {
1299     static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1300 
1301     PyEncoderObject *s;
1302     PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1303     PyObject *item_separator;
1304     int sort_keys, skipkeys, allow_nan;
1305 
1306     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
1307         &markers, &defaultfn, &encoder, &indent,
1308         &key_separator, &item_separator,
1309         &sort_keys, &skipkeys, &allow_nan))
1310         return NULL;
1311 
1312     if (markers != Py_None && !PyDict_Check(markers)) {
1313         PyErr_Format(PyExc_TypeError,
1314                      "make_encoder() argument 1 must be dict or None, "
1315                      "not %.200s", Py_TYPE(markers)->tp_name);
1316         return NULL;
1317     }
1318 
1319     s = (PyEncoderObject *)type->tp_alloc(type, 0);
1320     if (s == NULL)
1321         return NULL;
1322 
1323     s->markers = markers;
1324     s->defaultfn = defaultfn;
1325     s->encoder = encoder;
1326     s->indent = indent;
1327     s->key_separator = key_separator;
1328     s->item_separator = item_separator;
1329     s->sort_keys = sort_keys;
1330     s->skipkeys = skipkeys;
1331     s->allow_nan = allow_nan;
1332     s->fast_encode = NULL;
1333     if (PyCFunction_Check(s->encoder)) {
1334         PyCFunction f = PyCFunction_GetFunction(s->encoder);
1335         if (f == (PyCFunction)py_encode_basestring_ascii ||
1336                 f == (PyCFunction)py_encode_basestring) {
1337             s->fast_encode = f;
1338         }
1339     }
1340 
1341     Py_INCREF(s->markers);
1342     Py_INCREF(s->defaultfn);
1343     Py_INCREF(s->encoder);
1344     Py_INCREF(s->indent);
1345     Py_INCREF(s->key_separator);
1346     Py_INCREF(s->item_separator);
1347     return (PyObject *)s;
1348 }
1349 
1350 static PyObject *
encoder_call(PyObject * self,PyObject * args,PyObject * kwds)1351 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1352 {
1353     /* Python callable interface to encode_listencode_obj */
1354     static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1355     PyObject *obj;
1356     Py_ssize_t indent_level;
1357     PyEncoderObject *s;
1358     _PyAccu acc;
1359 
1360     assert(PyEncoder_Check(self));
1361     s = (PyEncoderObject *)self;
1362     if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1363         &obj, &indent_level))
1364         return NULL;
1365     if (_PyAccu_Init(&acc))
1366         return NULL;
1367     if (encoder_listencode_obj(s, &acc, obj, indent_level)) {
1368         _PyAccu_Destroy(&acc);
1369         return NULL;
1370     }
1371     return _PyAccu_FinishAsList(&acc);
1372 }
1373 
1374 static PyObject *
_encoded_const(PyObject * obj)1375 _encoded_const(PyObject *obj)
1376 {
1377     /* Return the JSON string representation of None, True, False */
1378     if (obj == Py_None) {
1379         static PyObject *s_null = NULL;
1380         if (s_null == NULL) {
1381             s_null = PyUnicode_InternFromString("null");
1382         }
1383         Py_XINCREF(s_null);
1384         return s_null;
1385     }
1386     else if (obj == Py_True) {
1387         static PyObject *s_true = NULL;
1388         if (s_true == NULL) {
1389             s_true = PyUnicode_InternFromString("true");
1390         }
1391         Py_XINCREF(s_true);
1392         return s_true;
1393     }
1394     else if (obj == Py_False) {
1395         static PyObject *s_false = NULL;
1396         if (s_false == NULL) {
1397             s_false = PyUnicode_InternFromString("false");
1398         }
1399         Py_XINCREF(s_false);
1400         return s_false;
1401     }
1402     else {
1403         PyErr_SetString(PyExc_ValueError, "not a const");
1404         return NULL;
1405     }
1406 }
1407 
1408 static PyObject *
encoder_encode_float(PyEncoderObject * s,PyObject * obj)1409 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1410 {
1411     /* Return the JSON representation of a PyFloat. */
1412     double i = PyFloat_AS_DOUBLE(obj);
1413     if (!Py_IS_FINITE(i)) {
1414         if (!s->allow_nan) {
1415             PyErr_SetString(
1416                     PyExc_ValueError,
1417                     "Out of range float values are not JSON compliant"
1418                     );
1419             return NULL;
1420         }
1421         if (i > 0) {
1422             return PyUnicode_FromString("Infinity");
1423         }
1424         else if (i < 0) {
1425             return PyUnicode_FromString("-Infinity");
1426         }
1427         else {
1428             return PyUnicode_FromString("NaN");
1429         }
1430     }
1431     return PyFloat_Type.tp_repr(obj);
1432 }
1433 
1434 static PyObject *
encoder_encode_string(PyEncoderObject * s,PyObject * obj)1435 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1436 {
1437     /* Return the JSON representation of a string */
1438     PyObject *encoded;
1439 
1440     if (s->fast_encode) {
1441         return s->fast_encode(NULL, obj);
1442     }
1443     encoded = PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1444     if (encoded != NULL && !PyUnicode_Check(encoded)) {
1445         PyErr_Format(PyExc_TypeError,
1446                      "encoder() must return a string, not %.80s",
1447                      Py_TYPE(encoded)->tp_name);
1448         Py_DECREF(encoded);
1449         return NULL;
1450     }
1451     return encoded;
1452 }
1453 
1454 static int
_steal_accumulate(_PyAccu * acc,PyObject * stolen)1455 _steal_accumulate(_PyAccu *acc, PyObject *stolen)
1456 {
1457     /* Append stolen and then decrement its reference count */
1458     int rval = _PyAccu_Accumulate(acc, stolen);
1459     Py_DECREF(stolen);
1460     return rval;
1461 }
1462 
1463 static int
encoder_listencode_obj(PyEncoderObject * s,_PyAccu * acc,PyObject * obj,Py_ssize_t indent_level)1464 encoder_listencode_obj(PyEncoderObject *s, _PyAccu *acc,
1465                        PyObject *obj, Py_ssize_t indent_level)
1466 {
1467     /* Encode Python object obj to a JSON term */
1468     PyObject *newobj;
1469     int rv;
1470 
1471     if (obj == Py_None || obj == Py_True || obj == Py_False) {
1472         PyObject *cstr = _encoded_const(obj);
1473         if (cstr == NULL)
1474             return -1;
1475         return _steal_accumulate(acc, cstr);
1476     }
1477     else if (PyUnicode_Check(obj))
1478     {
1479         PyObject *encoded = encoder_encode_string(s, obj);
1480         if (encoded == NULL)
1481             return -1;
1482         return _steal_accumulate(acc, encoded);
1483     }
1484     else if (PyLong_Check(obj)) {
1485         PyObject *encoded = PyLong_Type.tp_repr(obj);
1486         if (encoded == NULL)
1487             return -1;
1488         return _steal_accumulate(acc, encoded);
1489     }
1490     else if (PyFloat_Check(obj)) {
1491         PyObject *encoded = encoder_encode_float(s, obj);
1492         if (encoded == NULL)
1493             return -1;
1494         return _steal_accumulate(acc, encoded);
1495     }
1496     else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1497         if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1498             return -1;
1499         rv = encoder_listencode_list(s, acc, obj, indent_level);
1500         Py_LeaveRecursiveCall();
1501         return rv;
1502     }
1503     else if (PyDict_Check(obj)) {
1504         if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1505             return -1;
1506         rv = encoder_listencode_dict(s, acc, obj, indent_level);
1507         Py_LeaveRecursiveCall();
1508         return rv;
1509     }
1510     else {
1511         PyObject *ident = NULL;
1512         if (s->markers != Py_None) {
1513             int has_key;
1514             ident = PyLong_FromVoidPtr(obj);
1515             if (ident == NULL)
1516                 return -1;
1517             has_key = PyDict_Contains(s->markers, ident);
1518             if (has_key) {
1519                 if (has_key != -1)
1520                     PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1521                 Py_DECREF(ident);
1522                 return -1;
1523             }
1524             if (PyDict_SetItem(s->markers, ident, obj)) {
1525                 Py_DECREF(ident);
1526                 return -1;
1527             }
1528         }
1529         newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
1530         if (newobj == NULL) {
1531             Py_XDECREF(ident);
1532             return -1;
1533         }
1534 
1535         if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
1536             Py_DECREF(newobj);
1537             Py_XDECREF(ident);
1538             return -1;
1539         }
1540         rv = encoder_listencode_obj(s, acc, newobj, indent_level);
1541         Py_LeaveRecursiveCall();
1542 
1543         Py_DECREF(newobj);
1544         if (rv) {
1545             Py_XDECREF(ident);
1546             return -1;
1547         }
1548         if (ident != NULL) {
1549             if (PyDict_DelItem(s->markers, ident)) {
1550                 Py_XDECREF(ident);
1551                 return -1;
1552             }
1553             Py_XDECREF(ident);
1554         }
1555         return rv;
1556     }
1557 }
1558 
1559 static int
encoder_listencode_dict(PyEncoderObject * s,_PyAccu * acc,PyObject * dct,Py_ssize_t indent_level)1560 encoder_listencode_dict(PyEncoderObject *s, _PyAccu *acc,
1561                         PyObject *dct, Py_ssize_t indent_level)
1562 {
1563     /* Encode Python dict dct a JSON term */
1564     static PyObject *open_dict = NULL;
1565     static PyObject *close_dict = NULL;
1566     static PyObject *empty_dict = NULL;
1567     PyObject *kstr = NULL;
1568     PyObject *ident = NULL;
1569     PyObject *it = NULL;
1570     PyObject *items;
1571     PyObject *item = NULL;
1572     Py_ssize_t idx;
1573 
1574     if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1575         open_dict = PyUnicode_InternFromString("{");
1576         close_dict = PyUnicode_InternFromString("}");
1577         empty_dict = PyUnicode_InternFromString("{}");
1578         if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1579             return -1;
1580     }
1581     if (PyDict_GET_SIZE(dct) == 0)  /* Fast path */
1582         return _PyAccu_Accumulate(acc, empty_dict);
1583 
1584     if (s->markers != Py_None) {
1585         int has_key;
1586         ident = PyLong_FromVoidPtr(dct);
1587         if (ident == NULL)
1588             goto bail;
1589         has_key = PyDict_Contains(s->markers, ident);
1590         if (has_key) {
1591             if (has_key != -1)
1592                 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1593             goto bail;
1594         }
1595         if (PyDict_SetItem(s->markers, ident, dct)) {
1596             goto bail;
1597         }
1598     }
1599 
1600     if (_PyAccu_Accumulate(acc, open_dict))
1601         goto bail;
1602 
1603     if (s->indent != Py_None) {
1604         /* TODO: DOES NOT RUN */
1605         indent_level += 1;
1606         /*
1607             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1608             separator = _item_separator + newline_indent
1609             buf += newline_indent
1610         */
1611     }
1612 
1613     items = PyMapping_Items(dct);
1614     if (items == NULL)
1615         goto bail;
1616     if (s->sort_keys && PyList_Sort(items) < 0) {
1617         Py_DECREF(items);
1618         goto bail;
1619     }
1620     it = PyObject_GetIter(items);
1621     Py_DECREF(items);
1622     if (it == NULL)
1623         goto bail;
1624     idx = 0;
1625     while ((item = PyIter_Next(it)) != NULL) {
1626         PyObject *encoded, *key, *value;
1627         if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
1628             PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1629             goto bail;
1630         }
1631         key = PyTuple_GET_ITEM(item, 0);
1632         if (PyUnicode_Check(key)) {
1633             Py_INCREF(key);
1634             kstr = key;
1635         }
1636         else if (PyFloat_Check(key)) {
1637             kstr = encoder_encode_float(s, key);
1638             if (kstr == NULL)
1639                 goto bail;
1640         }
1641         else if (key == Py_True || key == Py_False || key == Py_None) {
1642                         /* This must come before the PyLong_Check because
1643                            True and False are also 1 and 0.*/
1644             kstr = _encoded_const(key);
1645             if (kstr == NULL)
1646                 goto bail;
1647         }
1648         else if (PyLong_Check(key)) {
1649             kstr = PyLong_Type.tp_repr(key);
1650             if (kstr == NULL) {
1651                 goto bail;
1652             }
1653         }
1654         else if (s->skipkeys) {
1655             Py_DECREF(item);
1656             continue;
1657         }
1658         else {
1659             PyErr_Format(PyExc_TypeError,
1660                          "keys must be str, int, float, bool or None, "
1661                          "not %.100s", key->ob_type->tp_name);
1662             goto bail;
1663         }
1664 
1665         if (idx) {
1666             if (_PyAccu_Accumulate(acc, s->item_separator))
1667                 goto bail;
1668         }
1669 
1670         encoded = encoder_encode_string(s, kstr);
1671         Py_CLEAR(kstr);
1672         if (encoded == NULL)
1673             goto bail;
1674         if (_PyAccu_Accumulate(acc, encoded)) {
1675             Py_DECREF(encoded);
1676             goto bail;
1677         }
1678         Py_DECREF(encoded);
1679         if (_PyAccu_Accumulate(acc, s->key_separator))
1680             goto bail;
1681 
1682         value = PyTuple_GET_ITEM(item, 1);
1683         if (encoder_listencode_obj(s, acc, value, indent_level))
1684             goto bail;
1685         idx += 1;
1686         Py_DECREF(item);
1687     }
1688     if (PyErr_Occurred())
1689         goto bail;
1690     Py_CLEAR(it);
1691 
1692     if (ident != NULL) {
1693         if (PyDict_DelItem(s->markers, ident))
1694             goto bail;
1695         Py_CLEAR(ident);
1696     }
1697     /* TODO DOES NOT RUN; dead code
1698     if (s->indent != Py_None) {
1699         indent_level -= 1;
1700 
1701         yield '\n' + (' ' * (_indent * _current_indent_level))
1702     }*/
1703     if (_PyAccu_Accumulate(acc, close_dict))
1704         goto bail;
1705     return 0;
1706 
1707 bail:
1708     Py_XDECREF(it);
1709     Py_XDECREF(item);
1710     Py_XDECREF(kstr);
1711     Py_XDECREF(ident);
1712     return -1;
1713 }
1714 
1715 
1716 static int
encoder_listencode_list(PyEncoderObject * s,_PyAccu * acc,PyObject * seq,Py_ssize_t indent_level)1717 encoder_listencode_list(PyEncoderObject *s, _PyAccu *acc,
1718                         PyObject *seq, Py_ssize_t indent_level)
1719 {
1720     /* Encode Python list seq to a JSON term */
1721     static PyObject *open_array = NULL;
1722     static PyObject *close_array = NULL;
1723     static PyObject *empty_array = NULL;
1724     PyObject *ident = NULL;
1725     PyObject *s_fast = NULL;
1726     Py_ssize_t i;
1727 
1728     if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1729         open_array = PyUnicode_InternFromString("[");
1730         close_array = PyUnicode_InternFromString("]");
1731         empty_array = PyUnicode_InternFromString("[]");
1732         if (open_array == NULL || close_array == NULL || empty_array == NULL)
1733             return -1;
1734     }
1735     ident = NULL;
1736     s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1737     if (s_fast == NULL)
1738         return -1;
1739     if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
1740         Py_DECREF(s_fast);
1741         return _PyAccu_Accumulate(acc, empty_array);
1742     }
1743 
1744     if (s->markers != Py_None) {
1745         int has_key;
1746         ident = PyLong_FromVoidPtr(seq);
1747         if (ident == NULL)
1748             goto bail;
1749         has_key = PyDict_Contains(s->markers, ident);
1750         if (has_key) {
1751             if (has_key != -1)
1752                 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1753             goto bail;
1754         }
1755         if (PyDict_SetItem(s->markers, ident, seq)) {
1756             goto bail;
1757         }
1758     }
1759 
1760     if (_PyAccu_Accumulate(acc, open_array))
1761         goto bail;
1762     if (s->indent != Py_None) {
1763         /* TODO: DOES NOT RUN */
1764         indent_level += 1;
1765         /*
1766             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1767             separator = _item_separator + newline_indent
1768             buf += newline_indent
1769         */
1770     }
1771     for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1772         PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
1773         if (i) {
1774             if (_PyAccu_Accumulate(acc, s->item_separator))
1775                 goto bail;
1776         }
1777         if (encoder_listencode_obj(s, acc, obj, indent_level))
1778             goto bail;
1779     }
1780     if (ident != NULL) {
1781         if (PyDict_DelItem(s->markers, ident))
1782             goto bail;
1783         Py_CLEAR(ident);
1784     }
1785 
1786     /* TODO: DOES NOT RUN
1787     if (s->indent != Py_None) {
1788         indent_level -= 1;
1789 
1790         yield '\n' + (' ' * (_indent * _current_indent_level))
1791     }*/
1792     if (_PyAccu_Accumulate(acc, close_array))
1793         goto bail;
1794     Py_DECREF(s_fast);
1795     return 0;
1796 
1797 bail:
1798     Py_XDECREF(ident);
1799     Py_DECREF(s_fast);
1800     return -1;
1801 }
1802 
1803 static void
encoder_dealloc(PyObject * self)1804 encoder_dealloc(PyObject *self)
1805 {
1806     /* bpo-31095: UnTrack is needed before calling any callbacks */
1807     PyObject_GC_UnTrack(self);
1808     encoder_clear(self);
1809     Py_TYPE(self)->tp_free(self);
1810 }
1811 
1812 static int
encoder_traverse(PyObject * self,visitproc visit,void * arg)1813 encoder_traverse(PyObject *self, visitproc visit, void *arg)
1814 {
1815     PyEncoderObject *s;
1816     assert(PyEncoder_Check(self));
1817     s = (PyEncoderObject *)self;
1818     Py_VISIT(s->markers);
1819     Py_VISIT(s->defaultfn);
1820     Py_VISIT(s->encoder);
1821     Py_VISIT(s->indent);
1822     Py_VISIT(s->key_separator);
1823     Py_VISIT(s->item_separator);
1824     return 0;
1825 }
1826 
1827 static int
encoder_clear(PyObject * self)1828 encoder_clear(PyObject *self)
1829 {
1830     /* Deallocate Encoder */
1831     PyEncoderObject *s;
1832     assert(PyEncoder_Check(self));
1833     s = (PyEncoderObject *)self;
1834     Py_CLEAR(s->markers);
1835     Py_CLEAR(s->defaultfn);
1836     Py_CLEAR(s->encoder);
1837     Py_CLEAR(s->indent);
1838     Py_CLEAR(s->key_separator);
1839     Py_CLEAR(s->item_separator);
1840     return 0;
1841 }
1842 
1843 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1844 
1845 static
1846 PyTypeObject PyEncoderType = {
1847     PyVarObject_HEAD_INIT(NULL, 0)
1848     "_json.Encoder",       /* tp_name */
1849     sizeof(PyEncoderObject), /* tp_basicsize */
1850     0,                    /* tp_itemsize */
1851     encoder_dealloc, /* tp_dealloc */
1852     0,                    /* tp_vectorcall_offset */
1853     0,                    /* tp_getattr */
1854     0,                    /* tp_setattr */
1855     0,                    /* tp_as_async */
1856     0,                    /* tp_repr */
1857     0,                    /* tp_as_number */
1858     0,                    /* tp_as_sequence */
1859     0,                    /* tp_as_mapping */
1860     0,                    /* tp_hash */
1861     encoder_call,         /* tp_call */
1862     0,                    /* tp_str */
1863     0,                    /* tp_getattro */
1864     0,                    /* tp_setattro */
1865     0,                    /* tp_as_buffer */
1866     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
1867     encoder_doc,          /* tp_doc */
1868     encoder_traverse,     /* tp_traverse */
1869     encoder_clear,        /* tp_clear */
1870     0,                    /* tp_richcompare */
1871     0,                    /* tp_weaklistoffset */
1872     0,                    /* tp_iter */
1873     0,                    /* tp_iternext */
1874     0,                    /* tp_methods */
1875     encoder_members,      /* tp_members */
1876     0,                    /* tp_getset */
1877     0,                    /* tp_base */
1878     0,                    /* tp_dict */
1879     0,                    /* tp_descr_get */
1880     0,                    /* tp_descr_set */
1881     0,                    /* tp_dictoffset */
1882     0,                    /* tp_init */
1883     0,                    /* tp_alloc */
1884     encoder_new,          /* tp_new */
1885     0,                    /* tp_free */
1886 };
1887 
1888 static PyMethodDef speedups_methods[] = {
1889     {"encode_basestring_ascii",
1890         (PyCFunction)py_encode_basestring_ascii,
1891         METH_O,
1892         pydoc_encode_basestring_ascii},
1893     {"encode_basestring",
1894         (PyCFunction)py_encode_basestring,
1895         METH_O,
1896         pydoc_encode_basestring},
1897     {"scanstring",
1898         (PyCFunction)py_scanstring,
1899         METH_VARARGS,
1900         pydoc_scanstring},
1901     {NULL, NULL, 0, NULL}
1902 };
1903 
1904 PyDoc_STRVAR(module_doc,
1905 "json speedups\n");
1906 
1907 static struct PyModuleDef jsonmodule = {
1908         PyModuleDef_HEAD_INIT,
1909         "_json",
1910         module_doc,
1911         -1,
1912         speedups_methods,
1913         NULL,
1914         NULL,
1915         NULL,
1916         NULL
1917 };
1918 
1919 PyMODINIT_FUNC
PyInit__json(void)1920 PyInit__json(void)
1921 {
1922     PyObject *m = PyModule_Create(&jsonmodule);
1923     if (!m)
1924         return NULL;
1925     if (PyType_Ready(&PyScannerType) < 0)
1926         goto fail;
1927     if (PyType_Ready(&PyEncoderType) < 0)
1928         goto fail;
1929     Py_INCREF((PyObject*)&PyScannerType);
1930     if (PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType) < 0) {
1931         Py_DECREF((PyObject*)&PyScannerType);
1932         goto fail;
1933     }
1934     Py_INCREF((PyObject*)&PyEncoderType);
1935     if (PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType) < 0) {
1936         Py_DECREF((PyObject*)&PyEncoderType);
1937         goto fail;
1938     }
1939     return m;
1940   fail:
1941     Py_DECREF(m);
1942     return NULL;
1943 }
1944