1 #include "Python.h"
2 
3 #include "cbor.h"
4 
5 #include <math.h>
6 #include <stdint.h>
7 
8 //#include <stdio.h>
9 #include <arpa/inet.h>
10 
11 
12 #ifndef DEBUG_LOGGING
13 // causes things to be written to stderr
14 #define DEBUG_LOGGING 0
15 //#define DEBUG_LOGGING 1
16 #endif
17 
18 
19 #ifdef Py_InitModule
20 // Python 2.7
21 
22 #define HAS_FILE_READER 1
23 #define IS_PY3 0
24 
25 #else
26 
27 #define HAS_FILE_READER 0
28 #define IS_PY3 1
29 
30 #endif
31 
// Options that influence how values are encoded.
typedef struct {
    // Flag carried to the encoder.
    // NOTE(review): presumably non-zero means "emit map keys in sorted
    // order"; the code that consumes it is outside this part of the file.
    unsigned int sort_keys;
} EncodeOptions;
35 
// A small polymorphic "reader" interface built from function pointers.
// Every concrete reader struct begins with READER_FUNCTIONS so that it can
// be used through a Reader*.
//
// read(self, len): read len bytes and return them in a buffer, or NULL on error
// read1(self, uint8_t*): read one byte; returns 0 on success
// return_buffer(self, buffer): release the result of read(self, len)
// delete(self): destructor; frees the reader and its contents
//
// The expansion intentionally has no trailing semicolon: the macro is used
// as `READER_FUNCTIONS;` inside a struct, and a second semicolon there would
// be an empty member declaration, which ISO C does not allow.
#define READER_FUNCTIONS \
    void* (*read)(void* self, Py_ssize_t len); \
    int (*read1)(void* self, uint8_t* oneByte); \
    void (*return_buffer)(void* self, void* buffer); \
    void (*delete)(void* self)
47 
// Point the four reader function pointers of `thiz` at clazz_read,
// clazz_read1, clazz_return_buffer and clazz_delete.
//
// The body is wrapped in do { } while (0) so the expansion is exactly one
// statement: it stays correct under an unbraced `if`/`else`. Follow the
// macro call with a semicolon, as every existing use does.
#define SET_READER_FUNCTIONS(thiz, clazz) do {\
        (thiz)->read = clazz##_read;\
        (thiz)->read1 = clazz##_read1;\
        (thiz)->return_buffer = clazz##_return_buffer;\
        (thiz)->delete = clazz##_delete;\
    } while (0)
52 
53 typedef struct _Reader {
54     READER_FUNCTIONS;
55 } Reader;
56 
57 static Reader* NewBufferReader(PyObject* ob);
58 static Reader* NewObjectReader(PyObject* ob);
59 #if HAS_FILE_READER
60 static Reader* NewFileReader(PyObject* ob);
61 #endif
62 
63 
64 static PyObject* loads_tag(Reader* rin, uint64_t aux);
65 static int loads_kv(PyObject* out, Reader* rin);
66 
// One chunk of an indefinite-length byte string, kept in a singly linked
// list while the chunks are being collected.
typedef struct VarBufferPart VarBufferPart;
struct VarBufferPart {
    void* start;            // first byte of this chunk's data
    uint64_t len;           // number of data bytes at start
    VarBufferPart* next;    // following chunk, or NULL for the last one
};
72 
73 
// printf-style debug logger.
// When DEBUG_LOGGING is non-zero the message is written to stderr and the
// vfprintf() result is returned; otherwise nothing is written and 0 is
// returned.
static int logprintf(const char* fmt, ...) {
    int written = 0;
    va_list args;

    va_start(args, fmt);
#if DEBUG_LOGGING
    written = vfprintf(stderr, fmt, args);
#endif
    va_end(args);
    return written;
}
86 
// TODO: portably work this out at compile time
// Cached result of the last is_big_endian() call (0 until it has run).
static int _is_big_endian = 0;

// Detect the host byte order at run time: on a big-endian host htonl() is
// the identity, so a value that survives it unchanged means big-endian.
// Stores the answer in _is_big_endian and also returns it (1 or 0).
static int is_big_endian(void) {
    const uint32_t probe = 1234;

    _is_big_endian = (htonl(probe) == probe);
    return _is_big_endian;
}
96 
97 
decodeFloat16(Reader * rin)98 PyObject* decodeFloat16(Reader* rin) {
99     // float16 parsing adapted from example code in spec
100     uint8_t hibyte, lobyte;// = raw[pos];
101     int err;
102     int exp;
103     int mant;
104     double val;
105 
106     err = rin->read1(rin, &hibyte);
107     if (err) { logprintf("fail in float16[0]\n"); return NULL; }
108     err = rin->read1(rin, &lobyte);
109     if (err) { logprintf("fail in float16[1]\n"); return NULL; }
110 
111     exp = (hibyte >> 2) & 0x1f;
112     mant = ((hibyte & 0x3) << 8) | lobyte;
113     if (exp == 0) {
114 	val = ldexp(mant, -24);
115     } else if (exp != 31) {
116 	val = ldexp(mant + 1024, exp - 25);
117     } else {
118 	val = mant == 0 ? INFINITY : NAN;
119     }
120     if (hibyte & 0x80) {
121 	val = -val;
122     }
123     return PyFloat_FromDouble(val);
124 }
decodeFloat32(Reader * rin)125 PyObject* decodeFloat32(Reader* rin) {
126     float val;
127     uint8_t* raw = rin->read(rin, 4);
128     if (!raw) { logprintf("fail in float32\n"); return NULL; }
129     if (_is_big_endian) {
130 	// easy!
131 	val = *((float*)raw);
132     } else {
133 	uint8_t* dest = (uint8_t*)(&val);
134 	dest[3] = raw[0];
135 	dest[2] = raw[1];
136 	dest[1] = raw[2];
137 	dest[0] = raw[3];
138     }
139     rin->return_buffer(rin, raw);
140     return PyFloat_FromDouble(val);
141 }
decodeFloat64(Reader * rin)142 PyObject* decodeFloat64(Reader* rin) {
143     int si;
144     uint64_t aux = 0;
145     uint8_t* raw = rin->read(rin, 8);
146     if (!raw) { logprintf("fail in float64\n"); return NULL; }
147     for (si = 0; si < 8; si++) {
148 	aux = aux << 8;
149 	aux |= raw[si];
150     }
151     rin->return_buffer(rin, raw);
152     return PyFloat_FromDouble(*((double*)(&aux)));
153 }
154 
155 // parse following int value into *auxP
156 // return 0 on success, -1 on fail
handle_info_bits(Reader * rin,uint8_t cbor_info,uint64_t * auxP)157 static int handle_info_bits(Reader* rin, uint8_t cbor_info, uint64_t* auxP) {
158     uint64_t aux;
159 
160     if (cbor_info <= 23) {
161 	// literal value <=23
162 	aux = cbor_info;
163     } else if (cbor_info == CBOR_UINT8_FOLLOWS) {
164 	uint8_t taux;
165 	if (rin->read1(rin, &taux)) { logprintf("fail in uint8\n"); return -1; }
166 	aux = taux;
167     } else if (cbor_info == CBOR_UINT16_FOLLOWS) {
168 	uint8_t hibyte, lobyte;
169 	if (rin->read1(rin, &hibyte)) { logprintf("fail in uint16[0]\n"); return -1; }
170 	if (rin->read1(rin, &lobyte)) { logprintf("fail in uint16[1]\n"); return -1; }
171 	aux = (hibyte << 8) | lobyte;
172     } else if (cbor_info == CBOR_UINT32_FOLLOWS) {
173 	uint8_t* raw = (uint8_t*)rin->read(rin, 4);
174 	if (!raw) { logprintf("fail in uint32[1]\n"); return -1; }
175 	aux =
176             (((uint64_t)raw[0]) << 24) |
177 	    (((uint64_t)raw[1]) << 16) |
178 	    (((uint64_t)raw[2]) <<  8) |
179 	    ((uint64_t)raw[3]);
180 	rin->return_buffer(rin, raw);
181     } else if (cbor_info == CBOR_UINT64_FOLLOWS) {
182         int si;
183 	uint8_t* raw = (uint8_t*)rin->read(rin, 8);
184 	if (!raw) { logprintf("fail in uint64[1]\n"); return -1; }
185 	aux = 0;
186 	for (si = 0; si < 8; si++) {
187 	    aux = aux << 8;
188 	    aux |= raw[si];
189 	}
190 	rin->return_buffer(rin, raw);
191     } else {
192 	aux = 0;
193     }
194     *auxP = aux;
195     return 0;
196 }
197 
198 static PyObject* inner_loads_c(Reader* rin, uint8_t c);
199 
inner_loads(Reader * rin)200 static PyObject* inner_loads(Reader* rin) {
201     uint8_t c;
202     int err;
203 
204     err = rin->read1(rin, &c);
205     if (err) { logprintf("fail in loads tag\n"); return NULL; }
206     return inner_loads_c(rin, c);
207 }
208 
inner_loads_c(Reader * rin,uint8_t c)209 PyObject* inner_loads_c(Reader* rin, uint8_t c) {
210     uint8_t cbor_type;
211     uint8_t cbor_info;
212     uint64_t aux;
213 
214     cbor_type = c & CBOR_TYPE_MASK;
215     cbor_info = c & CBOR_INFO_BITS;
216 
217 #if 0
218     if (pos > len) {
219 	PyErr_SetString(PyExc_ValueError, "misparse, token went longer than buffer");
220 	return NULL;
221     }
222 
223     pos += 1;
224 #endif
225 
226     if (cbor_type == CBOR_7) {
227 	if (cbor_info == CBOR_UINT16_FOLLOWS) { // float16
228 	    return decodeFloat16(rin);
229 	} else if (cbor_info == CBOR_UINT32_FOLLOWS) { // float32
230 	    return decodeFloat32(rin);
231 	} else if (cbor_info == CBOR_UINT64_FOLLOWS) {  // float64
232 	    return decodeFloat64(rin);
233 	}
234 	// not a float, fall through to other CBOR_7 interpretations
235     }
236     if (handle_info_bits(rin, cbor_info, &aux)) { logprintf("info bits failed\n"); return NULL; }
237 
238     PyObject* out = NULL;
239     switch (cbor_type) {
240     case CBOR_UINT:
241 	out = PyLong_FromUnsignedLongLong(aux);
242         if (out == NULL) {
243             PyErr_SetString(PyExc_RuntimeError, "unknown error decoding UINT");
244         }
245         return out;
246     case CBOR_NEGINT:
247 	if (aux > 0x7fffffffffffffff) {
248 	    PyObject* bignum = PyLong_FromUnsignedLongLong(aux);
249 	    PyObject* minusOne = PyLong_FromLong(-1);
250 	    out = PyNumber_Subtract(minusOne, bignum);
251 	    Py_DECREF(minusOne);
252 	    Py_DECREF(bignum);
253 	} else {
254 	    out = PyLong_FromLongLong((long long)(((long long)-1) - aux));
255 	}
256         if (out == NULL) {
257             PyErr_SetString(PyExc_RuntimeError, "unknown error decoding NEGINT");
258         }
259         return out;
260     case CBOR_BYTES:
261 	if (cbor_info == CBOR_VAR_FOLLOWS) {
262 	    size_t total = 0;
263 	    VarBufferPart* parts = NULL;
264 	    VarBufferPart* parts_tail = NULL;
265 	    uint8_t sc;
266 	    if (rin->read1(rin, &sc)) { logprintf("r1 fail in var bytes tag\n"); return NULL; }
267 	    while (sc != CBOR_BREAK) {
268 		uint8_t scbor_type = sc & CBOR_TYPE_MASK;
269 		uint8_t scbor_info = sc & CBOR_INFO_BITS;
270 		uint64_t saux;
271 		void* blob;
272 
273 		if (scbor_type != CBOR_BYTES) {
274 		    PyErr_Format(PyExc_ValueError, "expected subordinate BYTES block under VAR BYTES, but got %x", scbor_type);
275 		    return NULL;
276 		}
277 		if(handle_info_bits(rin, scbor_info, &saux)) { logprintf("var bytes sub infobits failed\n"); return NULL; }
278 		blob = rin->read(rin, saux);
279 		if (!blob) { logprintf("var bytes sub bytes read failed\n"); return NULL; }
280 		if (parts_tail == NULL) {
281 		    parts = parts_tail = (VarBufferPart*)PyMem_Malloc(sizeof(VarBufferPart) + saux);
282 		} else {
283 		    parts_tail->next = (VarBufferPart*)PyMem_Malloc(sizeof(VarBufferPart) + saux);
284 		    parts_tail = parts_tail->next;
285 		}
286                 parts_tail->start = (void*)(parts_tail + 1);
287                 memcpy(parts_tail->start, blob, saux);
288                 rin->return_buffer(rin, blob);
289 		parts_tail->len = saux;
290 		parts_tail->next = NULL;
291 		total += saux;
292 		if (rin->read1(rin, &sc)) { logprintf("r1 fail in var bytes tag\n"); return NULL; }
293 	    }
294 	    // Done
295 	    {
296 		uint8_t* allbytes = (uint8_t*)PyMem_Malloc(total);
297 		uintptr_t op = 0;
298 		while (parts != NULL) {
299 		    VarBufferPart* next;
300 		    memcpy(allbytes + op, parts->start, parts->len);
301 		    op += parts->len;
302 		    next = parts->next;
303 		    PyMem_Free(parts);
304 		    parts = next;
305 		}
306 		out = PyBytes_FromStringAndSize((char*)allbytes, total);
307 		PyMem_Free(allbytes);
308 	    }
309             if (out == NULL) {
310                 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding VAR BYTES");
311             }
312 	} else {
313 	    void* raw;
314 	    if (aux == 0) {
315 		static void* empty_string = "";
316 		raw = empty_string;
317 	    } else {
318 		raw = rin->read(rin, aux);
319 		if (!raw) { logprintf("bytes read failed\n"); return NULL; }
320 	    }
321 	    out = PyBytes_FromStringAndSize(raw, (Py_ssize_t)aux);
322             if (out == NULL) {
323                 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding BYTES");
324             }
325             if (aux != 0) {
326                 rin->return_buffer(rin, raw);
327             }
328 	}
329         return out;
330     case CBOR_TEXT:
331 	if (cbor_info == CBOR_VAR_FOLLOWS) {
332 	    PyObject* parts = PyList_New(0);
333 	    PyObject* joiner = PyUnicode_FromString("");
334 	    uint8_t sc;
335 	    if (rin->read1(rin, &sc)) { logprintf("r1 fail in var text tag\n"); return NULL; }
336 	    while (sc != CBOR_BREAK) {
337 		PyObject* subitem = inner_loads_c(rin, sc);
338 		if (subitem == NULL) { logprintf("fail in var text subitem\n"); return NULL; }
339 		PyList_Append(parts, subitem);
340                 Py_DECREF(subitem);
341 		if (rin->read1(rin, &sc)) { logprintf("r1 fail in var text tag\n"); return NULL; }
342 	    }
343 	    // Done
344 	    out = PyUnicode_Join(joiner, parts);
345 	    Py_DECREF(joiner);
346 	    Py_DECREF(parts);
347             if (out == NULL) {
348                 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding VAR TEXT");
349             }
350 	} else {
351             void* raw;
352 	    if (aux == 0) {
353 		static void* empty_string = "";
354 		raw = empty_string;
355 	    } else {
356                 raw = rin->read(rin, aux);
357                 if (!raw) { logprintf("read text failed\n"); return NULL; }
358             }
359 	    out = PyUnicode_FromStringAndSize((char*)raw, (Py_ssize_t)aux);
360             if (out == NULL) {
361                 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding TEXT");
362             }
363             if (aux != 0) {
364                 rin->return_buffer(rin, raw);
365             }
366 	}
367         return out;
368     case CBOR_ARRAY:
369 	if (cbor_info == CBOR_VAR_FOLLOWS) {
370 	    uint8_t sc;
371 	    out = PyList_New(0);
372 	    if (rin->read1(rin, &sc)) { logprintf("r1 fail in var array tag\n"); return NULL; }
373 	    while (sc != CBOR_BREAK) {
374 		PyObject* subitem = inner_loads_c(rin, sc);
375 		if (subitem == NULL) { logprintf("fail in var array subitem\n"); return NULL; }
376 		PyList_Append(out, subitem);
377                 Py_DECREF(subitem);
378 		if (rin->read1(rin, &sc)) { logprintf("r1 fail in var array tag\n"); return NULL; }
379 	    }
380 	    // Done
381             if (out == NULL) {
382                 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding VAR ARRAY");
383             }
384 	} else {
385             unsigned int i;
386 	    out = PyList_New((Py_ssize_t)aux);
387 	    for (i = 0; i < aux; i++) {
388 		PyObject* subitem = inner_loads(rin);
389 		if (subitem == NULL) { logprintf("array subitem[%d] (of %d) failed\n", i, aux); return NULL; }
390 		PyList_SetItem(out, (Py_ssize_t)i, subitem);
391                 // PyList_SetItem became the owner of the reference count of subitem, we don't need to DECREF it
392 	    }
393             if (out == NULL) {
394                 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding ARRAY");
395             }
396 	}
397         return out;
398     case CBOR_MAP:
399 	out = PyDict_New();
400 	if (cbor_info == CBOR_VAR_FOLLOWS) {
401 	    uint8_t sc;
402 	    if (rin->read1(rin, &sc)) { logprintf("r1 fail in var map tag\n"); return NULL; }
403 	    while (sc != CBOR_BREAK) {
404 		PyObject* key = inner_loads_c(rin, sc);
405 		PyObject* value;
406 		if (key == NULL) { logprintf("var map key fail\n"); return NULL; }
407 		value = inner_loads(rin);
408 		if (value == NULL) { logprintf("var map val vail\n"); return NULL; }
409 		PyDict_SetItem(out, key, value);
410                 Py_DECREF(key);
411                 Py_DECREF(value);
412 
413 		if (rin->read1(rin, &sc)) { logprintf("r1 fail in var map tag\n"); return NULL; }
414 	    }
415             if (out == NULL) {
416                 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding VAR MAP");
417             }
418 	} else {
419             unsigned int i;
420 	    for (i = 0; i < aux; i++) {
421 		if (loads_kv(out, rin) != 0) {
422 		    logprintf("map kv[%d] failed\n", i);
423 		    return NULL;
424 		}
425 	    }
426             if (out == NULL) {
427                 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding MAP");
428             }
429 	}
430         return out;
431     case CBOR_TAG:
432 	return loads_tag(rin, aux);
433     case CBOR_7:
434 	if (aux == 20) {
435 	    out = Py_False;
436 	    Py_INCREF(out);
437 	} else if (aux == 21) {
438 	    out = Py_True;
439 	    Py_INCREF(out);
440 	} else if (aux == 22) {
441 	    out = Py_None;
442 	    Py_INCREF(out);
443 	} else if (aux == 23) {
444             // js `undefined`, closest is py None
445 	    out = Py_None;
446 	    Py_INCREF(out);
447 	}
448         if (out == NULL) {
449             PyErr_Format(PyExc_ValueError, "unknown section 7 marker %02x, aux=%llu", c, aux);
450         }
451         return out;
452     default:
453         PyErr_Format(PyExc_RuntimeError, "unknown cbor marker %02x", c);
454         return NULL;
455     }
456 #pragma GCC diagnostic push
457 #pragma GCC diagnostic ignored "-Wunreachable-code"
458     PyErr_SetString(PyExc_RuntimeError, "cbor library internal error moof!");
459     return NULL;
460 #pragma GCC diagnostic pop
461 }
462 
// Decode one key and one value from the reader and store the pair in the
// dict `out`.
// Returns 0 on success and -1 on failure (decode error, or the dict
// rejecting the key, e.g. because it is unhashable). No references are
// leaked on any path.
static int loads_kv(PyObject* out, Reader* rin) {
    int status;
    PyObject* value;
    PyObject* key = inner_loads(rin);

    if (key == NULL) { logprintf("map key fail\n"); return -1; }
    value = inner_loads(rin);
    if (value == NULL) {
        logprintf("map val fail\n");
        Py_DECREF(key);
        return -1;
    }
    // PyDict_SetItem takes its own references; ours are dropped either way
    status = PyDict_SetItem(out, key, value);
    Py_DECREF(key);
    Py_DECREF(value);
    if (status != 0) {
        logprintf("map set item fail\n");
        return -1;
    }
    return 0;
}
474 
loads_bignum(Reader * rin,uint8_t c)475 static PyObject* loads_bignum(Reader* rin, uint8_t c) {
476     PyObject* out = NULL;
477 
478     uint8_t bytes_info = c & CBOR_INFO_BITS;
479     if (bytes_info < 24) {
480         int i;
481 	PyObject* eight = PyLong_FromLong(8);
482 	out = PyLong_FromLong(0);
483 	for (i = 0; i < bytes_info; i++) {
484 	    // TODO: is this leaking like crazy?
485 	    PyObject* curbyte;
486 	    PyObject* tout = PyNumber_Lshift(out, eight);
487 	    Py_DECREF(out);
488 	    out = tout;
489 	    uint8_t cb;
490 	    if (rin->read1(rin, &cb)) {
491                 logprintf("r1 fail in bignum %d/%d\n", i, bytes_info);
492                 Py_DECREF(eight);
493                 Py_DECREF(out);
494                 return NULL;
495             }
496 	    curbyte = PyLong_FromLong(cb);
497 	    tout = PyNumber_Or(out, curbyte);
498 	    Py_DECREF(curbyte);
499 	    Py_DECREF(out);
500 	    out = tout;
501 	}
502         Py_DECREF(eight);
503 	return out;
504     } else {
505 	PyErr_Format(PyExc_NotImplementedError, "TODO: TAG BIGNUM for bigger bignum bytes_info=%d, len(ull)=%lu\n", bytes_info, sizeof(unsigned long long));
506 	return NULL;
507     }
508 }
509 
510 
511 // returns a PyObject for cbor.cbor.Tag
512 // Returned PyObject* is a BORROWED reference from the module dict
getCborTagClass(void)513 static PyObject* getCborTagClass(void) {
514     PyObject* cbor_module = PyImport_ImportModule("cbor.cbor");
515     PyObject* moddict = PyModule_GetDict(cbor_module);
516     PyObject* tag_class = PyDict_GetItemString(moddict, "Tag");
517     // moddict and tag_class are 'borrowed reference'
518     Py_DECREF(cbor_module);
519 
520     return tag_class;
521 }
522 
523 
loads_tag(Reader * rin,uint64_t aux)524 static PyObject* loads_tag(Reader* rin, uint64_t aux) {
525     PyObject* out = NULL;
526     // return an object CBORTag(tagnum, nextob)
527     if (aux == CBOR_TAG_BIGNUM) {
528 	// If the next object is bytes, interpret it here without making a PyObject for it.
529 	uint8_t sc;
530 	if (rin->read1(rin, &sc)) { logprintf("r1 fail in bignum tag\n"); return NULL; }
531 	if ((sc & CBOR_TYPE_MASK) == CBOR_BYTES) {
532 	    return loads_bignum(rin, sc);
533 	} else {
534 	    PyErr_Format(PyExc_ValueError, "TAG BIGNUM not followed by bytes but %02x", sc);
535 	    return NULL;
536 	}
537 #pragma GCC diagnostic push
538 #pragma GCC diagnostic ignored "-Wunreachable-code"
539 	PyErr_Format(PyExc_ValueError, "TODO: WRITEME CBOR TAG BIGNUM %02x ...\n", sc);
540 	return NULL;
541 #pragma GCC diagnostic pop
542     } else if (aux == CBOR_TAG_NEGBIGNUM) {
543 	// If the next object is bytes, interpret it here without making a PyObject for it.
544 	uint8_t sc;
545 	if (rin->read1(rin, &sc)) { logprintf("r1 fail in negbignum tag\n"); return NULL; }
546 	if ((sc & CBOR_TYPE_MASK) == CBOR_BYTES) {
547 	    out = loads_bignum(rin, sc);
548             if (out == NULL) { logprintf("loads_bignum fail inside TAG_NEGBIGNUM\n"); return NULL; }
549             PyObject* minusOne = PyLong_FromLong(-1);
550             PyObject* tout = PyNumber_Subtract(minusOne, out);
551             Py_DECREF(minusOne);
552             Py_DECREF(out);
553             out = tout;
554             return out;
555 	} else {
556 	    PyErr_Format(PyExc_ValueError, "TAG NEGBIGNUM not followed by bytes but %02x", sc);
557 	    return NULL;
558 	}
559 #pragma GCC diagnostic push
560 #pragma GCC diagnostic ignored "-Wunreachable-code"
561 	PyErr_Format(PyExc_ValueError, "TODO: WRITEME CBOR TAG NEGBIGNUM %02x ...\n", sc);
562 	return NULL;
563 #pragma GCC diagnostic pop
564     }
565     out = inner_loads(rin);
566     if (out == NULL) { return NULL; }
567     {
568         PyObject* tag_class = getCborTagClass();
569 	PyObject* args = Py_BuildValue("(K,O)", aux, out);
570         PyObject* tout = PyObject_CallObject(tag_class, args);
571 	Py_DECREF(args);
572 	Py_DECREF(out);
573         // tag_class was just a borrowed reference
574 	out = tout;
575     }
576     return out;
577 }
578 
579 
580 static PyObject*
cbor_loads(PyObject * noself,PyObject * args)581 cbor_loads(PyObject* noself, PyObject* args) {
582     PyObject* ob;
583     is_big_endian();
584     if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) {
585 	ob = PyList_GetItem(args, 0);
586     } else if (PyType_IsSubtype(Py_TYPE(args), &PyTuple_Type)) {
587 	ob = PyTuple_GetItem(args, 0);
588     } else {
589 	PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args);
590 	return NULL;
591     }
592 
593     if (ob == Py_None) {
594 	PyErr_SetString(PyExc_ValueError, "got None for buffer to decode in loads");
595 	return NULL;
596     }
597 
598     {
599         PyObject* out = NULL;
600 	Reader* r = NewBufferReader(ob);
601 	if (!r) {
602 	    return NULL;
603 	}
604 	out = inner_loads(r);
605         r->delete(r);
606         return out;
607     }
608 }
609 
610 
611 #if HAS_FILE_READER
612 
613 typedef struct _FileReader {
614     READER_FUNCTIONS;
615     FILE* fin;
616     void* dst;
617     Py_ssize_t dst_size;
618     Py_ssize_t read_count;
619 } FileReader;
620 
621 // read from a python builtin file which contains a C FILE*
FileReader_read(void * self,Py_ssize_t len)622 static void* FileReader_read(void* self, Py_ssize_t len) {
623     FileReader* thiz = (FileReader*)self;
624     Py_ssize_t rtotal = 0;
625     uintptr_t opos;
626     //logprintf("file read %d\n", len);
627     if (len > thiz->dst_size) {
628 	thiz->dst = PyMem_Realloc(thiz->dst, len);
629 	thiz->dst_size = len;
630     } else if ((thiz->dst_size > (128 * 1024)) && (len < 4096)) {
631 	PyMem_Free(thiz->dst);
632 	thiz->dst = PyMem_Malloc(len);
633 	thiz->dst_size = len;
634     }
635     opos = (uintptr_t)(thiz->dst);
636     while (1) {
637 	size_t rlen = fread((void*)opos, 1, len, thiz->fin);
638 	if (rlen == 0) {
639 	    // file isn't going to give any more
640 	    PyErr_Format(PyExc_ValueError, "only got %zd bytes with %zd stil to read from file", rtotal, len);
641 	    PyMem_Free(thiz->dst);
642 	    thiz->dst = NULL;
643             thiz->dst_size = 0;
644 	    return NULL;
645 	}
646 	thiz->read_count += rlen;
647 	rtotal += rlen;
648 	opos += rlen;
649 	len -= rlen;
650 	if (rtotal >= len) {
651             if (thiz->dst == NULL) {
652                 PyErr_SetString(PyExc_RuntimeError, "known error in file reader, NULL dst");
653                 return NULL;
654             }
655 	    return thiz->dst;
656 	}
657     }
658 }
FileReader_read1(void * self,uint8_t * oneByte)659 static int FileReader_read1(void* self, uint8_t* oneByte) {
660     FileReader* thiz = (FileReader*)self;
661     size_t didread = fread((void*)oneByte, 1, 1, thiz->fin);
662     if (didread == 0) {
663 	logprintf("failed to read 1 from file\n");
664 	PyErr_SetString(PyExc_ValueError, "got nothing reading 1 from file");
665 	return -1;
666     }
667     thiz->read_count++;
668     return 0;
669 }
// Release a buffer obtained from FileReader_read().
// Intentionally does nothing: the reader keeps its buffer and may reuse it
// for the next read.
static void FileReader_return_buffer(void* self, void* buffer) {
    (void)self;
    (void)buffer;
}
FileReader_delete(void * self)673 static void FileReader_delete(void* self) {
674     FileReader* thiz = (FileReader*)self;
675     if (thiz->dst) {
676 	PyMem_Free(thiz->dst);
677     }
678     PyMem_Free(thiz);
679 }
NewFileReader(PyObject * ob)680 static Reader* NewFileReader(PyObject* ob) {
681     FileReader* fr = (FileReader*)PyMem_Malloc(sizeof(FileReader));
682     if (fr == NULL) {
683         PyErr_SetString(PyExc_MemoryError, "failed to allocate FileReader");
684         return NULL;
685     }
686     fr->fin = PyFile_AsFile(ob);
687     if (fr->fin == NULL) {
688         PyErr_SetString(PyExc_RuntimeError, "PyFile_AsFile NULL");
689         PyMem_Free(fr);
690         return NULL;
691     }
692     fr->dst = NULL;
693     fr->dst_size = 0;
694     fr->read_count = 0;
695     SET_READER_FUNCTIONS(fr, FileReader);
696     return (Reader*)fr;
697 }
698 
699 #endif /* Python 2.7 FileReader */
700 
701 
702 typedef struct _ObjectReader {
703     READER_FUNCTIONS;
704     PyObject* ob;
705 
706     // We got one object with all the bytes neccessary, and need to
707     // DECREF it later.
708     PyObject* retval;
709     void* bytes;
710 
711     // OR, we got several objects, we DECREFed them as we went, and
712     // need to Free() this buffer at the end.
713     void* dst;
714 
715     Py_ssize_t read_count;
716     int exception_is_external;
717 } ObjectReader;
718 
719 // read from a python file-like object which has a .read(n) method
ObjectReader_read(void * context,Py_ssize_t len)720 static void* ObjectReader_read(void* context, Py_ssize_t len) {
721     ObjectReader* thiz = (ObjectReader*)context;
722     Py_ssize_t rtotal = 0;
723     uintptr_t opos = 0;
724     //logprintf("ob read %d\n", len);
725     assert(!thiz->dst);
726     assert(!thiz->bytes);
727     while (rtotal < len) {
728 	PyObject* retval = PyObject_CallMethod(thiz->ob, "read", "n", len - rtotal, NULL);
729 	Py_ssize_t rlen;
730 	if (retval == NULL) {
731 	    thiz->exception_is_external = 1;
732             logprintf("exception in object.read()\n");
733 	    return NULL;
734 	}
735 	if (!PyBytes_Check(retval)) {
736             logprintf("object.read() is not bytes\n");
737 	    PyErr_SetString(PyExc_ValueError, "expected ob.read() to return a bytes object\n");
738             Py_DECREF(retval);
739 	    return NULL;
740 	}
741 	rlen = PyBytes_Size(retval);
742 	thiz->read_count += rlen;
743 	if (rlen > len - rtotal) {
744             logprintf("object.read() is too much!\n");
745             PyErr_Format(PyExc_ValueError, "ob.read() returned %ld bytes but only wanted %lu\n", rlen, len - rtotal);
746             Py_DECREF(retval);
747             return NULL;
748 	}
749 	if (rlen == len) {
750 	    // best case! All in one call to read()
751 	    // We _keep_ a reference to retval until later.
752 	    thiz->retval = retval;
753 	    thiz->bytes = PyBytes_AsString(retval);
754 	    assert(thiz->bytes);
755 	    thiz->dst = NULL;
756 	    opos = 0;
757 	    return thiz->bytes;
758 	}
759 	if (thiz->dst == NULL) {
760 	    thiz->dst = PyMem_Malloc(len);
761 	    opos = (uintptr_t)thiz->dst;
762 	}
763 	// else, not enough all in one go
764 	memcpy((void*)opos, PyBytes_AsString(retval), rlen);
765 	Py_DECREF(retval);
766 	opos += rlen;
767 	rtotal += rlen;
768     }
769     assert(thiz->dst);
770     return thiz->dst;
771 }
ObjectReader_read1(void * self,uint8_t * oneByte)772 static int ObjectReader_read1(void* self, uint8_t* oneByte) {
773     ObjectReader* thiz = (ObjectReader*)self;
774     PyObject* retval = PyObject_CallMethod(thiz->ob, "read", "i", 1, NULL);
775     Py_ssize_t rlen;
776     if (retval == NULL) {
777 	thiz->exception_is_external = 1;
778 	//logprintf("call ob read(1) failed\n");
779 	return -1;
780     }
781     if (!PyBytes_Check(retval)) {
782 	PyErr_SetString(PyExc_ValueError, "expected ob.read() to return a bytes object\n");
783 	return -1;
784     }
785     rlen = PyBytes_Size(retval);
786     thiz->read_count += rlen;
787     if (rlen > 1) {
788 	PyErr_Format(PyExc_ValueError, "TODO: raise exception: WAT ob.read() returned %ld bytes but only wanted 1\n", rlen);
789 	return -1;
790     }
791     if (rlen == 1) {
792 	*oneByte = PyBytes_AsString(retval)[0];
793 	Py_DECREF(retval);
794 	return 0;
795     }
796     PyErr_SetString(PyExc_ValueError, "got nothing reading 1");
797     return -1;
798 }
ObjectReader_return_buffer(void * context,void * buffer)799 static void ObjectReader_return_buffer(void* context, void* buffer) {
800     ObjectReader* thiz = (ObjectReader*)context;
801     if (buffer == thiz->bytes) {
802 	Py_DECREF(thiz->retval);
803 	thiz->retval = NULL;
804 	thiz->bytes = NULL;
805     } else if (buffer == thiz->dst) {
806 	PyMem_Free(thiz->dst);
807 	thiz->dst = NULL;
808     } else {
809 	logprintf("TODO: raise exception, could not release buffer %p, wanted dst=%p or bytes=%p\n", buffer, thiz->dst, thiz->bytes);
810     }
811 }
ObjectReader_delete(void * context)812 static void ObjectReader_delete(void* context) {
813     ObjectReader* thiz = (ObjectReader*)context;
814     if (thiz->retval != NULL) {
815 	Py_DECREF(thiz->retval);
816     }
817     if (thiz->dst != NULL) {
818 	PyMem_Free(thiz->dst);
819     }
820     PyMem_Free(thiz);
821 }
NewObjectReader(PyObject * ob)822 static Reader* NewObjectReader(PyObject* ob) {
823     ObjectReader* r = (ObjectReader*)PyMem_Malloc(sizeof(ObjectReader));
824     r->ob = ob;
825     r->retval = NULL;
826     r->bytes = NULL;
827     r->dst = NULL;
828     r->read_count = 0;
829     r->exception_is_external = 0;
830     SET_READER_FUNCTIONS(r, ObjectReader);
831     return (Reader*)r;
832 }
833 
834 typedef struct _BufferReader {
835     READER_FUNCTIONS;
836     uint8_t* raw;
837     Py_ssize_t len;
838     uintptr_t pos;
839 } BufferReader;
840 
841 // read from a buffer, aka loads()
BufferReader_read(void * context,Py_ssize_t len)842 static void* BufferReader_read(void* context, Py_ssize_t len) {
843     BufferReader* thiz = (BufferReader*)context;
844     //logprintf("br %p %d (%d)\n", thiz, len, thiz->len);
845     if (len <= thiz->len) {
846 	void* out = (void*)thiz->pos;
847 	thiz->pos += len;
848 	thiz->len -= len;
849 	assert(out);
850 	return out;
851     }
852     PyErr_Format(PyExc_ValueError, "buffer read for %zd but only have %zd\n", len, thiz->len);
853     return NULL;
854 }
BufferReader_read1(void * self,uint8_t * oneByte)855 static int BufferReader_read1(void* self, uint8_t* oneByte) {
856     BufferReader* thiz = (BufferReader*)self;
857     //logprintf("br %p _1_ (%d)\n", thiz, thiz->len);
858     if (thiz->len <= 0) {
859 	PyErr_SetString(PyExc_LookupError, "buffer exhausted");
860 	return -1;
861     }
862     *oneByte = *((uint8_t*)thiz->pos);
863     thiz->pos += 1;
864     thiz->len -= 1;
865     return 0;
866 }
// Release a pointer obtained from BufferReader_read().
// Nothing to do: such pointers refer directly into the source buffer.
static void BufferReader_return_buffer(void* context, void* buffer) {
    (void)context;
    (void)buffer;
}
BufferReader_delete(void * context)870 static void BufferReader_delete(void* context) {
871     BufferReader* thiz = (BufferReader*)context;
872     PyMem_Free(thiz);
873 }
NewBufferReader(PyObject * ob)874 static Reader* NewBufferReader(PyObject* ob) {
875     BufferReader* r = (BufferReader*)PyMem_Malloc(sizeof(BufferReader));
876     SET_READER_FUNCTIONS(r, BufferReader);
877     if (PyByteArray_Check(ob)) {
878         r->raw = (uint8_t*)PyByteArray_AsString(ob);
879         r->len = PyByteArray_Size(ob);
880     } else if (PyBytes_Check(ob)) {
881         r->raw = (uint8_t*)PyBytes_AsString(ob);
882         r->len = PyBytes_Size(ob);
883     } else {
884         PyErr_SetString(PyExc_ValueError, "input of unknown type not bytes or bytearray");
885         return NULL;
886     }
887     r->pos = (uintptr_t)r->raw;
888     if (r->len == 0) {
889 	PyErr_SetString(PyExc_ValueError, "got zero length string in loads");
890 	return NULL;
891     }
892     if (r->raw == NULL) {
893 	PyErr_SetString(PyExc_ValueError, "got NULL buffer for string");
894 	return NULL;
895     }
896     //logprintf("NBR(%llu, %ld)\n", r->pos, r->len);
897     return (Reader*)r;
898 }
899 
900 
901 static PyObject*
cbor_load(PyObject * noself,PyObject * args)902 cbor_load(PyObject* noself, PyObject* args) {
903     PyObject* ob;
904     Reader* reader;
905     is_big_endian();
906     if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) {
907 	ob = PyList_GetItem(args, 0);
908     } else if (PyType_IsSubtype(Py_TYPE(args), &PyTuple_Type)) {
909 	ob = PyTuple_GetItem(args, 0);
910     } else {
911 	PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args);
912 	return NULL;
913     }
914 
915     if (ob == Py_None) {
916 	PyErr_SetString(PyExc_ValueError, "got None for buffer to decode in loads");
917 	return NULL;
918     }
919     PyObject* retval;
920 #if HAS_FILE_READER
921     if (PyFile_Check(ob)) {
922 	reader = NewFileReader(ob);
923         if (reader == NULL) { return NULL; }
924 	retval = inner_loads(reader);
925         if ((retval == NULL) &&
926             (((FileReader*)reader)->read_count == 0) &&
927             (feof(((FileReader*)reader)->fin) != 0)) {
928 	    // never got anything, started at EOF
929 	    PyErr_Clear();
930 	    PyErr_SetString(PyExc_EOFError, "read nothing, apparent EOF");
931         }
932         reader->delete(reader);
933     } else
934 #endif
935     {
936 	reader = NewObjectReader(ob);
937 	retval = inner_loads(reader);
938 	if ((retval == NULL) &&
939 	    (!((ObjectReader*)reader)->exception_is_external) &&
940 	    ((ObjectReader*)reader)->read_count == 0) {
941 	    // never got anything, assume EOF
942 	    PyErr_Clear();
943 	    PyErr_SetString(PyExc_EOFError, "read nothing, apparent EOF");
944 	}
945         reader->delete(reader);
946     }
947     return retval;
948 }
949 
950 
tag_u64_out(uint8_t cbor_type,uint64_t aux,uint8_t * out,uintptr_t * posp)951 static void tag_u64_out(uint8_t cbor_type, uint64_t aux, uint8_t* out, uintptr_t* posp) {
952     uintptr_t pos = *posp;
953     if (out != NULL) {
954 	out[pos] = cbor_type | CBOR_UINT64_FOLLOWS;
955 	out[pos+1] = (aux >> 56) & 0x0ff;
956 	out[pos+2] = (aux >> 48) & 0x0ff;
957 	out[pos+3] = (aux >> 40) & 0x0ff;
958 	out[pos+4] = (aux >> 32) & 0x0ff;
959 	out[pos+5] = (aux >> 24) & 0x0ff;
960 	out[pos+6] = (aux >> 16) & 0x0ff;
961 	out[pos+7] = (aux >>  8) & 0x0ff;
962 	out[pos+8] = aux & 0x0ff;
963     }
964     pos += 9;
965     *posp = pos;
966 }
967 
968 
tag_aux_out(uint8_t cbor_type,uint64_t aux,uint8_t * out,uintptr_t * posp)969 static void tag_aux_out(uint8_t cbor_type, uint64_t aux, uint8_t* out, uintptr_t* posp) {
970     uintptr_t pos = *posp;
971     if (aux <= 23) {
972 	// tiny literal
973 	if (out != NULL) {
974 	    out[pos] = cbor_type | aux;
975 	}
976 	pos += 1;
977     } else if (aux <= 0x0ff) {
978 	// one byte value
979 	if (out != NULL) {
980 	    out[pos] = cbor_type | CBOR_UINT8_FOLLOWS;
981 	    out[pos+1] = aux;
982 	}
983 	pos += 2;
984     } else if (aux <= 0x0ffff) {
985 	// two byte value
986 	if (out != NULL) {
987 	    out[pos] = cbor_type | CBOR_UINT16_FOLLOWS;
988 	    out[pos+1] = (aux >> 8) & 0x0ff;
989 	    out[pos+2] = aux & 0x0ff;
990 	}
991 	pos += 3;
992     } else if (aux <= 0x0ffffffffL) {
993 	// four byte value
994 	if (out != NULL) {
995 	    out[pos] = cbor_type | CBOR_UINT32_FOLLOWS;
996 	    out[pos+1] = (aux >> 24) & 0x0ff;
997 	    out[pos+2] = (aux >> 16) & 0x0ff;
998 	    out[pos+3] = (aux >>  8) & 0x0ff;
999 	    out[pos+4] = aux & 0x0ff;
1000 	}
1001 	pos += 5;
1002     } else {
1003 	// eight byte value
1004 	tag_u64_out(cbor_type, aux, out, posp);
1005 	return;
1006     }
1007     *posp = pos;
1008     return;
1009 }
1010 
// Forward declaration: dumps_dict() and dumps_tag() below recurse into
// inner_dumps(), which is defined after them.
static int inner_dumps(EncodeOptions *optp, PyObject* ob, uint8_t* out, uintptr_t* posp);
1012 
// Encode dict ob as a CBOR map: a CBOR_MAP head carrying the entry count,
// followed by each key and its value encoded via inner_dumps().
//
// When optp->sort_keys is set, entries are emitted in the order produced by
// PyList_Sort() on the key list; otherwise in PyDict_Next() order.
// When out is NULL nothing is written and only *posp is advanced.
//
// Returns 0 on success (and updates *posp), or non-zero with a Python
// exception set. *posp is left unchanged on failure.
static int dumps_dict(EncodeOptions *optp, PyObject* ob, uint8_t* out, uintptr_t* posp) {
    uintptr_t pos = *posp;
    Py_ssize_t dictlen = PyDict_Size(ob);
    PyObject* key;
    PyObject* val;
    int err = 0;

    tag_aux_out(CBOR_MAP, dictlen, out, &pos);

    if (optp->sort_keys) {
        Py_ssize_t index;
        Py_ssize_t nkeys;
        PyObject* keylist = PyDict_Keys(ob);  // new reference
        if (keylist == NULL) {
            return -1;
        }
        if (PyList_Sort(keylist) != 0) {
            // e.g. keys that cannot be ordered against each other
            Py_DECREF(keylist);
            return -1;
        }

        nkeys = PyList_Size(keylist);
        for (index = 0; index < nkeys; index++) {
            key = PyList_GetItem(keylist, index); // Borrowed ref
            val = PyDict_GetItem(ob, key); // Borrowed ref
            if (val == NULL) {
                // PyDict_GetItem does not set an exception for a missing key.
                PyErr_SetObject(PyExc_KeyError, key);
                err = -1;
                break;
            }
            err = inner_dumps(optp, key, out, &pos);
            if (err != 0) { break; }
            err = inner_dumps(optp, val, out, &pos);
            if (err != 0) { break; }
        }
        // Released on both the success and the error path.
        Py_DECREF(keylist);
        if (err != 0) { return err; }
    } else {
        Py_ssize_t dictiter = 0;
        while (PyDict_Next(ob, &dictiter, &key, &val)) {
            err = inner_dumps(optp, key, out, &pos);
            if (err != 0) { return err; }
            err = inner_dumps(optp, val, out, &pos);
            if (err != 0) { return err; }
        }
    }

    *posp = pos;
    return 0;
}
1051 
1052 
// Encode big integer val as a CBOR tagged byte string: one byte
// (CBOR_TAG | tag), one byte (CBOR_BYTES | byte count), then the bytes of
// val, most significant first.
//
// The bytes are peeled off val low-byte-first by repeatedly masking with
// 0xff and shifting right by 8, then written out in reverse.
// At most 23 bytes are extracted: the loop stops at 23 even if val is still
// non-zero, so any higher-order bytes are dropped.
//
// When out is NULL nothing is written (and no scratch buffer is allocated);
// only the length 2 + byte count is added to the position.
//
// optp is unused. val is borrowed from the caller and is not released here.
//
// NOTE(review): the results of PyLong_FromLong, PyLong_FromLongLong,
// PyMem_Malloc, PyNumber_And and PyNumber_InPlaceRshift are not checked, and
// the function returns void so it cannot report a failure. posp is
// NULL-checked when read but written unconditionally at the end.
static void dumps_bignum(EncodeOptions *optp, uint8_t tag, PyObject* val, uint8_t* out, uintptr_t* posp) {
    uintptr_t pos = (posp != NULL) ? *posp : 0;
    PyObject* eight = PyLong_FromLong(8);
    PyObject* bytemask = NULL;
    PyObject* nval = NULL;
    uint8_t* revbytes = NULL;
    int revbytepos = 0;
    // True while val still refers to the caller's object, which must not be
    // DECREF'd here; every later val is a new reference owned by this function.
    int val_is_orig = 1;
    if (out != NULL) {
        // Only the writing pass needs the mask and the scratch buffer.
        bytemask = PyLong_FromLongLong(0x0ff);
        revbytes = PyMem_Malloc(23);
    }
    while (PyObject_IsTrue(val) && (revbytepos < 23)) {
        if (revbytes != NULL) {
            // Low byte of the remaining value.
            PyObject* tbyte = PyNumber_And(val, bytemask);
            revbytes[revbytepos] = PyLong_AsLong(tbyte);
            Py_DECREF(tbyte);
        }
        revbytepos++;
        nval = PyNumber_InPlaceRshift(val, eight);
        if (val_is_orig) {
            val_is_orig = 0;
        } else {
            // Drop the intermediate value produced by the previous iteration.
            Py_DECREF(val);
        }
        val = nval;
    }
    if (revbytes != NULL) {
        out[pos] = CBOR_TAG | tag;
        pos++;
        // Byte count goes directly into the initial byte of the byte string.
        out[pos] = CBOR_BYTES | revbytepos;
        pos++;
        revbytepos--;
        // Collected low byte first; emit in reverse.
        while (revbytepos >= 0) {
            out[pos] = revbytes[revbytepos];
            pos++;
            revbytepos--;
        }
        PyMem_Free(revbytes);
        Py_DECREF(bytemask);
    } else {
        // Counting pass: tag byte + byte-string head + payload bytes.
        pos += 2 + revbytepos;
    }
    if (!val_is_orig) {
        // Release the last shifted value.
        Py_DECREF(val);
    }
    Py_DECREF(eight);
    *posp = pos;
}
1102 
// Encode a Tag-like object: a CBOR_TAG head carrying ob.tag, followed by
// ob.value encoded via inner_dumps().
//
// When out is NULL nothing is written and only the position is advanced.
//
// Returns 0 on success (and updates *posp when posp is non-NULL), or non-zero
// with a Python exception set:
//   ValueError - ob has no .tag or no .value, the tag is negative, does not
//                fit in a long long, or is not an integer at all
//   anything raised while encoding ob.value
static int dumps_tag(EncodeOptions *optp, PyObject* ob, uint8_t* out, uintptr_t* posp) {
    uintptr_t pos = (posp != NULL) ? *posp : 0;
    int err = 0;
    PyObject* tag_num;
    PyObject* tag_value;

    tag_num = PyObject_GetAttrString(ob, "tag");
    if (tag_num == NULL) {
        PyErr_SetString(PyExc_ValueError, "broken Tag object with no .tag");
        return -1;
    }
    tag_value = PyObject_GetAttrString(ob, "value");
    if (tag_value == NULL) {
        PyErr_SetString(PyExc_ValueError, "broken Tag object has .tag but not .value");
        Py_DECREF(tag_num);
        return -1;
    }

#ifdef Py_INTOBJECT_H
    if (PyInt_Check(tag_num)) {
        long val = PyInt_AsLong(tag_num);
        if (val >= 0) {
            tag_aux_out(CBOR_TAG, val, out, &pos);
            err = inner_dumps(optp, tag_value, out, &pos);
        } else {
            PyErr_Format(PyExc_ValueError, "tag cannot be a negative int: %ld", val);
            err = -1;
        }
    } else
#endif
    if (PyLong_Check(tag_num)) {
        int overflow = -1;
        long long val = PyLong_AsLongLongAndOverflow(tag_num, &overflow);
        if (overflow == 0) {
            if (val >= 0) {
                tag_aux_out(CBOR_TAG, val, out, &pos);
                err = inner_dumps(optp, tag_value, out, &pos);
            } else {
                PyErr_Format(PyExc_ValueError, "tag cannot be a negative long: %lld", val);
                err = -1;
            }
        } else {
            PyErr_SetString(PyExc_ValueError, "tag number too large");
            err = -1;
        }
    } else {
        // A non-integer tag used to fall through here and report success
        // without emitting anything, silently dropping the object.
        PyErr_SetString(PyExc_ValueError, "tag number must be an integer");
        err = -1;
    }

    Py_DECREF(tag_value);
    Py_DECREF(tag_num);
    if (err != 0) { return err; }

    if (posp != NULL) {
        *posp = pos;
    }
    return 0;
}
1159 
1160 
1161 // With out=NULL it just counts the length.
1162 // return err, 0=OK
inner_dumps(EncodeOptions * optp,PyObject * ob,uint8_t * out,uintptr_t * posp)1163 static int inner_dumps(EncodeOptions *optp, PyObject* ob, uint8_t* out, uintptr_t* posp) {
1164     uintptr_t pos = (posp != NULL) ? *posp : 0;
1165 
1166     if (ob == Py_None) {
1167 	if (out != NULL) {
1168 	    out[pos] = CBOR_NULL;
1169 	}
1170 	pos += 1;
1171     } else if (PyBool_Check(ob)) {
1172 	if (out != NULL) {
1173 	    if (PyObject_IsTrue(ob)) {
1174 		out[pos] = CBOR_TRUE;
1175 	    } else {
1176 		out[pos] = CBOR_FALSE;
1177 	    }
1178 	}
1179 	pos += 1;
1180     } else if (PyDict_Check(ob)) {
1181 	int err = dumps_dict(optp, ob, out, &pos);
1182 	if (err != 0) { return err; }
1183     } else if (PyList_Check(ob)) {
1184         Py_ssize_t i;
1185 	Py_ssize_t listlen = PyList_Size(ob);
1186 	tag_aux_out(CBOR_ARRAY, listlen, out, &pos);
1187 	for (i = 0; i < listlen; i++) {
1188 	    int err = inner_dumps(optp, PyList_GetItem(ob, i), out, &pos);
1189 	    if (err != 0) { return err; }
1190 	}
1191     } else if (PyTuple_Check(ob)) {
1192         Py_ssize_t i;
1193 	Py_ssize_t listlen = PyTuple_Size(ob);
1194 	tag_aux_out(CBOR_ARRAY, listlen, out, &pos);
1195 	for (i = 0; i < listlen; i++) {
1196 	    int err = inner_dumps(optp, PyTuple_GetItem(ob, i), out, &pos);
1197 	    if (err != 0) { return err; }
1198 	}
1199 	// TODO: accept other enumerables and emit a variable length array
1200 #ifdef Py_INTOBJECT_H
1201 	// PyInt exists in Python 2 but not 3
1202     } else if (PyInt_Check(ob)) {
1203 	long val = PyInt_AsLong(ob);
1204 	if (val >= 0) {
1205 	    tag_aux_out(CBOR_UINT, val, out, &pos);
1206 	} else {
1207 	    tag_aux_out(CBOR_NEGINT, -1 - val, out, &pos);
1208 	}
1209 #endif
1210     } else if (PyLong_Check(ob)) {
1211 	int overflow = 0;
1212 	long long val = PyLong_AsLongLongAndOverflow(ob, &overflow);
1213 	if (overflow == 0) {
1214 	    if (val >= 0) {
1215 		tag_aux_out(CBOR_UINT, val, out, &pos);
1216 	    } else {
1217 		tag_aux_out(CBOR_NEGINT, -1L - val, out, &pos);
1218 	    }
1219 	} else {
1220 	    if (overflow < 0) {
1221 		// BIG NEGINT
1222 		PyObject* minusone = PyLong_FromLongLong(-1L);
1223 		PyObject* val = PyNumber_Subtract(minusone, ob);
1224 		Py_DECREF(minusone);
1225 		dumps_bignum(optp, CBOR_TAG_NEGBIGNUM, val, out, &pos);
1226 		Py_DECREF(val);
1227 	    } else {
1228 		// BIG INT
1229 		dumps_bignum(optp, CBOR_TAG_BIGNUM, ob, out, &pos);
1230 	    }
1231 	}
1232     } else if (PyFloat_Check(ob)) {
1233 	double val = PyFloat_AsDouble(ob);
1234 	tag_u64_out(CBOR_7, *((uint64_t*)(&val)), out, &pos);
1235     } else if (PyBytes_Check(ob)) {
1236 	Py_ssize_t len = PyBytes_Size(ob);
1237 	tag_aux_out(CBOR_BYTES, len, out, &pos);
1238 	if (out != NULL) {
1239 	    memcpy(out + pos, PyBytes_AsString(ob), len);
1240 	}
1241 	pos += len;
1242     } else if (PyUnicode_Check(ob)) {
1243 	PyObject* utf8 = PyUnicode_AsUTF8String(ob);
1244 	Py_ssize_t len = PyBytes_Size(utf8);
1245 	tag_aux_out(CBOR_TEXT, len, out, &pos);
1246 	if (out != NULL) {
1247 	    memcpy(out + pos, PyBytes_AsString(utf8), len);
1248 	}
1249 	pos += len;
1250 	Py_DECREF(utf8);
1251     } else {
1252         int handled = 0;
1253         {
1254             PyObject* tag_class = getCborTagClass();
1255             if (PyObject_IsInstance(ob, tag_class)) {
1256                 int err = dumps_tag(optp, ob, out, &pos);
1257                 if (err != 0) { return err; }
1258                 handled = 1;
1259             }
1260             // tag_class was just a borrowed reference
1261         }
1262 
1263         // TODO: other special object serializations here
1264 
1265         if (!handled) {
1266 #if IS_PY3
1267             PyErr_Format(PyExc_ValueError, "cannot serialize unknown object: %R", ob);
1268 #else
1269             PyObject* badtype = PyObject_Type(ob);
1270             PyObject* badtypename = PyObject_Str(badtype);
1271             PyErr_Format(PyExc_ValueError, "cannot serialize unknown object of type %s", PyString_AsString(badtypename));
1272             Py_DECREF(badtypename);
1273             Py_DECREF(badtype);
1274 #endif
1275             return -1;
1276         }
1277     }
1278     if (posp != NULL) {
1279 	*posp = pos;
1280     }
1281     return 0;
1282 }
1283 
// Populate *optp from the keyword arguments of dumps()/dump().
//
// Recognised keys:
//   sort_keys - truthiness is stored in optp->sort_keys
// Other keys are ignored; kwargs == NULL leaves *optp untouched.
//
// Returns 1 on success, 0 on failure with a Python exception set (kwargs is
// not a dict, or evaluating the truth value of sort_keys raised).
static int _dumps_kwargs(EncodeOptions *optp, PyObject* kwargs) {
    PyObject* sort_keys;

    if (kwargs == NULL) {
        return 1;
    }
    if (!PyDict_Check(kwargs)) {
        PyErr_Format(PyExc_ValueError, "kwargs not dict: %R\n", kwargs);
        return 0;
    }

    sort_keys = PyDict_GetItemString(kwargs, "sort_keys");  // Borrowed ref
    if (sort_keys != NULL) {
        int truth = PyObject_IsTrue(sort_keys);
        if (truth < 0) {
            // __bool__/__len__ raised; do not store -1 into an unsigned flag.
            return 0;
        }
        optp->sort_keys = (unsigned int)truth;
    }
    return 1;
}
1298 
1299 static PyObject*
cbor_dumps(PyObject * noself,PyObject * args,PyObject * kwargs)1300 cbor_dumps(PyObject* noself, PyObject* args, PyObject* kwargs) {
1301 
1302     PyObject* ob;
1303     EncodeOptions opts = {0};
1304     EncodeOptions *optp = &opts;
1305     is_big_endian();
1306     if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) {
1307 	ob = PyList_GetItem(args, 0);
1308     } else if (PyType_IsSubtype(Py_TYPE(args), &PyTuple_Type)) {
1309 	ob = PyTuple_GetItem(args, 0);
1310     } else {
1311 	PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args);
1312 	return NULL;
1313     }
1314     if (ob == NULL) {
1315         return NULL;
1316     }
1317 
1318     if (!_dumps_kwargs(optp, kwargs)) {
1319         return NULL;
1320     }
1321 
1322     {
1323 	Py_ssize_t outlen = 0;
1324 	uintptr_t pos = 0;
1325 	void* out = NULL;
1326 	PyObject* obout = NULL;
1327 	int err;
1328 
1329 	// first pass just to count length
1330 	err = inner_dumps(optp, ob, NULL, &pos);
1331 	if (err != 0) {
1332 	    return NULL;
1333 	}
1334 
1335 	outlen = pos;
1336 
1337 	out = PyMem_Malloc(outlen);
1338 	if (out == NULL) {
1339 	    PyErr_NoMemory();
1340 	    return NULL;
1341 	}
1342 
1343 	err = inner_dumps(optp, ob, out, NULL);
1344 	if (err != 0) {
1345 	    PyMem_Free(out);
1346 	    return NULL;
1347 	}
1348 
1349 	// TODO: I wish there was a way to do this without this copy.
1350 	obout = PyBytes_FromStringAndSize(out, outlen);
1351 	PyMem_Free(out);
1352 	return obout;
1353     }
1354 }
1355 
1356 static PyObject*
cbor_dump(PyObject * noself,PyObject * args,PyObject * kwargs)1357 cbor_dump(PyObject* noself, PyObject* args, PyObject *kwargs) {
1358     // args should be (obj, fp)
1359     PyObject* ob;
1360     PyObject* fp;
1361     EncodeOptions opts = {0};
1362     EncodeOptions *optp = &opts;
1363 
1364     is_big_endian();
1365     if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) {
1366 	ob = PyList_GetItem(args, 0);
1367 	fp = PyList_GetItem(args, 1);
1368     } else if (PyType_IsSubtype(Py_TYPE(args), &PyTuple_Type)) {
1369 	ob = PyTuple_GetItem(args, 0);
1370 	fp = PyTuple_GetItem(args, 1);
1371     } else {
1372 	PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args);
1373 	return NULL;
1374     }
1375     if ((ob == NULL) || (fp == NULL)) {
1376         return NULL;
1377     }
1378 
1379     if (!_dumps_kwargs(optp, kwargs)) {
1380         return NULL;
1381     }
1382 
1383     {
1384 	// TODO: make this smarter, right now it is justt fp.write(dumps(ob))
1385 	Py_ssize_t outlen = 0;
1386 	uintptr_t pos = 0;
1387 	void* out = NULL;
1388 	int err;
1389 
1390 	// first pass just to count length
1391 	err = inner_dumps(optp, ob, NULL, &pos);
1392 	if (err != 0) {
1393 	    return NULL;
1394 	}
1395 
1396 	outlen = pos;
1397 
1398 	out = PyMem_Malloc(outlen);
1399 	if (out == NULL) {
1400 	    PyErr_NoMemory();
1401 	    return NULL;
1402 	}
1403 
1404 	err = inner_dumps(optp, ob, out, NULL);
1405 	if (err != 0) {
1406 	    PyMem_Free(out);
1407 	    return NULL;
1408 	}
1409 
1410 #if HAS_FILE_READER
1411 	if (PyFile_Check(fp)) {
1412 	    FILE* fout = PyFile_AsFile(fp);
1413 	    fwrite(out, 1, outlen, fout);
1414 	} else
1415 #endif
1416 	{
1417 	    PyObject* ret;
1418             PyObject* obout = NULL;
1419 #if IS_PY3
1420 	    PyObject* writeStr = PyUnicode_FromString("write");
1421 #else
1422 	    PyObject* writeStr = PyString_FromString("write");
1423 #endif
1424 	    obout = PyBytes_FromStringAndSize(out, outlen);
1425 	    //logprintf("write %zd bytes to %p.write() as %p\n", outlen, fp, obout);
1426 	    ret = PyObject_CallMethodObjArgs(fp, writeStr, obout, NULL);
1427 	    Py_DECREF(writeStr);
1428 	    Py_DECREF(obout);
1429 	    if (ret != NULL) {
1430 		Py_DECREF(ret);
1431 	    } else {
1432 		// exception in fp.write()
1433 		PyMem_Free(out);
1434 		return NULL;
1435 	    }
1436 	    //logprintf("wrote %zd bytes to %p.write() as %p\n", outlen, fp, obout);
1437 	}
1438 	PyMem_Free(out);
1439     }
1440 
1441     Py_RETURN_NONE;
1442 }
1443 
1444 
// Method table for the cbor._cbor extension module.
// dumps and dump are registered with METH_VARARGS|METH_KEYWORDS because they
// take keyword arguments (sort_keys), which is why they need the PyCFunction
// cast; loads and load take positional arguments only.
static PyMethodDef CborMethods[] = {
    {"loads",  cbor_loads, METH_VARARGS,
        "parse cbor from data buffer to objects"},
    {"dumps", (PyCFunction)cbor_dumps, METH_VARARGS|METH_KEYWORDS,
        "serialize python object to bytes"},
    {"load",  cbor_load, METH_VARARGS,
     "Parse cbor from data buffer to objects.\n"
     "Takes a file-like object capable of .read(N)\n"},
    {"dump", (PyCFunction)cbor_dump, METH_VARARGS|METH_KEYWORDS,
     "Serialize python object to bytes.\n"
     "dump(obj, fp)\n"
     "obj: object to output; fp: file-like object to .write() to\n"},
    {NULL, NULL, 0, NULL}        /* Sentinel */
};
1459 
1460 #ifdef Py_InitModule
1461 // Python 2.7
1462 PyMODINIT_FUNC
init_cbor(void)1463 init_cbor(void)
1464 {
1465     (void) Py_InitModule("cbor._cbor", CborMethods);
1466 }
1467 #else
1468 // Python 3
1469 PyMODINIT_FUNC
PyInit__cbor(void)1470 PyInit__cbor(void)
1471 {
1472     static PyModuleDef modef = {
1473 	PyModuleDef_HEAD_INIT,
1474     };
1475     //modef.m_base = PyModuleDef_HEAD_INIT;
1476     modef.m_name = "cbor._cbor";
1477     modef.m_doc = NULL;
1478     modef.m_size = 0;
1479     modef.m_methods = CborMethods;
1480 #ifdef Py_mod_exec
1481     modef.m_slots = NULL; // Py >= 3.5
1482 #else
1483     modef.m_reload = NULL; // Py < 3.5
1484 #endif
1485     modef.m_traverse = NULL;
1486     modef.m_clear = NULL;
1487     modef.m_free = NULL;
1488     return PyModule_Create(&modef);
1489 }
1490 #endif
1491 
1492