1 #include "Python.h"
2
3 #include "cbor.h"
4
5 #include <math.h>
6 #include <stdint.h>
7
8 //#include <stdio.h>
9 #include <arpa/inet.h>
10
11
12 #ifndef DEBUG_LOGGING
13 // causes things to be written to stderr
14 #define DEBUG_LOGGING 0
15 //#define DEBUG_LOGGING 1
16 #endif
17
18
19 #ifdef Py_InitModule
20 // Python 2.7
21
22 #define HAS_FILE_READER 1
23 #define IS_PY3 0
24
25 #else
26
27 #define HAS_FILE_READER 0
28 #define IS_PY3 1
29
30 #endif
31
// Options that control how Python values are encoded to CBOR.
typedef struct {
    // When nonzero, dumps_dict() emits map entries in sorted key order
    // instead of dict iteration order.
    unsigned int sort_keys;
} EncodeOptions;
35
// Hey Look! It's a polymorphic object structure in C!
//
// READER_FUNCTIONS declares the "vtable" shared by every reader. Each
// concrete reader struct (FileReader, ObjectReader, BufferReader) begins
// with these four function pointers so that it can be cast to Reader*.
//
// read(self, len): read len bytes and return them in a buffer, or NULL on error
// read1(self, uint8_t*): read one byte into *oneByte; return 0 on success
// return_buffer(self, buffer): release the result of read(self, len)
// delete(self): destructor. free the reader and its contents.
#define READER_FUNCTIONS \
    void* (*read)(void* self, Py_ssize_t len); \
    int (*read1)(void* self, uint8_t* oneByte); \
    void (*return_buffer)(void* self, void* buffer); \
    void (*delete)(void* self);
47
// Install the four method implementations of reader class `clazz`
// (clazz_read, clazz_read1, clazz_return_buffer, clazz_delete) into the
// reader struct pointed to by `thiz`.
//
// The body is wrapped in do { } while (0) so the expansion is exactly one
// statement; invoke it as `SET_READER_FUNCTIONS(r, Clazz);` and it stays
// correct even as the body of an unbraced if/else.
#define SET_READER_FUNCTIONS(thiz, clazz) do { \
        (thiz)->read = clazz##_read; \
        (thiz)->read1 = clazz##_read1; \
        (thiz)->return_buffer = clazz##_return_buffer; \
        (thiz)->delete = clazz##_delete; \
    } while (0)
52
// Abstract reader: just the function-pointer table. The decoder only ever
// talks to input through a Reader*, calling rin->read / read1 /
// return_buffer / delete.
typedef struct _Reader {
    READER_FUNCTIONS;
} Reader;
56
57 static Reader* NewBufferReader(PyObject* ob);
58 static Reader* NewObjectReader(PyObject* ob);
59 #if HAS_FILE_READER
60 static Reader* NewFileReader(PyObject* ob);
61 #endif
62
63
64 static PyObject* loads_tag(Reader* rin, uint64_t aux);
65 static int loads_kv(PyObject* out, Reader* rin);
66
// One chunk of an indefinite-length ("VAR") byte string, kept in a
// singly-linked list while the chunks are being collected.
typedef struct VarBufferPart {
    // start of this chunk's bytes; the decoder stores the bytes in the same
    // allocation, immediately after this struct
    void* start;
    // number of bytes at start
    uint64_t len;
    // next chunk, or NULL for the last one
    struct VarBufferPart* next;
} VarBufferPart;
72
73
// printf-style debug logging.
// When DEBUG_LOGGING is nonzero the message goes to stderr and the
// vfprintf() result is returned; otherwise nothing is written and 0 is
// returned.
static int logprintf(const char* fmt, ...) {
    int written = 0;
    va_list args;

    va_start(args, fmt);
#if DEBUG_LOGGING
    written = vfprintf(stderr, fmt, args);
#endif
    va_end(args);
    return written;
}
86
// TODO: portably work this out at compile time
// Cached result of the most recent is_big_endian() call.
static int _is_big_endian = 0;

// Detect host byte order at run time: on a big-endian host htonl() is the
// identity. Stores the answer in _is_big_endian and returns it
// (1 = big-endian, 0 = little-endian).
static int is_big_endian(void) {
    const uint32_t probe = 1234;
    _is_big_endian = (htonl(probe) == probe);
    return _is_big_endian;
}
96
97
decodeFloat16(Reader * rin)98 PyObject* decodeFloat16(Reader* rin) {
99 // float16 parsing adapted from example code in spec
100 uint8_t hibyte, lobyte;// = raw[pos];
101 int err;
102 int exp;
103 int mant;
104 double val;
105
106 err = rin->read1(rin, &hibyte);
107 if (err) { logprintf("fail in float16[0]\n"); return NULL; }
108 err = rin->read1(rin, &lobyte);
109 if (err) { logprintf("fail in float16[1]\n"); return NULL; }
110
111 exp = (hibyte >> 2) & 0x1f;
112 mant = ((hibyte & 0x3) << 8) | lobyte;
113 if (exp == 0) {
114 val = ldexp(mant, -24);
115 } else if (exp != 31) {
116 val = ldexp(mant + 1024, exp - 25);
117 } else {
118 val = mant == 0 ? INFINITY : NAN;
119 }
120 if (hibyte & 0x80) {
121 val = -val;
122 }
123 return PyFloat_FromDouble(val);
124 }
decodeFloat32(Reader * rin)125 PyObject* decodeFloat32(Reader* rin) {
126 float val;
127 uint8_t* raw = rin->read(rin, 4);
128 if (!raw) { logprintf("fail in float32\n"); return NULL; }
129 if (_is_big_endian) {
130 // easy!
131 val = *((float*)raw);
132 } else {
133 uint8_t* dest = (uint8_t*)(&val);
134 dest[3] = raw[0];
135 dest[2] = raw[1];
136 dest[1] = raw[2];
137 dest[0] = raw[3];
138 }
139 rin->return_buffer(rin, raw);
140 return PyFloat_FromDouble(val);
141 }
decodeFloat64(Reader * rin)142 PyObject* decodeFloat64(Reader* rin) {
143 int si;
144 uint64_t aux = 0;
145 uint8_t* raw = rin->read(rin, 8);
146 if (!raw) { logprintf("fail in float64\n"); return NULL; }
147 for (si = 0; si < 8; si++) {
148 aux = aux << 8;
149 aux |= raw[si];
150 }
151 rin->return_buffer(rin, raw);
152 return PyFloat_FromDouble(*((double*)(&aux)));
153 }
154
155 // parse following int value into *auxP
156 // return 0 on success, -1 on fail
handle_info_bits(Reader * rin,uint8_t cbor_info,uint64_t * auxP)157 static int handle_info_bits(Reader* rin, uint8_t cbor_info, uint64_t* auxP) {
158 uint64_t aux;
159
160 if (cbor_info <= 23) {
161 // literal value <=23
162 aux = cbor_info;
163 } else if (cbor_info == CBOR_UINT8_FOLLOWS) {
164 uint8_t taux;
165 if (rin->read1(rin, &taux)) { logprintf("fail in uint8\n"); return -1; }
166 aux = taux;
167 } else if (cbor_info == CBOR_UINT16_FOLLOWS) {
168 uint8_t hibyte, lobyte;
169 if (rin->read1(rin, &hibyte)) { logprintf("fail in uint16[0]\n"); return -1; }
170 if (rin->read1(rin, &lobyte)) { logprintf("fail in uint16[1]\n"); return -1; }
171 aux = (hibyte << 8) | lobyte;
172 } else if (cbor_info == CBOR_UINT32_FOLLOWS) {
173 uint8_t* raw = (uint8_t*)rin->read(rin, 4);
174 if (!raw) { logprintf("fail in uint32[1]\n"); return -1; }
175 aux =
176 (((uint64_t)raw[0]) << 24) |
177 (((uint64_t)raw[1]) << 16) |
178 (((uint64_t)raw[2]) << 8) |
179 ((uint64_t)raw[3]);
180 rin->return_buffer(rin, raw);
181 } else if (cbor_info == CBOR_UINT64_FOLLOWS) {
182 int si;
183 uint8_t* raw = (uint8_t*)rin->read(rin, 8);
184 if (!raw) { logprintf("fail in uint64[1]\n"); return -1; }
185 aux = 0;
186 for (si = 0; si < 8; si++) {
187 aux = aux << 8;
188 aux |= raw[si];
189 }
190 rin->return_buffer(rin, raw);
191 } else {
192 aux = 0;
193 }
194 *auxP = aux;
195 return 0;
196 }
197
198 static PyObject* inner_loads_c(Reader* rin, uint8_t c);
199
inner_loads(Reader * rin)200 static PyObject* inner_loads(Reader* rin) {
201 uint8_t c;
202 int err;
203
204 err = rin->read1(rin, &c);
205 if (err) { logprintf("fail in loads tag\n"); return NULL; }
206 return inner_loads_c(rin, c);
207 }
208
inner_loads_c(Reader * rin,uint8_t c)209 PyObject* inner_loads_c(Reader* rin, uint8_t c) {
210 uint8_t cbor_type;
211 uint8_t cbor_info;
212 uint64_t aux;
213
214 cbor_type = c & CBOR_TYPE_MASK;
215 cbor_info = c & CBOR_INFO_BITS;
216
217 #if 0
218 if (pos > len) {
219 PyErr_SetString(PyExc_ValueError, "misparse, token went longer than buffer");
220 return NULL;
221 }
222
223 pos += 1;
224 #endif
225
226 if (cbor_type == CBOR_7) {
227 if (cbor_info == CBOR_UINT16_FOLLOWS) { // float16
228 return decodeFloat16(rin);
229 } else if (cbor_info == CBOR_UINT32_FOLLOWS) { // float32
230 return decodeFloat32(rin);
231 } else if (cbor_info == CBOR_UINT64_FOLLOWS) { // float64
232 return decodeFloat64(rin);
233 }
234 // not a float, fall through to other CBOR_7 interpretations
235 }
236 if (handle_info_bits(rin, cbor_info, &aux)) { logprintf("info bits failed\n"); return NULL; }
237
238 PyObject* out = NULL;
239 switch (cbor_type) {
240 case CBOR_UINT:
241 out = PyLong_FromUnsignedLongLong(aux);
242 if (out == NULL) {
243 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding UINT");
244 }
245 return out;
246 case CBOR_NEGINT:
247 if (aux > 0x7fffffffffffffff) {
248 PyObject* bignum = PyLong_FromUnsignedLongLong(aux);
249 PyObject* minusOne = PyLong_FromLong(-1);
250 out = PyNumber_Subtract(minusOne, bignum);
251 Py_DECREF(minusOne);
252 Py_DECREF(bignum);
253 } else {
254 out = PyLong_FromLongLong((long long)(((long long)-1) - aux));
255 }
256 if (out == NULL) {
257 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding NEGINT");
258 }
259 return out;
260 case CBOR_BYTES:
261 if (cbor_info == CBOR_VAR_FOLLOWS) {
262 size_t total = 0;
263 VarBufferPart* parts = NULL;
264 VarBufferPart* parts_tail = NULL;
265 uint8_t sc;
266 if (rin->read1(rin, &sc)) { logprintf("r1 fail in var bytes tag\n"); return NULL; }
267 while (sc != CBOR_BREAK) {
268 uint8_t scbor_type = sc & CBOR_TYPE_MASK;
269 uint8_t scbor_info = sc & CBOR_INFO_BITS;
270 uint64_t saux;
271 void* blob;
272
273 if (scbor_type != CBOR_BYTES) {
274 PyErr_Format(PyExc_ValueError, "expected subordinate BYTES block under VAR BYTES, but got %x", scbor_type);
275 return NULL;
276 }
277 if(handle_info_bits(rin, scbor_info, &saux)) { logprintf("var bytes sub infobits failed\n"); return NULL; }
278 blob = rin->read(rin, saux);
279 if (!blob) { logprintf("var bytes sub bytes read failed\n"); return NULL; }
280 if (parts_tail == NULL) {
281 parts = parts_tail = (VarBufferPart*)PyMem_Malloc(sizeof(VarBufferPart) + saux);
282 } else {
283 parts_tail->next = (VarBufferPart*)PyMem_Malloc(sizeof(VarBufferPart) + saux);
284 parts_tail = parts_tail->next;
285 }
286 parts_tail->start = (void*)(parts_tail + 1);
287 memcpy(parts_tail->start, blob, saux);
288 rin->return_buffer(rin, blob);
289 parts_tail->len = saux;
290 parts_tail->next = NULL;
291 total += saux;
292 if (rin->read1(rin, &sc)) { logprintf("r1 fail in var bytes tag\n"); return NULL; }
293 }
294 // Done
295 {
296 uint8_t* allbytes = (uint8_t*)PyMem_Malloc(total);
297 uintptr_t op = 0;
298 while (parts != NULL) {
299 VarBufferPart* next;
300 memcpy(allbytes + op, parts->start, parts->len);
301 op += parts->len;
302 next = parts->next;
303 PyMem_Free(parts);
304 parts = next;
305 }
306 out = PyBytes_FromStringAndSize((char*)allbytes, total);
307 PyMem_Free(allbytes);
308 }
309 if (out == NULL) {
310 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding VAR BYTES");
311 }
312 } else {
313 void* raw;
314 if (aux == 0) {
315 static void* empty_string = "";
316 raw = empty_string;
317 } else {
318 raw = rin->read(rin, aux);
319 if (!raw) { logprintf("bytes read failed\n"); return NULL; }
320 }
321 out = PyBytes_FromStringAndSize(raw, (Py_ssize_t)aux);
322 if (out == NULL) {
323 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding BYTES");
324 }
325 if (aux != 0) {
326 rin->return_buffer(rin, raw);
327 }
328 }
329 return out;
330 case CBOR_TEXT:
331 if (cbor_info == CBOR_VAR_FOLLOWS) {
332 PyObject* parts = PyList_New(0);
333 PyObject* joiner = PyUnicode_FromString("");
334 uint8_t sc;
335 if (rin->read1(rin, &sc)) { logprintf("r1 fail in var text tag\n"); return NULL; }
336 while (sc != CBOR_BREAK) {
337 PyObject* subitem = inner_loads_c(rin, sc);
338 if (subitem == NULL) { logprintf("fail in var text subitem\n"); return NULL; }
339 PyList_Append(parts, subitem);
340 Py_DECREF(subitem);
341 if (rin->read1(rin, &sc)) { logprintf("r1 fail in var text tag\n"); return NULL; }
342 }
343 // Done
344 out = PyUnicode_Join(joiner, parts);
345 Py_DECREF(joiner);
346 Py_DECREF(parts);
347 if (out == NULL) {
348 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding VAR TEXT");
349 }
350 } else {
351 void* raw;
352 if (aux == 0) {
353 static void* empty_string = "";
354 raw = empty_string;
355 } else {
356 raw = rin->read(rin, aux);
357 if (!raw) { logprintf("read text failed\n"); return NULL; }
358 }
359 out = PyUnicode_FromStringAndSize((char*)raw, (Py_ssize_t)aux);
360 if (out == NULL) {
361 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding TEXT");
362 }
363 if (aux != 0) {
364 rin->return_buffer(rin, raw);
365 }
366 }
367 return out;
368 case CBOR_ARRAY:
369 if (cbor_info == CBOR_VAR_FOLLOWS) {
370 uint8_t sc;
371 out = PyList_New(0);
372 if (rin->read1(rin, &sc)) { logprintf("r1 fail in var array tag\n"); return NULL; }
373 while (sc != CBOR_BREAK) {
374 PyObject* subitem = inner_loads_c(rin, sc);
375 if (subitem == NULL) { logprintf("fail in var array subitem\n"); return NULL; }
376 PyList_Append(out, subitem);
377 Py_DECREF(subitem);
378 if (rin->read1(rin, &sc)) { logprintf("r1 fail in var array tag\n"); return NULL; }
379 }
380 // Done
381 if (out == NULL) {
382 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding VAR ARRAY");
383 }
384 } else {
385 unsigned int i;
386 out = PyList_New((Py_ssize_t)aux);
387 for (i = 0; i < aux; i++) {
388 PyObject* subitem = inner_loads(rin);
389 if (subitem == NULL) { logprintf("array subitem[%d] (of %d) failed\n", i, aux); return NULL; }
390 PyList_SetItem(out, (Py_ssize_t)i, subitem);
391 // PyList_SetItem became the owner of the reference count of subitem, we don't need to DECREF it
392 }
393 if (out == NULL) {
394 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding ARRAY");
395 }
396 }
397 return out;
398 case CBOR_MAP:
399 out = PyDict_New();
400 if (cbor_info == CBOR_VAR_FOLLOWS) {
401 uint8_t sc;
402 if (rin->read1(rin, &sc)) { logprintf("r1 fail in var map tag\n"); return NULL; }
403 while (sc != CBOR_BREAK) {
404 PyObject* key = inner_loads_c(rin, sc);
405 PyObject* value;
406 if (key == NULL) { logprintf("var map key fail\n"); return NULL; }
407 value = inner_loads(rin);
408 if (value == NULL) { logprintf("var map val vail\n"); return NULL; }
409 PyDict_SetItem(out, key, value);
410 Py_DECREF(key);
411 Py_DECREF(value);
412
413 if (rin->read1(rin, &sc)) { logprintf("r1 fail in var map tag\n"); return NULL; }
414 }
415 if (out == NULL) {
416 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding VAR MAP");
417 }
418 } else {
419 unsigned int i;
420 for (i = 0; i < aux; i++) {
421 if (loads_kv(out, rin) != 0) {
422 logprintf("map kv[%d] failed\n", i);
423 return NULL;
424 }
425 }
426 if (out == NULL) {
427 PyErr_SetString(PyExc_RuntimeError, "unknown error decoding MAP");
428 }
429 }
430 return out;
431 case CBOR_TAG:
432 return loads_tag(rin, aux);
433 case CBOR_7:
434 if (aux == 20) {
435 out = Py_False;
436 Py_INCREF(out);
437 } else if (aux == 21) {
438 out = Py_True;
439 Py_INCREF(out);
440 } else if (aux == 22) {
441 out = Py_None;
442 Py_INCREF(out);
443 } else if (aux == 23) {
444 // js `undefined`, closest is py None
445 out = Py_None;
446 Py_INCREF(out);
447 }
448 if (out == NULL) {
449 PyErr_Format(PyExc_ValueError, "unknown section 7 marker %02x, aux=%llu", c, aux);
450 }
451 return out;
452 default:
453 PyErr_Format(PyExc_RuntimeError, "unknown cbor marker %02x", c);
454 return NULL;
455 }
456 #pragma GCC diagnostic push
457 #pragma GCC diagnostic ignored "-Wunreachable-code"
458 PyErr_SetString(PyExc_RuntimeError, "cbor library internal error moof!");
459 return NULL;
460 #pragma GCC diagnostic pop
461 }
462
// Decode one key and one value from rin and store the pair in dict `out`.
// Returns 0 on success and -1 on failure. On failure a Python exception has
// been set by the decoder or by PyDict_SetItem (for example TypeError for an
// unhashable key), and no reference is leaked.
static int loads_kv(PyObject* out, Reader* rin) {
    int err;
    PyObject* value;
    PyObject* key = inner_loads(rin);
    if (key == NULL) { logprintf("map key fail\n"); return -1; }
    value = inner_loads(rin);
    if (value == NULL) {
        logprintf("map val fail\n");
        Py_DECREF(key);  // the key was decoded successfully; don't leak it
        return -1;
    }
    err = PyDict_SetItem(out, key, value);
    // the dict holds its own references (if the insert worked)
    Py_DECREF(key);
    Py_DECREF(value);
    if (err != 0) { logprintf("map set item fail\n"); return -1; }
    return 0;
}
474
loads_bignum(Reader * rin,uint8_t c)475 static PyObject* loads_bignum(Reader* rin, uint8_t c) {
476 PyObject* out = NULL;
477
478 uint8_t bytes_info = c & CBOR_INFO_BITS;
479 if (bytes_info < 24) {
480 int i;
481 PyObject* eight = PyLong_FromLong(8);
482 out = PyLong_FromLong(0);
483 for (i = 0; i < bytes_info; i++) {
484 // TODO: is this leaking like crazy?
485 PyObject* curbyte;
486 PyObject* tout = PyNumber_Lshift(out, eight);
487 Py_DECREF(out);
488 out = tout;
489 uint8_t cb;
490 if (rin->read1(rin, &cb)) {
491 logprintf("r1 fail in bignum %d/%d\n", i, bytes_info);
492 Py_DECREF(eight);
493 Py_DECREF(out);
494 return NULL;
495 }
496 curbyte = PyLong_FromLong(cb);
497 tout = PyNumber_Or(out, curbyte);
498 Py_DECREF(curbyte);
499 Py_DECREF(out);
500 out = tout;
501 }
502 Py_DECREF(eight);
503 return out;
504 } else {
505 PyErr_Format(PyExc_NotImplementedError, "TODO: TAG BIGNUM for bigger bignum bytes_info=%d, len(ull)=%lu\n", bytes_info, sizeof(unsigned long long));
506 return NULL;
507 }
508 }
509
510
511 // returns a PyObject for cbor.cbor.Tag
512 // Returned PyObject* is a BORROWED reference from the module dict
getCborTagClass(void)513 static PyObject* getCborTagClass(void) {
514 PyObject* cbor_module = PyImport_ImportModule("cbor.cbor");
515 PyObject* moddict = PyModule_GetDict(cbor_module);
516 PyObject* tag_class = PyDict_GetItemString(moddict, "Tag");
517 // moddict and tag_class are 'borrowed reference'
518 Py_DECREF(cbor_module);
519
520 return tag_class;
521 }
522
523
loads_tag(Reader * rin,uint64_t aux)524 static PyObject* loads_tag(Reader* rin, uint64_t aux) {
525 PyObject* out = NULL;
526 // return an object CBORTag(tagnum, nextob)
527 if (aux == CBOR_TAG_BIGNUM) {
528 // If the next object is bytes, interpret it here without making a PyObject for it.
529 uint8_t sc;
530 if (rin->read1(rin, &sc)) { logprintf("r1 fail in bignum tag\n"); return NULL; }
531 if ((sc & CBOR_TYPE_MASK) == CBOR_BYTES) {
532 return loads_bignum(rin, sc);
533 } else {
534 PyErr_Format(PyExc_ValueError, "TAG BIGNUM not followed by bytes but %02x", sc);
535 return NULL;
536 }
537 #pragma GCC diagnostic push
538 #pragma GCC diagnostic ignored "-Wunreachable-code"
539 PyErr_Format(PyExc_ValueError, "TODO: WRITEME CBOR TAG BIGNUM %02x ...\n", sc);
540 return NULL;
541 #pragma GCC diagnostic pop
542 } else if (aux == CBOR_TAG_NEGBIGNUM) {
543 // If the next object is bytes, interpret it here without making a PyObject for it.
544 uint8_t sc;
545 if (rin->read1(rin, &sc)) { logprintf("r1 fail in negbignum tag\n"); return NULL; }
546 if ((sc & CBOR_TYPE_MASK) == CBOR_BYTES) {
547 out = loads_bignum(rin, sc);
548 if (out == NULL) { logprintf("loads_bignum fail inside TAG_NEGBIGNUM\n"); return NULL; }
549 PyObject* minusOne = PyLong_FromLong(-1);
550 PyObject* tout = PyNumber_Subtract(minusOne, out);
551 Py_DECREF(minusOne);
552 Py_DECREF(out);
553 out = tout;
554 return out;
555 } else {
556 PyErr_Format(PyExc_ValueError, "TAG NEGBIGNUM not followed by bytes but %02x", sc);
557 return NULL;
558 }
559 #pragma GCC diagnostic push
560 #pragma GCC diagnostic ignored "-Wunreachable-code"
561 PyErr_Format(PyExc_ValueError, "TODO: WRITEME CBOR TAG NEGBIGNUM %02x ...\n", sc);
562 return NULL;
563 #pragma GCC diagnostic pop
564 }
565 out = inner_loads(rin);
566 if (out == NULL) { return NULL; }
567 {
568 PyObject* tag_class = getCborTagClass();
569 PyObject* args = Py_BuildValue("(K,O)", aux, out);
570 PyObject* tout = PyObject_CallObject(tag_class, args);
571 Py_DECREF(args);
572 Py_DECREF(out);
573 // tag_class was just a borrowed reference
574 out = tout;
575 }
576 return out;
577 }
578
579
580 static PyObject*
cbor_loads(PyObject * noself,PyObject * args)581 cbor_loads(PyObject* noself, PyObject* args) {
582 PyObject* ob;
583 is_big_endian();
584 if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) {
585 ob = PyList_GetItem(args, 0);
586 } else if (PyType_IsSubtype(Py_TYPE(args), &PyTuple_Type)) {
587 ob = PyTuple_GetItem(args, 0);
588 } else {
589 PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args);
590 return NULL;
591 }
592
593 if (ob == Py_None) {
594 PyErr_SetString(PyExc_ValueError, "got None for buffer to decode in loads");
595 return NULL;
596 }
597
598 {
599 PyObject* out = NULL;
600 Reader* r = NewBufferReader(ob);
601 if (!r) {
602 return NULL;
603 }
604 out = inner_loads(r);
605 r->delete(r);
606 return out;
607 }
608 }
609
610
611 #if HAS_FILE_READER
612
// Reader over the C FILE* inside a Python 2 builtin file object.
typedef struct _FileReader {
    READER_FUNCTIONS;
    // stream obtained from PyFile_AsFile()
    FILE* fin;
    // scratch buffer owned by the reader; results of read() point into it
    // and it is reused by later reads
    void* dst;
    // allocated size of dst in bytes
    Py_ssize_t dst_size;
    // total bytes consumed from fin so far
    Py_ssize_t read_count;
} FileReader;
620
621 // read from a python builtin file which contains a C FILE*
FileReader_read(void * self,Py_ssize_t len)622 static void* FileReader_read(void* self, Py_ssize_t len) {
623 FileReader* thiz = (FileReader*)self;
624 Py_ssize_t rtotal = 0;
625 uintptr_t opos;
626 //logprintf("file read %d\n", len);
627 if (len > thiz->dst_size) {
628 thiz->dst = PyMem_Realloc(thiz->dst, len);
629 thiz->dst_size = len;
630 } else if ((thiz->dst_size > (128 * 1024)) && (len < 4096)) {
631 PyMem_Free(thiz->dst);
632 thiz->dst = PyMem_Malloc(len);
633 thiz->dst_size = len;
634 }
635 opos = (uintptr_t)(thiz->dst);
636 while (1) {
637 size_t rlen = fread((void*)opos, 1, len, thiz->fin);
638 if (rlen == 0) {
639 // file isn't going to give any more
640 PyErr_Format(PyExc_ValueError, "only got %zd bytes with %zd stil to read from file", rtotal, len);
641 PyMem_Free(thiz->dst);
642 thiz->dst = NULL;
643 thiz->dst_size = 0;
644 return NULL;
645 }
646 thiz->read_count += rlen;
647 rtotal += rlen;
648 opos += rlen;
649 len -= rlen;
650 if (rtotal >= len) {
651 if (thiz->dst == NULL) {
652 PyErr_SetString(PyExc_RuntimeError, "known error in file reader, NULL dst");
653 return NULL;
654 }
655 return thiz->dst;
656 }
657 }
658 }
FileReader_read1(void * self,uint8_t * oneByte)659 static int FileReader_read1(void* self, uint8_t* oneByte) {
660 FileReader* thiz = (FileReader*)self;
661 size_t didread = fread((void*)oneByte, 1, 1, thiz->fin);
662 if (didread == 0) {
663 logprintf("failed to read 1 from file\n");
664 PyErr_SetString(PyExc_ValueError, "got nothing reading 1 from file");
665 return -1;
666 }
667 thiz->read_count++;
668 return 0;
669 }
// Release a buffer obtained from FileReader_read().
// Deliberately a no-op: the reader keeps its scratch buffer and may reuse
// it for the next read.
static void FileReader_return_buffer(void* self, void* buffer) {
    (void)self;
    (void)buffer;
}
FileReader_delete(void * self)673 static void FileReader_delete(void* self) {
674 FileReader* thiz = (FileReader*)self;
675 if (thiz->dst) {
676 PyMem_Free(thiz->dst);
677 }
678 PyMem_Free(thiz);
679 }
NewFileReader(PyObject * ob)680 static Reader* NewFileReader(PyObject* ob) {
681 FileReader* fr = (FileReader*)PyMem_Malloc(sizeof(FileReader));
682 if (fr == NULL) {
683 PyErr_SetString(PyExc_MemoryError, "failed to allocate FileReader");
684 return NULL;
685 }
686 fr->fin = PyFile_AsFile(ob);
687 if (fr->fin == NULL) {
688 PyErr_SetString(PyExc_RuntimeError, "PyFile_AsFile NULL");
689 PyMem_Free(fr);
690 return NULL;
691 }
692 fr->dst = NULL;
693 fr->dst_size = 0;
694 fr->read_count = 0;
695 SET_READER_FUNCTIONS(fr, FileReader);
696 return (Reader*)fr;
697 }
698
699 #endif /* Python 2.7 FileReader */
700
701
// Reader over any Python object that has a .read(n) method returning bytes.
typedef struct _ObjectReader {
    READER_FUNCTIONS;
    // the file-like object being read; the constructor stores the pointer
    // without taking a new reference
    PyObject* ob;

    // We got one object with all the bytes necessary, and need to
    // DECREF it later.
    PyObject* retval;
    // pointer to retval's internal byte data; this is what read() returned
    void* bytes;

    // OR, we got several objects, we DECREFed them as we went, and
    // need to Free() this buffer at the end.
    void* dst;

    // total bytes returned by ob.read() so far
    Py_ssize_t read_count;
    // set to 1 when a call to ob.read() itself raised; cbor_load uses it to
    // avoid replacing that exception with EOFError
    int exception_is_external;
} ObjectReader;
718
719 // read from a python file-like object which has a .read(n) method
ObjectReader_read(void * context,Py_ssize_t len)720 static void* ObjectReader_read(void* context, Py_ssize_t len) {
721 ObjectReader* thiz = (ObjectReader*)context;
722 Py_ssize_t rtotal = 0;
723 uintptr_t opos = 0;
724 //logprintf("ob read %d\n", len);
725 assert(!thiz->dst);
726 assert(!thiz->bytes);
727 while (rtotal < len) {
728 PyObject* retval = PyObject_CallMethod(thiz->ob, "read", "n", len - rtotal, NULL);
729 Py_ssize_t rlen;
730 if (retval == NULL) {
731 thiz->exception_is_external = 1;
732 logprintf("exception in object.read()\n");
733 return NULL;
734 }
735 if (!PyBytes_Check(retval)) {
736 logprintf("object.read() is not bytes\n");
737 PyErr_SetString(PyExc_ValueError, "expected ob.read() to return a bytes object\n");
738 Py_DECREF(retval);
739 return NULL;
740 }
741 rlen = PyBytes_Size(retval);
742 thiz->read_count += rlen;
743 if (rlen > len - rtotal) {
744 logprintf("object.read() is too much!\n");
745 PyErr_Format(PyExc_ValueError, "ob.read() returned %ld bytes but only wanted %lu\n", rlen, len - rtotal);
746 Py_DECREF(retval);
747 return NULL;
748 }
749 if (rlen == len) {
750 // best case! All in one call to read()
751 // We _keep_ a reference to retval until later.
752 thiz->retval = retval;
753 thiz->bytes = PyBytes_AsString(retval);
754 assert(thiz->bytes);
755 thiz->dst = NULL;
756 opos = 0;
757 return thiz->bytes;
758 }
759 if (thiz->dst == NULL) {
760 thiz->dst = PyMem_Malloc(len);
761 opos = (uintptr_t)thiz->dst;
762 }
763 // else, not enough all in one go
764 memcpy((void*)opos, PyBytes_AsString(retval), rlen);
765 Py_DECREF(retval);
766 opos += rlen;
767 rtotal += rlen;
768 }
769 assert(thiz->dst);
770 return thiz->dst;
771 }
ObjectReader_read1(void * self,uint8_t * oneByte)772 static int ObjectReader_read1(void* self, uint8_t* oneByte) {
773 ObjectReader* thiz = (ObjectReader*)self;
774 PyObject* retval = PyObject_CallMethod(thiz->ob, "read", "i", 1, NULL);
775 Py_ssize_t rlen;
776 if (retval == NULL) {
777 thiz->exception_is_external = 1;
778 //logprintf("call ob read(1) failed\n");
779 return -1;
780 }
781 if (!PyBytes_Check(retval)) {
782 PyErr_SetString(PyExc_ValueError, "expected ob.read() to return a bytes object\n");
783 return -1;
784 }
785 rlen = PyBytes_Size(retval);
786 thiz->read_count += rlen;
787 if (rlen > 1) {
788 PyErr_Format(PyExc_ValueError, "TODO: raise exception: WAT ob.read() returned %ld bytes but only wanted 1\n", rlen);
789 return -1;
790 }
791 if (rlen == 1) {
792 *oneByte = PyBytes_AsString(retval)[0];
793 Py_DECREF(retval);
794 return 0;
795 }
796 PyErr_SetString(PyExc_ValueError, "got nothing reading 1");
797 return -1;
798 }
ObjectReader_return_buffer(void * context,void * buffer)799 static void ObjectReader_return_buffer(void* context, void* buffer) {
800 ObjectReader* thiz = (ObjectReader*)context;
801 if (buffer == thiz->bytes) {
802 Py_DECREF(thiz->retval);
803 thiz->retval = NULL;
804 thiz->bytes = NULL;
805 } else if (buffer == thiz->dst) {
806 PyMem_Free(thiz->dst);
807 thiz->dst = NULL;
808 } else {
809 logprintf("TODO: raise exception, could not release buffer %p, wanted dst=%p or bytes=%p\n", buffer, thiz->dst, thiz->bytes);
810 }
811 }
ObjectReader_delete(void * context)812 static void ObjectReader_delete(void* context) {
813 ObjectReader* thiz = (ObjectReader*)context;
814 if (thiz->retval != NULL) {
815 Py_DECREF(thiz->retval);
816 }
817 if (thiz->dst != NULL) {
818 PyMem_Free(thiz->dst);
819 }
820 PyMem_Free(thiz);
821 }
NewObjectReader(PyObject * ob)822 static Reader* NewObjectReader(PyObject* ob) {
823 ObjectReader* r = (ObjectReader*)PyMem_Malloc(sizeof(ObjectReader));
824 r->ob = ob;
825 r->retval = NULL;
826 r->bytes = NULL;
827 r->dst = NULL;
828 r->read_count = 0;
829 r->exception_is_external = 0;
830 SET_READER_FUNCTIONS(r, ObjectReader);
831 return (Reader*)r;
832 }
833
// Reader over an in-memory bytes / bytearray buffer (used by loads()).
typedef struct _BufferReader {
    READER_FUNCTIONS;
    // start of the buffer being decoded (not owned by the reader)
    uint8_t* raw;
    // number of bytes still unread
    Py_ssize_t len;
    // address of the next unread byte, kept as an integer
    uintptr_t pos;
} BufferReader;
840
841 // read from a buffer, aka loads()
BufferReader_read(void * context,Py_ssize_t len)842 static void* BufferReader_read(void* context, Py_ssize_t len) {
843 BufferReader* thiz = (BufferReader*)context;
844 //logprintf("br %p %d (%d)\n", thiz, len, thiz->len);
845 if (len <= thiz->len) {
846 void* out = (void*)thiz->pos;
847 thiz->pos += len;
848 thiz->len -= len;
849 assert(out);
850 return out;
851 }
852 PyErr_Format(PyExc_ValueError, "buffer read for %zd but only have %zd\n", len, thiz->len);
853 return NULL;
854 }
BufferReader_read1(void * self,uint8_t * oneByte)855 static int BufferReader_read1(void* self, uint8_t* oneByte) {
856 BufferReader* thiz = (BufferReader*)self;
857 //logprintf("br %p _1_ (%d)\n", thiz, thiz->len);
858 if (thiz->len <= 0) {
859 PyErr_SetString(PyExc_LookupError, "buffer exhausted");
860 return -1;
861 }
862 *oneByte = *((uint8_t*)thiz->pos);
863 thiz->pos += 1;
864 thiz->len -= 1;
865 return 0;
866 }
// Release a buffer obtained from BufferReader_read().
// Nothing to do: read() hands out pointers into the caller's own buffer.
static void BufferReader_return_buffer(void* context, void* buffer) {
    (void)context;
    (void)buffer;
}
// Destructor: free the reader struct. The underlying buffer is not owned by
// the reader and is left alone.
static void BufferReader_delete(void* context) {
    PyMem_Free(context);
}
NewBufferReader(PyObject * ob)874 static Reader* NewBufferReader(PyObject* ob) {
875 BufferReader* r = (BufferReader*)PyMem_Malloc(sizeof(BufferReader));
876 SET_READER_FUNCTIONS(r, BufferReader);
877 if (PyByteArray_Check(ob)) {
878 r->raw = (uint8_t*)PyByteArray_AsString(ob);
879 r->len = PyByteArray_Size(ob);
880 } else if (PyBytes_Check(ob)) {
881 r->raw = (uint8_t*)PyBytes_AsString(ob);
882 r->len = PyBytes_Size(ob);
883 } else {
884 PyErr_SetString(PyExc_ValueError, "input of unknown type not bytes or bytearray");
885 return NULL;
886 }
887 r->pos = (uintptr_t)r->raw;
888 if (r->len == 0) {
889 PyErr_SetString(PyExc_ValueError, "got zero length string in loads");
890 return NULL;
891 }
892 if (r->raw == NULL) {
893 PyErr_SetString(PyExc_ValueError, "got NULL buffer for string");
894 return NULL;
895 }
896 //logprintf("NBR(%llu, %ld)\n", r->pos, r->len);
897 return (Reader*)r;
898 }
899
900
901 static PyObject*
cbor_load(PyObject * noself,PyObject * args)902 cbor_load(PyObject* noself, PyObject* args) {
903 PyObject* ob;
904 Reader* reader;
905 is_big_endian();
906 if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) {
907 ob = PyList_GetItem(args, 0);
908 } else if (PyType_IsSubtype(Py_TYPE(args), &PyTuple_Type)) {
909 ob = PyTuple_GetItem(args, 0);
910 } else {
911 PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args);
912 return NULL;
913 }
914
915 if (ob == Py_None) {
916 PyErr_SetString(PyExc_ValueError, "got None for buffer to decode in loads");
917 return NULL;
918 }
919 PyObject* retval;
920 #if HAS_FILE_READER
921 if (PyFile_Check(ob)) {
922 reader = NewFileReader(ob);
923 if (reader == NULL) { return NULL; }
924 retval = inner_loads(reader);
925 if ((retval == NULL) &&
926 (((FileReader*)reader)->read_count == 0) &&
927 (feof(((FileReader*)reader)->fin) != 0)) {
928 // never got anything, started at EOF
929 PyErr_Clear();
930 PyErr_SetString(PyExc_EOFError, "read nothing, apparent EOF");
931 }
932 reader->delete(reader);
933 } else
934 #endif
935 {
936 reader = NewObjectReader(ob);
937 retval = inner_loads(reader);
938 if ((retval == NULL) &&
939 (!((ObjectReader*)reader)->exception_is_external) &&
940 ((ObjectReader*)reader)->read_count == 0) {
941 // never got anything, assume EOF
942 PyErr_Clear();
943 PyErr_SetString(PyExc_EOFError, "read nothing, apparent EOF");
944 }
945 reader->delete(reader);
946 }
947 return retval;
948 }
949
950
tag_u64_out(uint8_t cbor_type,uint64_t aux,uint8_t * out,uintptr_t * posp)951 static void tag_u64_out(uint8_t cbor_type, uint64_t aux, uint8_t* out, uintptr_t* posp) {
952 uintptr_t pos = *posp;
953 if (out != NULL) {
954 out[pos] = cbor_type | CBOR_UINT64_FOLLOWS;
955 out[pos+1] = (aux >> 56) & 0x0ff;
956 out[pos+2] = (aux >> 48) & 0x0ff;
957 out[pos+3] = (aux >> 40) & 0x0ff;
958 out[pos+4] = (aux >> 32) & 0x0ff;
959 out[pos+5] = (aux >> 24) & 0x0ff;
960 out[pos+6] = (aux >> 16) & 0x0ff;
961 out[pos+7] = (aux >> 8) & 0x0ff;
962 out[pos+8] = aux & 0x0ff;
963 }
964 pos += 9;
965 *posp = pos;
966 }
967
968
tag_aux_out(uint8_t cbor_type,uint64_t aux,uint8_t * out,uintptr_t * posp)969 static void tag_aux_out(uint8_t cbor_type, uint64_t aux, uint8_t* out, uintptr_t* posp) {
970 uintptr_t pos = *posp;
971 if (aux <= 23) {
972 // tiny literal
973 if (out != NULL) {
974 out[pos] = cbor_type | aux;
975 }
976 pos += 1;
977 } else if (aux <= 0x0ff) {
978 // one byte value
979 if (out != NULL) {
980 out[pos] = cbor_type | CBOR_UINT8_FOLLOWS;
981 out[pos+1] = aux;
982 }
983 pos += 2;
984 } else if (aux <= 0x0ffff) {
985 // two byte value
986 if (out != NULL) {
987 out[pos] = cbor_type | CBOR_UINT16_FOLLOWS;
988 out[pos+1] = (aux >> 8) & 0x0ff;
989 out[pos+2] = aux & 0x0ff;
990 }
991 pos += 3;
992 } else if (aux <= 0x0ffffffffL) {
993 // four byte value
994 if (out != NULL) {
995 out[pos] = cbor_type | CBOR_UINT32_FOLLOWS;
996 out[pos+1] = (aux >> 24) & 0x0ff;
997 out[pos+2] = (aux >> 16) & 0x0ff;
998 out[pos+3] = (aux >> 8) & 0x0ff;
999 out[pos+4] = aux & 0x0ff;
1000 }
1001 pos += 5;
1002 } else {
1003 // eight byte value
1004 tag_u64_out(cbor_type, aux, out, posp);
1005 return;
1006 }
1007 *posp = pos;
1008 return;
1009 }
1010
1011 static int inner_dumps(EncodeOptions *optp, PyObject* ob, uint8_t* out, uintptr_t* posp);
1012
// Encode dict ob as a CBOR map: the map head with the entry count, then
// each key followed by its value. With optp->sort_keys set the entries are
// written in sorted key order, otherwise in dict iteration order.
// out may be NULL, in which case it is passed through unchanged to the head
// writer and inner_dumps and only the position is advanced.
// Returns 0 on success and updates *posp. On failure returns non-zero and
// leaves *posp untouched: either the error code from inner_dumps, or -1
// when the key list cannot be built or sorted (a Python exception is then
// set, e.g. TypeError for keys that cannot be compared).
static int dumps_dict(EncodeOptions *optp, PyObject* ob, uint8_t* out, uintptr_t* posp) {
    uintptr_t pos = *posp;
    Py_ssize_t dictlen = PyDict_Size(ob);
    PyObject* key;
    PyObject* val;
    int err = 0;

    tag_aux_out(CBOR_MAP, dictlen, out, &pos);

    if (optp->sort_keys) {
        Py_ssize_t index;
        Py_ssize_t nkeys;
        PyObject* keylist = PyDict_Keys(ob);
        if (keylist == NULL) {
            return -1;
        }
        if (PyList_Sort(keylist) != 0) {
            Py_DECREF(keylist);
            return -1;
        }

        //fprintf(stderr, "sortking keys\n");
        nkeys = PyList_Size(keylist);
        for (index = 0; index < nkeys; index++) {
            key = PyList_GetItem(keylist, index); // Borrowed ref
            val = PyDict_GetItem(ob, key); // Borrowed ref
            err = inner_dumps(optp, key, out, &pos);
            if (err != 0) { break; }
            err = inner_dumps(optp, val, out, &pos);
            if (err != 0) { break; }
        }
        // release the key list on the error path too
        Py_DECREF(keylist);
        if (err != 0) { return err; }
    } else {
        Py_ssize_t dictiter = 0;
        //fprintf(stderr, "unsorted keys\n");
        while (PyDict_Next(ob, &dictiter, &key, &val)) {
            err = inner_dumps(optp, key, out, &pos);
            if (err != 0) { return err; }
            err = inner_dumps(optp, val, out, &pos);
            if (err != 0) { return err; }
        }
    }

    *posp = pos;
    return 0;
}
1051
1052
/*
 * Encode a non-negative Python integer as a tagged byte string: one byte
 * (CBOR_TAG | tag), then a CBOR_BYTES header with the byte count, then the
 * integer's bytes most-significant first.
 *
 * The bytes are collected least-significant first by repeatedly masking
 * with 0xff and shifting right by 8, then written out in reverse. The
 * magnitude may be of any length; the byte-string header is produced by
 * tag_aux_out(), which yields a single header byte for lengths up to 23.
 *
 * optp  unused
 * tag   tag number; callers in this file pass CBOR_TAG_BIGNUM or
 *       CBOR_TAG_NEGBIGNUM
 * val   the integer to encode; must not be negative (for a negative
 *       bignum the caller passes -1 - n)
 * out   destination buffer, or NULL to only measure the encoded size
 * posp  in/out offset into `out`; written back only on success
 *
 * This function returns nothing. On failure it leaves *posp untouched and
 * a Python exception set, so a caller that needs to detect failure must
 * check PyErr_Occurred().
 */
static void dumps_bignum(EncodeOptions *optp, uint8_t tag, PyObject* val, uint8_t* out, uintptr_t* posp) {
    uintptr_t pos = (posp != NULL) ? *posp : 0;
    PyObject* eight = PyLong_FromLong(8);
    PyObject* bytemask = PyLong_FromLong(0x0ff);
    PyObject* zero = PyLong_FromLong(0);
    PyObject* cur = NULL;
    uint8_t* revbytes = NULL;
    size_t revcap = 0;
    size_t nbytes = 0;
    size_t i;
    int truth;

    (void)optp;

    if ((eight == NULL) || (bytemask == NULL) || (zero == NULL)) {
        goto cleanup;
    }

    // Right-shifting a negative integer never reaches zero, so refuse it
    // up front instead of looping forever.
    truth = PyObject_RichCompareBool(val, zero, Py_LT);
    if (truth != 0) {
        if (truth > 0) {
            PyErr_SetString(PyExc_ValueError, "bignum magnitude cannot be negative");
        }
        goto cleanup;
    }

    // Hold our own reference so every loop iteration can release `cur`
    // uniformly without touching the caller's reference.
    cur = val;
    Py_INCREF(cur);

    while ((truth = PyObject_IsTrue(cur)) > 0) {
        PyObject* next;
        if (out != NULL) {
            PyObject* tbyte;
            long low;
            if (nbytes == revcap) {
                size_t newcap = (revcap == 0) ? 32 : (revcap * 2);
                uint8_t* grown = PyMem_Realloc(revbytes, newcap);
                if (grown == NULL) {
                    PyErr_NoMemory();
                    goto cleanup;
                }
                revbytes = grown;
                revcap = newcap;
            }
            tbyte = PyNumber_And(cur, bytemask);
            if (tbyte == NULL) {
                goto cleanup;
            }
            low = PyLong_AsLong(tbyte);
            Py_DECREF(tbyte);
            if ((low == -1) && PyErr_Occurred()) {
                goto cleanup;
            }
            revbytes[nbytes] = (uint8_t)low;
        }
        nbytes++;
        next = PyNumber_Rshift(cur, eight);
        Py_DECREF(cur);
        cur = next;
        if (cur == NULL) {
            goto cleanup;
        }
    }
    if (truth < 0) {
        goto cleanup;
    }

    if (out != NULL) {
        out[pos] = CBOR_TAG | tag;
    }
    pos += 1;
    tag_aux_out(CBOR_BYTES, nbytes, out, &pos);
    if (out != NULL) {
        // collected least-significant first; emit most-significant first
        for (i = 0; i < nbytes; i++) {
            out[pos + i] = revbytes[nbytes - 1 - i];
        }
    }
    pos += nbytes;

    if (posp != NULL) {
        *posp = pos;
    }

cleanup:
    PyMem_Free(revbytes);
    Py_XDECREF(cur);
    Py_XDECREF(zero);
    Py_XDECREF(bytemask);
    Py_XDECREF(eight);
}
1102
/*
 * Serialize a Tag object: a CBOR_TAG header carrying ob.tag, followed by
 * the encoding of ob.value.
 *
 * ob    object expected to expose integer attribute `tag` and attribute
 *       `value`
 * out   destination buffer, or NULL to only measure the encoded size
 * posp  in/out offset into `out`; written back only on success
 *
 * Returns 0 on success. On failure returns non-zero with a Python
 * exception set: ValueError when an attribute is missing, when the tag is
 * negative, too large for a long long, or not an integer at all; or
 * whatever inner_dumps() reported while encoding the value.
 */
static int dumps_tag(EncodeOptions *optp, PyObject* ob, uint8_t* out, uintptr_t* posp) {
    uintptr_t pos = (posp != NULL) ? *posp : 0;
    int err = -1;
    PyObject* tag_num = NULL;
    PyObject* tag_value = NULL;

    tag_num = PyObject_GetAttrString(ob, "tag");
    if (tag_num == NULL) {
        PyErr_SetString(PyExc_ValueError, "broken Tag object with no .tag");
        goto done;
    }
    tag_value = PyObject_GetAttrString(ob, "value");
    if (tag_value == NULL) {
        PyErr_SetString(PyExc_ValueError, "broken Tag object has .tag but not .value");
        goto done;
    }

#ifdef Py_INTOBJECT_H
    if (PyInt_Check(tag_num)) {
        long val = PyInt_AsLong(tag_num);
        if (val < 0) {
            PyErr_Format(PyExc_ValueError, "tag cannot be a negative int: %ld", val);
            goto done;
        }
        tag_aux_out(CBOR_TAG, val, out, &pos);
    } else
#endif
    if (PyLong_Check(tag_num)) {
        int overflow = 0;
        long long val = PyLong_AsLongLongAndOverflow(tag_num, &overflow);
        if (overflow != 0) {
            PyErr_SetString(PyExc_ValueError, "tag number too large");
            goto done;
        }
        if (val < 0) {
            PyErr_Format(PyExc_ValueError, "tag cannot be a negative long: %lld", val);
            goto done;
        }
        tag_aux_out(CBOR_TAG, val, out, &pos);
    } else {
        // Without this branch a non-integer tag emitted nothing and still
        // reported success, silently dropping the item from the stream.
        PyErr_SetString(PyExc_ValueError, "tag number must be an integer");
        goto done;
    }

    err = inner_dumps(optp, tag_value, out, &pos);
    if ((err == 0) && (posp != NULL)) {
        *posp = pos;
    }

done:
    Py_XDECREF(tag_value);
    Py_XDECREF(tag_num);
    return err;
}
1159
1160
1161 // With out=NULL it just counts the length.
1162 // return err, 0=OK
inner_dumps(EncodeOptions * optp,PyObject * ob,uint8_t * out,uintptr_t * posp)1163 static int inner_dumps(EncodeOptions *optp, PyObject* ob, uint8_t* out, uintptr_t* posp) {
1164 uintptr_t pos = (posp != NULL) ? *posp : 0;
1165
1166 if (ob == Py_None) {
1167 if (out != NULL) {
1168 out[pos] = CBOR_NULL;
1169 }
1170 pos += 1;
1171 } else if (PyBool_Check(ob)) {
1172 if (out != NULL) {
1173 if (PyObject_IsTrue(ob)) {
1174 out[pos] = CBOR_TRUE;
1175 } else {
1176 out[pos] = CBOR_FALSE;
1177 }
1178 }
1179 pos += 1;
1180 } else if (PyDict_Check(ob)) {
1181 int err = dumps_dict(optp, ob, out, &pos);
1182 if (err != 0) { return err; }
1183 } else if (PyList_Check(ob)) {
1184 Py_ssize_t i;
1185 Py_ssize_t listlen = PyList_Size(ob);
1186 tag_aux_out(CBOR_ARRAY, listlen, out, &pos);
1187 for (i = 0; i < listlen; i++) {
1188 int err = inner_dumps(optp, PyList_GetItem(ob, i), out, &pos);
1189 if (err != 0) { return err; }
1190 }
1191 } else if (PyTuple_Check(ob)) {
1192 Py_ssize_t i;
1193 Py_ssize_t listlen = PyTuple_Size(ob);
1194 tag_aux_out(CBOR_ARRAY, listlen, out, &pos);
1195 for (i = 0; i < listlen; i++) {
1196 int err = inner_dumps(optp, PyTuple_GetItem(ob, i), out, &pos);
1197 if (err != 0) { return err; }
1198 }
1199 // TODO: accept other enumerables and emit a variable length array
1200 #ifdef Py_INTOBJECT_H
1201 // PyInt exists in Python 2 but not 3
1202 } else if (PyInt_Check(ob)) {
1203 long val = PyInt_AsLong(ob);
1204 if (val >= 0) {
1205 tag_aux_out(CBOR_UINT, val, out, &pos);
1206 } else {
1207 tag_aux_out(CBOR_NEGINT, -1 - val, out, &pos);
1208 }
1209 #endif
1210 } else if (PyLong_Check(ob)) {
1211 int overflow = 0;
1212 long long val = PyLong_AsLongLongAndOverflow(ob, &overflow);
1213 if (overflow == 0) {
1214 if (val >= 0) {
1215 tag_aux_out(CBOR_UINT, val, out, &pos);
1216 } else {
1217 tag_aux_out(CBOR_NEGINT, -1L - val, out, &pos);
1218 }
1219 } else {
1220 if (overflow < 0) {
1221 // BIG NEGINT
1222 PyObject* minusone = PyLong_FromLongLong(-1L);
1223 PyObject* val = PyNumber_Subtract(minusone, ob);
1224 Py_DECREF(minusone);
1225 dumps_bignum(optp, CBOR_TAG_NEGBIGNUM, val, out, &pos);
1226 Py_DECREF(val);
1227 } else {
1228 // BIG INT
1229 dumps_bignum(optp, CBOR_TAG_BIGNUM, ob, out, &pos);
1230 }
1231 }
1232 } else if (PyFloat_Check(ob)) {
1233 double val = PyFloat_AsDouble(ob);
1234 tag_u64_out(CBOR_7, *((uint64_t*)(&val)), out, &pos);
1235 } else if (PyBytes_Check(ob)) {
1236 Py_ssize_t len = PyBytes_Size(ob);
1237 tag_aux_out(CBOR_BYTES, len, out, &pos);
1238 if (out != NULL) {
1239 memcpy(out + pos, PyBytes_AsString(ob), len);
1240 }
1241 pos += len;
1242 } else if (PyUnicode_Check(ob)) {
1243 PyObject* utf8 = PyUnicode_AsUTF8String(ob);
1244 Py_ssize_t len = PyBytes_Size(utf8);
1245 tag_aux_out(CBOR_TEXT, len, out, &pos);
1246 if (out != NULL) {
1247 memcpy(out + pos, PyBytes_AsString(utf8), len);
1248 }
1249 pos += len;
1250 Py_DECREF(utf8);
1251 } else {
1252 int handled = 0;
1253 {
1254 PyObject* tag_class = getCborTagClass();
1255 if (PyObject_IsInstance(ob, tag_class)) {
1256 int err = dumps_tag(optp, ob, out, &pos);
1257 if (err != 0) { return err; }
1258 handled = 1;
1259 }
1260 // tag_class was just a borrowed reference
1261 }
1262
1263 // TODO: other special object serializations here
1264
1265 if (!handled) {
1266 #if IS_PY3
1267 PyErr_Format(PyExc_ValueError, "cannot serialize unknown object: %R", ob);
1268 #else
1269 PyObject* badtype = PyObject_Type(ob);
1270 PyObject* badtypename = PyObject_Str(badtype);
1271 PyErr_Format(PyExc_ValueError, "cannot serialize unknown object of type %s", PyString_AsString(badtypename));
1272 Py_DECREF(badtypename);
1273 Py_DECREF(badtype);
1274 #endif
1275 return -1;
1276 }
1277 }
1278 if (posp != NULL) {
1279 *posp = pos;
1280 }
1281 return 0;
1282 }
1283
/*
 * Read encoder options out of the keyword arguments given to dump/dumps.
 *
 * Only "sort_keys" is looked at; its truth value is stored in
 * optp->sort_keys. Other keys are ignored, and a NULL kwargs leaves *optp
 * untouched.
 *
 * Returns 1 on success, 0 on failure with a Python exception set.
 */
static int _dumps_kwargs(EncodeOptions *optp, PyObject* kwargs) {
    PyObject* sort_keys;
    int truth;

    if (kwargs == NULL) {
        return 1;
    }
    if (!PyDict_Check(kwargs)) {
        PyErr_Format(PyExc_ValueError, "kwargs not dict: %R\n", kwargs);
        return 0;
    }

    sort_keys = PyDict_GetItemString(kwargs, "sort_keys");  // Borrowed ref
    if (sort_keys == NULL) {
        return 1;
    }

    truth = PyObject_IsTrue(sort_keys);
    if (truth < 0) {
        // Evaluating truthiness raised. Storing -1 in the unsigned field
        // would have silently turned the option on.
        return 0;
    }
    optp->sort_keys = (truth != 0);
    return 1;
}
1298
1299 static PyObject*
cbor_dumps(PyObject * noself,PyObject * args,PyObject * kwargs)1300 cbor_dumps(PyObject* noself, PyObject* args, PyObject* kwargs) {
1301
1302 PyObject* ob;
1303 EncodeOptions opts = {0};
1304 EncodeOptions *optp = &opts;
1305 is_big_endian();
1306 if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) {
1307 ob = PyList_GetItem(args, 0);
1308 } else if (PyType_IsSubtype(Py_TYPE(args), &PyTuple_Type)) {
1309 ob = PyTuple_GetItem(args, 0);
1310 } else {
1311 PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args);
1312 return NULL;
1313 }
1314 if (ob == NULL) {
1315 return NULL;
1316 }
1317
1318 if (!_dumps_kwargs(optp, kwargs)) {
1319 return NULL;
1320 }
1321
1322 {
1323 Py_ssize_t outlen = 0;
1324 uintptr_t pos = 0;
1325 void* out = NULL;
1326 PyObject* obout = NULL;
1327 int err;
1328
1329 // first pass just to count length
1330 err = inner_dumps(optp, ob, NULL, &pos);
1331 if (err != 0) {
1332 return NULL;
1333 }
1334
1335 outlen = pos;
1336
1337 out = PyMem_Malloc(outlen);
1338 if (out == NULL) {
1339 PyErr_NoMemory();
1340 return NULL;
1341 }
1342
1343 err = inner_dumps(optp, ob, out, NULL);
1344 if (err != 0) {
1345 PyMem_Free(out);
1346 return NULL;
1347 }
1348
1349 // TODO: I wish there was a way to do this without this copy.
1350 obout = PyBytes_FromStringAndSize(out, outlen);
1351 PyMem_Free(out);
1352 return obout;
1353 }
1354 }
1355
1356 static PyObject*
cbor_dump(PyObject * noself,PyObject * args,PyObject * kwargs)1357 cbor_dump(PyObject* noself, PyObject* args, PyObject *kwargs) {
1358 // args should be (obj, fp)
1359 PyObject* ob;
1360 PyObject* fp;
1361 EncodeOptions opts = {0};
1362 EncodeOptions *optp = &opts;
1363
1364 is_big_endian();
1365 if (PyType_IsSubtype(Py_TYPE(args), &PyList_Type)) {
1366 ob = PyList_GetItem(args, 0);
1367 fp = PyList_GetItem(args, 1);
1368 } else if (PyType_IsSubtype(Py_TYPE(args), &PyTuple_Type)) {
1369 ob = PyTuple_GetItem(args, 0);
1370 fp = PyTuple_GetItem(args, 1);
1371 } else {
1372 PyErr_Format(PyExc_ValueError, "args not list or tuple: %R\n", args);
1373 return NULL;
1374 }
1375 if ((ob == NULL) || (fp == NULL)) {
1376 return NULL;
1377 }
1378
1379 if (!_dumps_kwargs(optp, kwargs)) {
1380 return NULL;
1381 }
1382
1383 {
1384 // TODO: make this smarter, right now it is justt fp.write(dumps(ob))
1385 Py_ssize_t outlen = 0;
1386 uintptr_t pos = 0;
1387 void* out = NULL;
1388 int err;
1389
1390 // first pass just to count length
1391 err = inner_dumps(optp, ob, NULL, &pos);
1392 if (err != 0) {
1393 return NULL;
1394 }
1395
1396 outlen = pos;
1397
1398 out = PyMem_Malloc(outlen);
1399 if (out == NULL) {
1400 PyErr_NoMemory();
1401 return NULL;
1402 }
1403
1404 err = inner_dumps(optp, ob, out, NULL);
1405 if (err != 0) {
1406 PyMem_Free(out);
1407 return NULL;
1408 }
1409
1410 #if HAS_FILE_READER
1411 if (PyFile_Check(fp)) {
1412 FILE* fout = PyFile_AsFile(fp);
1413 fwrite(out, 1, outlen, fout);
1414 } else
1415 #endif
1416 {
1417 PyObject* ret;
1418 PyObject* obout = NULL;
1419 #if IS_PY3
1420 PyObject* writeStr = PyUnicode_FromString("write");
1421 #else
1422 PyObject* writeStr = PyString_FromString("write");
1423 #endif
1424 obout = PyBytes_FromStringAndSize(out, outlen);
1425 //logprintf("write %zd bytes to %p.write() as %p\n", outlen, fp, obout);
1426 ret = PyObject_CallMethodObjArgs(fp, writeStr, obout, NULL);
1427 Py_DECREF(writeStr);
1428 Py_DECREF(obout);
1429 if (ret != NULL) {
1430 Py_DECREF(ret);
1431 } else {
1432 // exception in fp.write()
1433 PyMem_Free(out);
1434 return NULL;
1435 }
1436 //logprintf("wrote %zd bytes to %p.write() as %p\n", outlen, fp, obout);
1437 }
1438 PyMem_Free(out);
1439 }
1440
1441 Py_RETURN_NONE;
1442 }
1443
1444
1445 static PyMethodDef CborMethods[] = {
1446 {"loads", cbor_loads, METH_VARARGS,
1447 "parse cbor from data buffer to objects"},
1448 {"dumps", (PyCFunction)cbor_dumps, METH_VARARGS|METH_KEYWORDS,
1449 "serialize python object to bytes"},
1450 {"load", cbor_load, METH_VARARGS,
1451 "Parse cbor from data buffer to objects.\n"
1452 "Takes a file-like object capable of .read(N)\n"},
1453 {"dump", (PyCFunction)cbor_dump, METH_VARARGS|METH_KEYWORDS,
1454 "Serialize python object to bytes.\n"
1455 "dump(obj, fp)\n"
1456 "obj: object to output; fp: file-like object to .write() to\n"},
1457 {NULL, NULL, 0, NULL} /* Sentinel */
1458 };
1459
1460 #ifdef Py_InitModule
1461 // Python 2.7
1462 PyMODINIT_FUNC
init_cbor(void)1463 init_cbor(void)
1464 {
1465 (void) Py_InitModule("cbor._cbor", CborMethods);
1466 }
1467 #else
1468 // Python 3
1469 PyMODINIT_FUNC
PyInit__cbor(void)1470 PyInit__cbor(void)
1471 {
1472 static PyModuleDef modef = {
1473 PyModuleDef_HEAD_INIT,
1474 };
1475 //modef.m_base = PyModuleDef_HEAD_INIT;
1476 modef.m_name = "cbor._cbor";
1477 modef.m_doc = NULL;
1478 modef.m_size = 0;
1479 modef.m_methods = CborMethods;
1480 #ifdef Py_mod_exec
1481 modef.m_slots = NULL; // Py >= 3.5
1482 #else
1483 modef.m_reload = NULL; // Py < 3.5
1484 #endif
1485 modef.m_traverse = NULL;
1486 modef.m_clear = NULL;
1487 modef.m_free = NULL;
1488 return PyModule_Create(&modef);
1489 }
1490 #endif
1491
1492