1 /*
2 ** Object de/serialization.
3 ** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
4 */
5
6 #define lj_serialize_c
7 #define LUA_CORE
8
9 #include "lj_obj.h"
10
11 #if LJ_HASBUFFER
12 #include "lj_err.h"
13 #include "lj_buf.h"
14 #include "lj_str.h"
15 #include "lj_tab.h"
16 #include "lj_udata.h"
17 #if LJ_HASFFI
18 #include "lj_ctype.h"
19 #include "lj_cdata.h"
20 #endif
21 #if LJ_HASJIT
22 #include "lj_ir.h"
23 #endif
24 #include "lj_serialize.h"
25
/*
** Tags of the internal serialization format.
** The decoder reads the tag as the first U124 value of every object.
*/
enum {
  /* Primitives, light userdata and numbers. */
  SER_TAG_NIL       = 0x00,
  SER_TAG_FALSE     = 0x01,
  SER_TAG_TRUE      = 0x02,
  SER_TAG_NULL      = 0x03,
  SER_TAG_LIGHTUD32 = 0x04,
  SER_TAG_LIGHTUD64 = 0x05,
  SER_TAG_INT       = 0x06,
  SER_TAG_NUM       = 0x07,

  /*
  ** Tables: the encoder adds 1 if a hash count follows and 2 or 4 if an
  ** array count follows (4 means array slot 0 is skipped).
  */
  SER_TAG_TAB       = 0x08,
  SER_TAG_DICT_MT   = SER_TAG_TAB+6,  /* 0x0e: metatable dictionary index. */
  SER_TAG_DICT_STR  = 0x0f,           /* String dictionary index. */

  /* 64 bit integer and complex cdata. */
  SER_TAG_INT64     = 0x10,
  SER_TAG_UINT64    = 0x11,
  SER_TAG_COMPLEX   = 0x12,

  /* Tags without an encoder or decoder in this file. */
  SER_TAG_0x13      = 0x13,
  SER_TAG_0x14      = 0x14,
  SER_TAG_0x15      = 0x15,
  SER_TAG_0x16      = 0x16,
  SER_TAG_0x17      = 0x17,
  SER_TAG_0x18      = 0x18,
  SER_TAG_0x19      = 0x19,
  SER_TAG_0x1a      = 0x1a,
  SER_TAG_0x1b      = 0x1b,
  SER_TAG_0x1c      = 0x1c,
  SER_TAG_0x1d      = 0x1d,
  SER_TAG_0x1e      = 0x1e,
  SER_TAG_0x1f      = 0x1f,

  /* Strings: 0x20 + str->len. */
  SER_TAG_STR       = 0x20
};
/* The table tag needs three clear low bits: flags 1, 2 and 4 are added to it. */
LJ_STATIC_ASSERT((SER_TAG_TAB & 7) == 0);
58
59 /* -- Helper functions ---------------------------------------------------- */
60
serialize_more(char * w,SBufExt * sbx,MSize sz)61 static LJ_AINLINE char *serialize_more(char *w, SBufExt *sbx, MSize sz)
62 {
63 if (LJ_UNLIKELY(sz > (MSize)(sbx->e - w))) {
64 sbx->w = w;
65 w = lj_buf_more2((SBuf *)sbx, sz);
66 }
67 return w;
68 }
69
70 /* Write U124 to buffer. */
serialize_wu124_(char * w,uint32_t v)71 static LJ_NOINLINE char *serialize_wu124_(char *w, uint32_t v)
72 {
73 if (v < 0x1fe0) {
74 v -= 0xe0;
75 *w++ = (char)(0xe0 | (v >> 8)); *w++ = (char)v;
76 } else {
77 *w++ = (char)0xff;
78 #if LJ_BE
79 v = lj_bswap(v);
80 #endif
81 memcpy(w, &v, 4); w += 4;
82 }
83 return w;
84 }
85
serialize_wu124(char * w,uint32_t v)86 static LJ_AINLINE char *serialize_wu124(char *w, uint32_t v)
87 {
88 if (LJ_LIKELY(v < 0xe0)) {
89 *w++ = (char)v;
90 return w;
91 } else {
92 return serialize_wu124_(w, v);
93 }
94 }
95
serialize_ru124_(char * r,char * w,uint32_t * pv)96 static LJ_NOINLINE char *serialize_ru124_(char *r, char *w, uint32_t *pv)
97 {
98 uint32_t v = *pv;
99 if (v != 0xff) {
100 if (r >= w) return NULL;
101 v = ((v & 0x1f) << 8) + *(uint8_t *)r + 0xe0; r++;
102 } else {
103 if (r + 4 > w) return NULL;
104 v = lj_getu32(r); r += 4;
105 #if LJ_BE
106 v = lj_bswap(v);
107 #endif
108 }
109 *pv = v;
110 return r;
111 }
112
serialize_ru124(char * r,char * w,uint32_t * pv)113 static LJ_AINLINE char *serialize_ru124(char *r, char *w, uint32_t *pv)
114 {
115 if (LJ_LIKELY(r < w)) {
116 uint32_t v = *(uint8_t *)r; r++;
117 *pv = v;
118 if (LJ_UNLIKELY(v >= 0xe0)) {
119 r = serialize_ru124_(r, w, pv);
120 }
121 return r;
122 }
123 return NULL;
124 }
125
126 /* Prepare string dictionary for use (once). */
lj_serialize_dict_prep_str(lua_State * L,GCtab * dict)127 void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict)
128 {
129 if (!dict->hmask) { /* No hash part means not prepared, yet. */
130 MSize i, len = lj_tab_len(dict);
131 if (!len) return;
132 lj_tab_resize(L, dict, dict->asize, hsize2hbits(len));
133 for (i = 1; i <= len && i < dict->asize; i++) {
134 cTValue *o = arrayslot(dict, i);
135 if (tvisstr(o)) {
136 if (!lj_tab_getstr(dict, strV(o))) { /* Ignore dups. */
137 lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1);
138 }
139 } else if (!tvisfalse(o)) {
140 lj_err_caller(L, LJ_ERR_BUFFER_BADOPT);
141 }
142 }
143 }
144 }
145
146 /* Prepare metatable dictionary for use (once). */
lj_serialize_dict_prep_mt(lua_State * L,GCtab * dict)147 void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict)
148 {
149 if (!dict->hmask) { /* No hash part means not prepared, yet. */
150 MSize i, len = lj_tab_len(dict);
151 if (!len) return;
152 lj_tab_resize(L, dict, dict->asize, hsize2hbits(len));
153 for (i = 1; i <= len && i < dict->asize; i++) {
154 cTValue *o = arrayslot(dict, i);
155 if (tvistab(o)) {
156 if (tvisnil(lj_tab_get(L, dict, o))) { /* Ignore dups. */
157 lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1);
158 }
159 } else if (!tvisfalse(o)) {
160 lj_err_caller(L, LJ_ERR_BUFFER_BADOPT);
161 }
162 }
163 }
164 }
165
166 /* -- Internal serializer ------------------------------------------------- */
167
/*
** Put serialized object into buffer.
**
** w is the current write position inside sbx, o the value to encode.
** Returns the write position behind the encoded object. Tables are encoded
** recursively with sbx->depth limiting the nesting. If sbx->dict_mt or
** sbx->dict_str are set, metatables and string keys found in them are
** written as dictionary indexes. Values that cannot be encoded raise
** LJ_ERR_BUFFER_BADENC.
*/
static char *serialize_put(char *w, SBufExt *sbx, cTValue *o)
{
  if (LJ_LIKELY(tvisstr(o))) {
    /* String: U124 holding SER_TAG_STR + length, followed by the bytes. */
    const GCstr *str = strV(o);
    MSize len = str->len;
    w = serialize_more(w, sbx, 5+len);  /* A U124 takes at most 5 bytes. */
    w = serialize_wu124(w, SER_TAG_STR + len);
    w = lj_buf_wmem(w, strdata(str), len);
  } else if (tvisint(o)) {
    /* Integer: tag + 4 bytes, byte-swapped on big-endian hosts. */
    uint32_t x = LJ_BE ? lj_bswap((uint32_t)intV(o)) : (uint32_t)intV(o);
    w = serialize_more(w, sbx, 1+4);
    *w++ = SER_TAG_INT; memcpy(w, &x, 4); w += 4;
  } else if (tvisnum(o)) {
    /* Number: tag + 8 bytes, byte-swapped on big-endian hosts. */
    uint64_t x = LJ_BE ? lj_bswap64(o->u64) : o->u64;
    w = serialize_more(w, sbx, 1+sizeof(lua_Number));
    *w++ = SER_TAG_NUM; memcpy(w, &x, 8); w += 8;
  } else if (tvispri(o)) {
    /* Primitive: the tag is derived from the inverted internal type. */
    w = serialize_more(w, sbx, 1);
    *w++ = (char)(SER_TAG_NIL + ~itype(o));
  } else if (tvistab(o)) {
    const GCtab *t = tabV(o);
    /* 'one' is the array flag: 2 = write from slot 0, 4 = skip nil slot 0. */
    uint32_t narray = 0, nhash = 0, one = 2;
    if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH);
    sbx->depth--;
    if (t->asize > 0) {  /* Determine max. length of array part. */
      ptrdiff_t i;
      TValue *array = tvref(t->array);
      /* Scan backwards for the last non-nil array slot. */
      for (i = (ptrdiff_t)t->asize-1; i >= 0; i--)
        if (!tvisnil(&array[i]))
          break;
      narray = (uint32_t)(i+1);
      if (narray && tvisnil(&array[0])) one = 4;
    }
    if (t->hmask > 0) {  /* Count number of used hash slots. */
      uint32_t i, hmask = t->hmask;
      Node *node = noderef(t->node);
      for (i = 0; i <= hmask; i++)
        nhash += !tvisnil(&node[i].val);
    }
    /* Write metatable index. */
    if (LJ_UNLIKELY(tabref(sbx->dict_mt)) && tabref(t->metatable)) {
      TValue mto;
      Node *n;
      settabV(sbufL(sbx), &mto, tabref(t->metatable));
      /* Walk the hash chain of the dictionary, comparing the raw key. */
      n = hashgcref(tabref(sbx->dict_mt), mto.gcr);
      do {
        if (n->key.u64 == mto.u64) {
          /* The value is the index stored by lj_serialize_dict_prep_mt(). */
          uint32_t idx = n->val.u32.lo;
          w = serialize_more(w, sbx, 1+5);
          *w++ = SER_TAG_DICT_MT;
          w = serialize_wu124(w, idx);
          break;
        }
      } while ((n = nextnode(n)));
      /* Nothing is written for a metatable that's not in the dictionary. */
    }
    /* Write number of array slots and hash slots. */
    w = serialize_more(w, sbx, 1+2*5);
    *w++ = (char)(SER_TAG_TAB + (nhash ? 1 : 0) + (narray ? one : 0));
    if (narray) w = serialize_wu124(w, narray);
    if (nhash) w = serialize_wu124(w, nhash);
    if (narray) {  /* Write array entries. */
      cTValue *oa = tvref(t->array) + (one >> 2);  /* Start at slot 0 or 1. */
      cTValue *oe = tvref(t->array) + narray;
      while (oa < oe) w = serialize_put(w, sbx, oa++);
    }
    if (nhash) {  /* Write hash entries. */
      /* Nodes are visited from the last one downwards until all are written. */
      const Node *node = noderef(t->node) + t->hmask;
      GCtab *dict_str = tabref(sbx->dict_str);
      if (LJ_UNLIKELY(dict_str)) {
        for (;; node--)
          if (!tvisnil(&node->val)) {
            if (LJ_LIKELY(tvisstr(&node->key))) {
              /* Inlined lj_tab_getstr is 30% faster. */
              const GCstr *str = strV(&node->key);
              Node *n = hashstr(dict_str, str);
              do {
                if (tvisstr(&n->key) && strV(&n->key) == str) {
                  /* Key is in the dictionary: write its index instead. */
                  uint32_t idx = n->val.u32.lo;
                  w = serialize_more(w, sbx, 1+5);
                  *w++ = SER_TAG_DICT_STR;
                  w = serialize_wu124(w, idx);
                  break;
                }
                n = nextnode(n);
                if (!n) {
                  /* Not in the dictionary: write it as a plain string. */
                  MSize len = str->len;
                  w = serialize_more(w, sbx, 5+len);
                  w = serialize_wu124(w, SER_TAG_STR + len);
                  w = lj_buf_wmem(w, strdata(str), len);
                  break;
                }
              } while (1);
            } else {
              w = serialize_put(w, sbx, &node->key);
            }
            w = serialize_put(w, sbx, &node->val);
            if (--nhash == 0) break;
          }
      } else {
        for (;; node--)
          if (!tvisnil(&node->val)) {
            w = serialize_put(w, sbx, &node->key);
            w = serialize_put(w, sbx, &node->val);
            if (--nhash == 0) break;
          }
      }
    }
    sbx->depth++;
#if LJ_HASFFI
  } else if (tviscdata(o)) {
    /* Only 8 byte integers and 16 byte complex cdata are encoded. */
    CTState *cts = ctype_cts(sbufL(sbx));
    CType *s = ctype_raw(cts, cdataV(o)->ctypeid);
    uint8_t *sp = cdataptr(cdataV(o));
    if (ctype_isinteger(s->info) && s->size == 8) {
      w = serialize_more(w, sbx, 1+8);
      *w++ = (s->info & CTF_UNSIGNED) ? SER_TAG_UINT64 : SER_TAG_INT64;
#if LJ_BE
      { uint64_t u = lj_bswap64(*(uint64_t *)sp); memcpy(w, &u, 8); }
#else
      memcpy(w, sp, 8);
#endif
      w += 8;
    } else if (ctype_iscomplex(s->info) && s->size == 16) {
      w = serialize_more(w, sbx, 1+16);
      *w++ = SER_TAG_COMPLEX;
#if LJ_BE
      {  /* Only swap the doubles. The re/im order stays the same. */
        uint64_t u = lj_bswap64(((uint64_t *)sp)[0]); memcpy(w, &u, 8);
        u = lj_bswap64(((uint64_t *)sp)[1]); memcpy(w+8, &u, 8);
      }
#else
      memcpy(w, sp, 16);
#endif
      w += 16;
    } else {
      goto badenc;  /* NYI other cdata */
    }
#endif
  } else if (tvislightud(o)) {
    /* Light userdata: NULL tag, or tag + 4 or 8 bytes of the pointer value. */
    uintptr_t ud = (uintptr_t)lightudV(G(sbufL(sbx)), o);
    w = serialize_more(w, sbx, 1+sizeof(ud));
    if (ud == 0) {
      *w++ = SER_TAG_NULL;
    } else if (LJ_32 || checku32(ud)) {
      /* After the swap the first 4 bytes in memory are the low bytes. */
#if LJ_BE && LJ_64
      ud = lj_bswap64(ud);
#elif LJ_BE
      ud = lj_bswap(ud);
#endif
      *w++ = SER_TAG_LIGHTUD32; memcpy(w, &ud, 4); w += 4;
#if LJ_64
    } else {
#if LJ_BE
      ud = lj_bswap64(ud);
#endif
      *w++ = SER_TAG_LIGHTUD64; memcpy(w, &ud, 8); w += 8;
#endif
    }
  } else {
    /* NYI userdata */
#if LJ_HASFFI
  badenc:
#endif
    lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADENC, lj_typename(o));
  }
  return w;
}
336
337 /* Get serialized object from buffer. */
serialize_get(char * r,SBufExt * sbx,TValue * o)338 static char *serialize_get(char *r, SBufExt *sbx, TValue *o)
339 {
340 char *w = sbx->w;
341 uint32_t tp;
342 r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob;
343 if (LJ_LIKELY(tp >= SER_TAG_STR)) {
344 uint32_t len = tp - SER_TAG_STR;
345 if (LJ_UNLIKELY(len > (uint32_t)(w - r))) goto eob;
346 setstrV(sbufL(sbx), o, lj_str_new(sbufL(sbx), r, len));
347 r += len;
348 } else if (tp == SER_TAG_INT) {
349 if (LJ_UNLIKELY(r + 4 > w)) goto eob;
350 setintV(o, (int32_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r)));
351 r += 4;
352 } else if (tp == SER_TAG_NUM) {
353 if (LJ_UNLIKELY(r + 8 > w)) goto eob;
354 memcpy(o, r, 8); r += 8;
355 #if LJ_BE
356 o->u64 = lj_bswap64(o->u64);
357 #endif
358 if (!tvisnum(o)) setnanV(o); /* Fix non-canonical NaNs. */
359 } else if (tp <= SER_TAG_TRUE) {
360 setpriV(o, ~tp);
361 } else if (tp == SER_TAG_DICT_STR) {
362 GCtab *dict_str;
363 uint32_t idx;
364 r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob;
365 idx++;
366 dict_str = tabref(sbx->dict_str);
367 if (dict_str && idx < dict_str->asize && tvisstr(arrayslot(dict_str, idx)))
368 copyTV(sbufL(sbx), o, arrayslot(dict_str, idx));
369 else
370 lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
371 } else if (tp >= SER_TAG_TAB && tp <= SER_TAG_DICT_MT) {
372 uint32_t narray = 0, nhash = 0;
373 GCtab *t, *mt = NULL;
374 if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH);
375 sbx->depth--;
376 if (tp == SER_TAG_DICT_MT) {
377 GCtab *dict_mt;
378 uint32_t idx;
379 r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob;
380 idx++;
381 dict_mt = tabref(sbx->dict_mt);
382 if (dict_mt && idx < dict_mt->asize && tvistab(arrayslot(dict_mt, idx)))
383 mt = tabV(arrayslot(dict_mt, idx));
384 else
385 lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
386 r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob;
387 if (!(tp >= SER_TAG_TAB && tp < SER_TAG_DICT_MT)) goto badtag;
388 }
389 if (tp >= SER_TAG_TAB+2) {
390 r = serialize_ru124(r, w, &narray); if (LJ_UNLIKELY(!r)) goto eob;
391 }
392 if ((tp & 1)) {
393 r = serialize_ru124(r, w, &nhash); if (LJ_UNLIKELY(!r)) goto eob;
394 }
395 t = lj_tab_new(sbufL(sbx), narray, hsize2hbits(nhash));
396 /* NOBARRIER: The table is new (marked white). */
397 setgcref(t->metatable, obj2gco(mt));
398 settabV(sbufL(sbx), o, t);
399 if (narray) {
400 TValue *oa = tvref(t->array) + (tp >= SER_TAG_TAB+4);
401 TValue *oe = tvref(t->array) + narray;
402 while (oa < oe) r = serialize_get(r, sbx, oa++);
403 }
404 if (nhash) {
405 do {
406 TValue k, *v;
407 r = serialize_get(r, sbx, &k);
408 v = lj_tab_set(sbufL(sbx), t, &k);
409 if (LJ_UNLIKELY(!tvisnil(v)))
410 lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DUPKEY);
411 r = serialize_get(r, sbx, v);
412 } while (--nhash);
413 }
414 sbx->depth++;
415 #if LJ_HASFFI
416 } else if (tp >= SER_TAG_INT64 && tp <= SER_TAG_COMPLEX) {
417 uint32_t sz = tp == SER_TAG_COMPLEX ? 16 : 8;
418 GCcdata *cd;
419 if (LJ_UNLIKELY(r + sz > w)) goto eob;
420 cd = lj_cdata_new_(sbufL(sbx),
421 tp == SER_TAG_INT64 ? CTID_INT64 :
422 tp == SER_TAG_UINT64 ? CTID_UINT64 : CTID_COMPLEX_DOUBLE,
423 sz);
424 memcpy(cdataptr(cd), r, sz); r += sz;
425 #if LJ_BE
426 *(uint64_t *)cdataptr(cd) = lj_bswap64(*(uint64_t *)cdataptr(cd));
427 if (sz == 16)
428 ((uint64_t *)cdataptr(cd))[1] = lj_bswap64(((uint64_t *)cdataptr(cd))[1]);
429 #endif
430 if (sz == 16) { /* Fix non-canonical NaNs. */
431 TValue *cdo = (TValue *)cdataptr(cd);
432 if (!tvisnum(&cdo[0])) setnanV(&cdo[0]);
433 if (!tvisnum(&cdo[1])) setnanV(&cdo[1]);
434 }
435 setcdataV(sbufL(sbx), o, cd);
436 #endif
437 } else if (tp <= (LJ_64 ? SER_TAG_LIGHTUD64 : SER_TAG_LIGHTUD32)) {
438 uintptr_t ud = 0;
439 if (tp == SER_TAG_LIGHTUD32) {
440 if (LJ_UNLIKELY(r + 4 > w)) goto eob;
441 ud = (uintptr_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r));
442 r += 4;
443 }
444 #if LJ_64
445 else if (tp == SER_TAG_LIGHTUD64) {
446 if (LJ_UNLIKELY(r + 8 > w)) goto eob;
447 memcpy(&ud, r, 8); r += 8;
448 #if LJ_BE
449 ud = lj_bswap64(ud);
450 #endif
451 }
452 setrawlightudV(o, lj_lightud_intern(sbufL(sbx), (void *)ud));
453 #else
454 setrawlightudV(o, (void *)ud);
455 #endif
456 } else {
457 badtag:
458 lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDEC, tp);
459 }
460 return r;
461 eob:
462 lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_EOB);
463 return NULL;
464 }
465
466 /* -- External serialization API ------------------------------------------ */
467
468 /* Encode to buffer. */
lj_serialize_put(SBufExt * sbx,cTValue * o)469 SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o)
470 {
471 sbx->depth = LJ_SERIALIZE_DEPTH;
472 sbx->w = serialize_put(sbx->w, sbx, o);
473 return sbx;
474 }
475
476 /* Decode from buffer. */
lj_serialize_get(SBufExt * sbx,TValue * o)477 char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o)
478 {
479 sbx->depth = LJ_SERIALIZE_DEPTH;
480 return serialize_get(sbx->r, sbx, o);
481 }
482
483 /* Stand-alone encoding, borrowing from global temporary buffer. */
lj_serialize_encode(lua_State * L,cTValue * o)484 GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o)
485 {
486 SBufExt sbx;
487 char *w;
488 memset(&sbx, 0, sizeof(SBufExt));
489 lj_bufx_set_borrow(L, &sbx, &G(L)->tmpbuf);
490 sbx.depth = LJ_SERIALIZE_DEPTH;
491 w = serialize_put(sbx.w, &sbx, o);
492 return lj_str_new(L, sbx.b, (size_t)(w - sbx.b));
493 }
494
495 /* Stand-alone decoding, copy-on-write from string. */
lj_serialize_decode(lua_State * L,TValue * o,GCstr * str)496 void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str)
497 {
498 SBufExt sbx;
499 char *r;
500 memset(&sbx, 0, sizeof(SBufExt));
501 lj_bufx_set_cow(L, &sbx, strdata(str), str->len);
502 /* No need to set sbx.cowref here. */
503 sbx.depth = LJ_SERIALIZE_DEPTH;
504 r = serialize_get(sbx.r, &sbx, o);
505 if (r != sbx.w) lj_err_caller(L, LJ_ERR_BUFFER_LEFTOV);
506 }
507
508 #if LJ_HASJIT
509 /* Peek into buffer to find the result IRType for specialization purposes. */
lj_serialize_peektype(SBufExt * sbx)510 LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx)
511 {
512 uint32_t tp;
513 if (serialize_ru124(sbx->r, sbx->w, &tp)) {
514 /* This must match the handling of all tags in the decoder above. */
515 switch (tp) {
516 case SER_TAG_NIL: return IRT_NIL;
517 case SER_TAG_FALSE: return IRT_FALSE;
518 case SER_TAG_TRUE: return IRT_TRUE;
519 case SER_TAG_NULL: case SER_TAG_LIGHTUD32: case SER_TAG_LIGHTUD64:
520 return IRT_LIGHTUD;
521 case SER_TAG_INT: return LJ_DUALNUM ? IRT_INT : IRT_NUM;
522 case SER_TAG_NUM: return IRT_NUM;
523 case SER_TAG_TAB: case SER_TAG_TAB+1: case SER_TAG_TAB+2:
524 case SER_TAG_TAB+3: case SER_TAG_TAB+4: case SER_TAG_TAB+5:
525 case SER_TAG_DICT_MT:
526 return IRT_TAB;
527 case SER_TAG_INT64: case SER_TAG_UINT64: case SER_TAG_COMPLEX:
528 return IRT_CDATA;
529 case SER_TAG_DICT_STR:
530 default:
531 return IRT_STR;
532 }
533 }
534 return IRT_NIL; /* Will fail on actual decode. */
535 }
536 #endif
537
538 #endif
539