1 /*
2   Ypsilon Scheme System
3   Copyright (c) 2004-2008 Y.FUJITA / LittleWing Company Limited.
4   See license.txt for terms and conditions of use
5 */
6 
7 #include "core.h"
8 #include "arith.h"
9 #include "object_heap.h"
10 #include "object_factory.h"
11 #include "serialize.h"
12 
13 #define BVO_TAG_NOUSE               0
14 #define BVO_TAG_LOOKUP              1
15 #define BVO_TAG_IMMEDIATE           2
16 #define BVO_TAG_PLIST               3
17 #define BVO_TAG_DLIST               4
18 #define BVO_TAG_VECTOR              5
19 #define BVO_TAG_RATIONAL            6
20 #define BVO_TAG_COMPLEX             7
21 #define BVO_TAG_FLONUM              8
22 #define BVO_TAG_BIGNUM              9
23 #define BVO_TAG_BVECTOR             10
24 #define BVO_TAG_SYMBOL              11
25 #define BVO_TAG_STRING              12
26 #define BVO_TAG_UNINTERNED_SYMBOL   13
27 #define MAX_BUNDLE_SIZE             sizeof(uint64_t)
28 
serializer_t(object_heap_t * heap)29 serializer_t::serializer_t(object_heap_t* heap)
30 {
31     m_heap = heap;
32     m_lites = make_hashtable(m_heap, SCM_HASHTABLE_TYPE_EQ, lookup_mutable_hashtable_size(0));
33     int depth = 32;
34     m_stack = (scm_obj_t*)m_heap->allocate_private(sizeof(scm_obj_t) * depth);
35     m_stack_limit = m_stack + depth;
36     m_sp = m_stack;
37     m_buf_size = 128;
38     m_buf_mark = 0;
39     m_buf = (uint8_t*)m_heap->allocate_private(m_buf_size);
40     m_bad = NULL;
41 }
42 
~serializer_t()43 serializer_t::~serializer_t()
44 {
45     m_heap->deallocate_private(m_stack);
46     m_heap->deallocate_private(m_buf);
47 }
48 
49 void
emit_u8(uint8_t octet)50 serializer_t::emit_u8(uint8_t octet)
51 {
52     m_buf[m_buf_mark] = octet;
53     m_buf_mark++;
54     if (m_buf_mark + MAX_BUNDLE_SIZE >= m_buf_size) expand();
55 }
56 
57 void
emit_u32(uint32_t n)58 serializer_t::emit_u32(uint32_t n)
59 {
60 #if ARCH_IA32 || ARCH_X64
61     assert(sizeof(uint32_t) <= MAX_BUNDLE_SIZE);
62     *(uint32_t*)(m_buf + m_buf_mark) = n;
63     m_buf_mark += sizeof(uint32_t);
64     if (m_buf_mark + MAX_BUNDLE_SIZE >= m_buf_size) expand();
65 #else
66     emit_u8((n >> 0) & 0xff);
67     emit_u8((n >> 8) & 0xff);
68     emit_u8((n >> 16) & 0xff);
69     emit_u8((n >> 24) & 0xff);
70 #endif
71 }
72 
73 void
emit_u64(uint64_t n)74 serializer_t::emit_u64(uint64_t n)
75 {
76 #if ARCH_IA32 || ARCH_X64
77     assert(sizeof(uint64_t) <= MAX_BUNDLE_SIZE);
78     *(uint64_t*)(m_buf + m_buf_mark) = n;
79     m_buf_mark += sizeof(uint64_t);
80     if (m_buf_mark + MAX_BUNDLE_SIZE >= m_buf_size) expand();
81 #else
82     emit_u8((n >> 0) & 0xff);
83     emit_u8((n >> 8) & 0xff);
84     emit_u8((n >> 16) & 0xff);
85     emit_u8((n >> 24) & 0xff);
86     emit_u8((n >> 32) & 0xff);
87     emit_u8((n >> 40) & 0xff);
88     emit_u8((n >> 48) & 0xff);
89     emit_u8((n >> 56) & 0xff);
90 #endif
91 }
92 
93 void
emit_bytes(const uint8_t * s,int n)94 serializer_t::emit_bytes(const uint8_t* s, int n)
95 {
96     if (m_buf_mark + MAX_BUNDLE_SIZE + n < m_buf_size) {
97         memcpy(m_buf + m_buf_mark, s, n);
98         m_buf_mark += n;
99     } else {
100         for (int i = 0; i < n; i++) emit_u8(s[i]);
101     }
102 }
103 
104 void
expand()105 serializer_t::expand()
106 {
107     uint8_t* prev = m_buf;
108     int n2 = m_buf_size + m_buf_size;
109     m_buf = (uint8_t*)m_heap->allocate_private(n2);
110     memcpy(m_buf, prev, m_buf_size);
111     m_buf_size = n2;
112     m_heap->deallocate_private(prev);
113 }
114 
115 void
push(scm_obj_t obj)116 serializer_t::push(scm_obj_t obj)
117 {
118     if (m_sp == m_stack_limit) {
119         scm_obj_t* prev = m_stack;
120         int n = m_stack_limit - m_stack;
121         int n2 = n + n;
122         m_stack = (scm_obj_t*)m_heap->allocate_private(sizeof(scm_obj_t) * n2);
123         memcpy(m_stack, prev, sizeof(scm_obj_t) * n);
124         m_stack_limit = m_stack + n2;
125         m_sp = m_stack + n;
126         m_heap->deallocate_private(prev);
127     }
128     m_sp[0] = obj;
129     m_sp++;
130 }
131 
132 scm_obj_t
pop()133 serializer_t::pop()
134 {
135     if (m_sp == m_stack) return NULL;
136     m_sp--;
137     return m_sp[0];
138 }
139 
140 void
scan(scm_obj_t obj)141 serializer_t::scan(scm_obj_t obj)
142 {
143 
144 loop:
145     if (CELLP(obj)) {
146         if (PAIRP(obj)) {
147             scan(CAR(obj));
148             obj = CDR(obj);
149             goto loop;
150         }
151         if (SYMBOLP(obj) || STRINGP(obj)) {
152             if (get_hashtable(m_lites, obj) != scm_undef) return;
153             int nsize = put_hashtable(m_lites, obj, MAKEFIXNUM(m_lites->datum->live));
154             if (nsize) rehash_hashtable(m_heap, m_lites, nsize);
155             return;
156         }
157         if (VECTORP(obj)) {
158             scm_vector_t vector = (scm_vector_t)obj;
159             int count = vector->count;
160             if (count == 0) return;
161             scm_obj_t* elts = vector->elts;
162             for (int i = 0; i < count; i++) scan(elts[i]);
163             return;
164         }
165         if (BVECTORP(obj) || FLONUMP(obj) || BIGNUMP(obj) || RATIONALP(obj) || COMPLEXP(obj)) return;
166         if (m_bad == NULL) m_bad = obj;
167     }
168 }
169 
170 void
put_lites()171 serializer_t::put_lites()
172 {
173     scm_obj_t* lites = (scm_obj_t*)m_heap->allocate_private(sizeof(scm_obj_t) * m_lites->datum->live);
174     try {
175         hashtable_rec_t* ht_datum = m_lites->datum;
176         int nsize = m_lites->datum->capacity;
177         for (int i = 0; i < nsize; i++) {
178             scm_obj_t key = ht_datum->elts[i];
179             scm_obj_t value = ht_datum->elts[i + nsize];
180             if (CELLP(key)) {
181                 assert(FIXNUM(value) < m_lites->datum->live);
182                 lites[FIXNUM(value)] = key;
183             }
184         }
185         emit_u32(m_lites->datum->live);
186         for (int i = 0; i < m_lites->datum->live; i++) {
187             if (SYMBOLP(lites[i])) {
188                 scm_symbol_t symbol = (scm_symbol_t)lites[i];
189                 if (UNINTERNEDSYMBOLP(symbol)) {
190                     emit_u8(BVO_TAG_UNINTERNED_SYMBOL);
191                     emit_u32(i);
192                     int n = HDR_SYMBOL_SIZE(symbol->hdr) + 2;
193                     emit_u32(n);
194                     emit_bytes((uint8_t*)symbol->name, n);
195                 } else {
196                     emit_u8(BVO_TAG_SYMBOL);
197                     emit_u32(i);
198                     int n = HDR_SYMBOL_SIZE(symbol->hdr);
199                     emit_u32(n);
200                     emit_bytes((uint8_t*)symbol->name, n);
201                 }
202                 continue;
203             }
204             if (STRINGP(lites[i])) {
205                 scm_string_t string = (scm_string_t)lites[i];
206                 emit_u8(BVO_TAG_STRING);
207                 emit_u32(i);
208                 int n = string->size;
209                 emit_u32(n);
210                 emit_bytes((uint8_t*)string->name, n);
211                 continue;
212             }
213         }
214     } catch (...) {
215         m_heap->deallocate_private(lites);
216         throw;
217     }
218     m_heap->deallocate_private(lites);
219 }
220 
221 void
put_list(scm_obj_t obj)222 serializer_t::put_list(scm_obj_t obj)
223 {
224     int count = 0;
225     while (PAIRP(obj)) {
226         push(CAR(obj));
227         obj = CDR(obj);
228         count++;
229     }
230     if (obj == scm_nil) {
231         emit_u8(BVO_TAG_PLIST);
232         emit_u32(count);
233     } else {
234         emit_u8(BVO_TAG_DLIST);
235         emit_u32(count);
236         put_datum(obj);
237     }
238     while (count--) {
239         obj = pop();
240         assert(obj);
241         put_datum(obj);
242     }
243 }
244 
245 void
put_datum(scm_obj_t obj)246 serializer_t::put_datum(scm_obj_t obj)
247 {
248     if (!CELLP(obj)) {
249         emit_u8(BVO_TAG_IMMEDIATE);
250 #if ARCH_LP64
251         emit_u64((uint64_t)obj);
252 #else
253         emit_u32((uint32_t)obj);
254 #endif
255         return;
256     }
257     if (PAIRP(obj)) {
258         put_list(obj);
259         return;
260     }
261     if (SYMBOLP(obj) || STRINGP(obj)) {
262         scm_obj_t id = get_hashtable(m_lites, obj);
263         emit_u8(BVO_TAG_LOOKUP);
264         emit_u32((uint32_t)FIXNUM(id));
265         return;
266     }
267     if (VECTORP(obj)) {
268         scm_vector_t vector = (scm_vector_t)obj;
269         int count = vector->count;
270         emit_u8(BVO_TAG_VECTOR);
271         emit_u32(count);
272         scm_obj_t* elts = vector->elts;
273         for (int i = 0; i < count; i++) put_datum(elts[i]);
274         return;
275     }
276     if (BVECTORP(obj)) {
277         scm_bvector_t bv = (scm_bvector_t)obj;
278         int count = bv->count;
279         emit_u8(BVO_TAG_BVECTOR);
280         emit_u32(count);
281         emit_bytes(bv->elts, count);
282         return;
283     }
284     if (FLONUMP(obj)) {
285         union {
286             double      f64;
287             uint64_t    u64;
288         } n;
289         scm_flonum_t flonum = (scm_flonum_t)obj;
290         n.f64 = flonum->value;
291         emit_u8(BVO_TAG_FLONUM);
292         emit_u64(n.u64);
293         return;
294     }
295     if (BIGNUMP(obj)) {
296         scm_bignum_t bn = (scm_bignum_t)obj;
297         assert(sizeof(bn->elts[0]) == sizeof(uint32_t));
298         int sign = bn_get_sign(bn); // 0 or 1 or -1
299         int count = bn_get_count(bn);
300         emit_u8(BVO_TAG_BIGNUM);
301         emit_u32(sign);
302         emit_u32(count);
303 #if USE_DIGIT32
304         for (int i = 0; i < count; i++) emit_u32(bn->elts[i]);
305 #else
306         for (int i = 0; i < count; i++) emit_u64(bn->elts[i]);
307 #endif
308         return;
309     }
310     if (RATIONALP(obj)) {
311         scm_rational_t rat = (scm_rational_t)obj;
312         emit_u8(BVO_TAG_RATIONAL);
313         put_datum(rat->nume);
314         put_datum(rat->deno);
315         return;
316     }
317     if (COMPLEXP(obj)) {
318         scm_complex_t comp = (scm_complex_t)obj;
319         emit_u8(BVO_TAG_COMPLEX);
320         put_datum(comp->real);
321         put_datum(comp->imag);
322         return;
323     }
324     fatal("%s:%u internal error: datum not supported in serialized object", __FILE__, __LINE__);
325 }
326 
327 scm_obj_t
translate(scm_obj_t obj)328 serializer_t::translate(scm_obj_t obj)
329 {
330     scoped_lock lock(m_lites->lock);
331     scan(obj);
332     if (m_bad != NULL) return m_bad;
333 #if ARCH_LP64
334     emit_u8(64);
335 #else
336     emit_u8(32);
337 #endif
338     put_lites();
339     put_datum(obj);
340     scm_bvector_t bytes = make_bvector(m_heap, m_buf_mark);
341     memcpy(bytes->elts, m_buf, m_buf_mark);
342     return bytes;
343 }
344 
deserializer_t(object_heap_t * heap)345 deserializer_t::deserializer_t(object_heap_t* heap)
346 {
347     m_heap = heap;
348     m_lites = NULL;
349 }
350 
~deserializer_t()351 deserializer_t::~deserializer_t()
352 {
353     if (m_lites) m_heap->deallocate_private(m_lites);
354 }
355 
356 uint8_t
fetch_u8()357 deserializer_t::fetch_u8()
358 {
359     return *m_buf++;
360 }
361 
362 void
fetch_bytes(uint8_t * p,int n)363 deserializer_t::fetch_bytes(uint8_t* p, int n)
364 {
365     memcpy(p, m_buf, n);
366     m_buf += n;
367     if (m_buf > m_buf_tail) throw true;
368 }
369 
370 uint32_t
fetch_u32()371 deserializer_t::fetch_u32()
372 {
373 #if ARCH_IA32 || ARCH_X64
374     uint32_t value = *(uint32_t*)m_buf;
375     m_buf += sizeof(uint32_t);
376     if (m_buf > m_buf_tail) throw true;
377     return value;
378 #else
379     uint32_t value = 0;
380     value += ((uint32_t)fetch_u8() << 0);
381     value += ((uint32_t)fetch_u8() << 8);
382     value += ((uint32_t)fetch_u8() << 16);
383     value += ((uint32_t)fetch_u8() << 24);
384     if (m_buf > m_buf_tail) throw true;
385     return value;
386 #endif
387 }
388 
389 uint64_t
fetch_u64()390 deserializer_t::fetch_u64()
391 {
392 #if ARCH_IA32 || ARCH_X64
393     uint64_t value = *(uint64_t*)m_buf;
394     m_buf += sizeof(uint64_t);
395     if (m_buf > m_buf_tail) throw true;
396     return value;
397 #else
398     uint64_t value = 0;
399     value += ((uint64_t)fetch_u8() << 0);
400     value += ((uint64_t)fetch_u8() << 8);
401     value += ((uint64_t)fetch_u8() << 16);
402     value += ((uint64_t)fetch_u8() << 24);
403     value += ((uint64_t)fetch_u8() << 32);
404     value += ((uint64_t)fetch_u8() << 40);
405     value += ((uint64_t)fetch_u8() << 48);
406     value += ((uint64_t)fetch_u8() << 56);
407     if (m_buf > m_buf_tail) throw true;
408     return value;
409 #endif
410 }
411 
412 scm_obj_t
get_datum()413 deserializer_t::get_datum()
414 {
415     uint8_t octet = fetch_u8();
416     switch (octet) {
417         case BVO_TAG_LOOKUP: {
418             uint32_t uid = fetch_u32();
419             return m_lites[uid];
420         }
421         case BVO_TAG_IMMEDIATE: {
422 #if ARCH_LP64
423             return (scm_obj_t)fetch_u64();
424 #else
425             return (scm_obj_t)fetch_u32();
426 #endif
427         }
428         case BVO_TAG_PLIST: {
429             int count = fetch_u32();
430             scm_obj_t lst = scm_nil;
431             for (int i = 0; i < count; i++) {
432                 lst = make_pair(m_heap, get_datum(), lst);
433             }
434             return lst;
435         }
436         case BVO_TAG_DLIST: {
437             int count = fetch_u32();
438             scm_obj_t lst = get_datum();
439             for (int i = 0; i < count; i++) {
440                 lst = make_pair(m_heap, get_datum(), lst);
441             }
442             return lst;
443         }
444         case BVO_TAG_VECTOR: {
445             int count = fetch_u32();
446             scm_vector_t vector = make_vector(m_heap, count, scm_unspecified);
447             scm_obj_t* elts = vector->elts;
448             for (int i = 0; i < count; i++) elts[i] = get_datum();
449             return vector;
450         }
451         case BVO_TAG_RATIONAL: {
452             scm_obj_t nume = get_datum();
453             scm_obj_t deno = get_datum();
454             return make_rational(m_heap, nume, deno);
455         }
456         case BVO_TAG_COMPLEX: {
457             scm_obj_t real = get_datum();
458             scm_obj_t imag = get_datum();
459             return make_complex(m_heap, real, imag);
460         }
461         case BVO_TAG_FLONUM: {
462             union { double f64; uint64_t u64; } n;
463             n.u64 = fetch_u64();
464             return make_flonum(m_heap, n.f64);
465         }
466         case BVO_TAG_BIGNUM: {
467             int sign = (int32_t)fetch_u32();
468             int count = fetch_u32();
469             scm_bignum_t bn = make_bignum(m_heap, count);
470             bn_set_sign(bn, sign);
471 #if USE_DIGIT32
472             for (int i = 0; i < count; i++) bn->elts[i] = fetch_u32();
473 #else
474             for (int i = 0; i < count; i++) bn->elts[i] = fetch_u64();
475 #endif
476             return bn;
477         }
478         case BVO_TAG_BVECTOR: {
479             int count = fetch_u32();
480             scm_bvector_t bv = make_bvector(m_heap, count);
481             fetch_bytes(bv->elts, count);
482             return bv;
483         }
484     }
485     throw true;
486 }
487 
488 void
get_lites()489 deserializer_t::get_lites()
490 {
491     int buflen = 128;
492     char* buf = (char*)m_heap->allocate_private(buflen + 1);
493     int count = fetch_u32();
494     if (count < 0) throw true;
495     m_lites = (scm_obj_t*)m_heap->allocate_private(sizeof(scm_obj_t) * count);
496     for (int i = 0; i < count; i++) {
497         uint8_t tag = fetch_u8();
498         uint32_t uid = fetch_u32();
499         uint32_t len = fetch_u32();
500         if (uid > count) throw true;
501         if (m_buf + len > m_buf_tail) throw true;
502         if (len > buflen) {
503             m_heap->deallocate_private(buf);
504             buf = (char*)m_heap->allocate_private(len + 1);
505             buflen = len;
506         }
507         fetch_bytes((uint8_t*)buf, len);
508         buf[len] = 0;
509         switch (tag) {
510             case BVO_TAG_SYMBOL:
511                 m_lites[uid] = make_symbol(m_heap, buf, len);
512                 break;
513             case BVO_TAG_UNINTERNED_SYMBOL:
514                 m_lites[uid] = make_symbol_uninterned(m_heap, buf, len - 2, buf[len - 1]);
515                 break;
516             case BVO_TAG_STRING:
517                 m_lites[uid] = make_string_literal(m_heap, buf, len);
518                 break;
519             default:
520                 m_heap->deallocate_private(buf);
521                 throw true;
522         }
523     }
524     m_heap->deallocate_private(buf);
525 }
526 
527 scm_obj_t
translate(scm_bvector_t obj)528 deserializer_t::translate(scm_bvector_t obj)
529 {
530     try {
531         m_buf = obj->elts;
532         m_buf_tail = m_buf + obj->count;
533         if (m_buf == m_buf_tail) return NULL;
534 #if ARCH_LP64
535         if (fetch_u8() != 64) return NULL;
536 #else
537         if (fetch_u8() != 32) return NULL;
538 #endif
539         get_lites();
540         scm_obj_t obj = get_datum();
541         if (m_buf != m_buf_tail) return NULL;
542         return obj;
543     } catch (...) {
544         return NULL;
545     }
546 }
547