1 /*
2  * Copyright 2006-2008 The FLWOR Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
#include "stdafx.h"

#include "zorbaserialization/bin_archiver.h"
#include "zorbaserialization/archiver_field.h"

#include "diagnostics/xquery_diagnostics.h"
#include "diagnostics/assert.h"

#include "zorbatypes/collation_manager.h"

#include <fstream>
#include <new>
27 
28 namespace zorba
29 {
30 
31 namespace serialization
32 {
33 
// Magic string written (including its terminating NUL) at the start of every
// archive by serialize_out() and compared against by the reading constructor.
#define ZORBA_BIN_SERIALIZED_PLAN_STRING    "ZORBA-XQUERY BINARY SERIALIZED PLAN"

// Number of bytes requested from the input stream per read, and the amount by
// which the in-memory buffer grows. Parenthesized so that the macro keeps its
// value when it is expanded inside a larger expression (e.g. `x % SIZE`).
#define BUFFER_SEGMENT_SIZE  (2*1024*1024)
36 
37 
38 #ifdef ZORBA_PLAN_SERIALIZER_STATISTICS
39 /*******************************************************************************
40 
41 ********************************************************************************/
// Emits `indent` single-space strings to `os`; emits nothing when `indent`
// is zero.
void add_indentation(std::ostream& os, unsigned int indent)
{
  for (unsigned int remaining = indent; remaining > 0; --remaining)
    os << " ";
}
49 
50 
51 /*******************************************************************************
52 
53 ********************************************************************************/
// Writes a type name to `os` in a form usable as an XML element name:
//  - a null or empty `type` is written as "unknown";
//  - alphabetic characters are copied through;
//  - each '*' is written as "_ptr";
//  - every other character is dropped.
void write_xml_name(std::ostream& os, const char* type)
{
  if (!type || !*type)
  {
    os << "unknown";
    return;
  }

  for (const char* cursor = type; *cursor; ++cursor)
  {
    // isalpha() requires a value representable as unsigned char (or EOF);
    // a plain char may be negative, so convert before classifying.
    if (isalpha(static_cast<unsigned char>(*cursor)))
      os << *cursor;
    else if (*cursor == '*')
      os << "_ptr";
  }
}
71 
72 
73 /*******************************************************************************
74 
75 ********************************************************************************/
output_statistics_archive_field(std::ostream & os,unsigned int indent,archive_field * parent)76 void output_statistics_archive_field(
77     std::ostream& os,
78     unsigned int indent,
79     archive_field* parent)
80 {
81   add_indentation(os, indent);
82   os << "<";
83   write_xml_name(os, parent->theTypeName);
84   os << " n=\"" << parent->objects_saved << "\"";
85   os << " s=\"" << parent->thebytesSaved << "\"";
86 
87   if(parent->theKind == ARCHIVE_FIELD_REFERENCING)
88       os << " t=\"ref\"";
89   else if(parent->theKind == ARCHIVE_FIELD_PTR)
90       os << " t=\"ptr\"";
91 
92   if ((!parent->theIsSimple) && (parent->theKind != ARCHIVE_FIELD_REFERENCING))
93   {
94     os << ">" << std::endl;
95     archive_field   *current_field = parent->first_child;
96 
97     while (current_field)
98     {
99       output_statistics_archive_field(os, indent+2, current_field);
100       current_field = current_field->theNextSibling;
101     }
102 
103     add_indentation(os, indent);
104     os << "</";
105     write_xml_name(os, parent->theTypeName);
106     os << ">" << std::endl;
107   }
108   else
109   {
110     os << "/>" << std::endl;
111   }
112 }
113 #endif
114 
115 
116 
117 /*******************************************************************************
118 
119 ********************************************************************************/
~BinArchiver()120 BinArchiver::~BinArchiver()
121 {
122   if (theBuffer)
123     free(theBuffer);
124 }
125 
126 
127 ////////////////////////////////////////////////////////////////////////////////
128 //                                                                            //
129 //  writing archive                                                           //
130 //                                                                            //
131 ////////////////////////////////////////////////////////////////////////////////
132 
133 
134 /*******************************************************************************
135   Open archiver for output
136 ********************************************************************************/
BinArchiver(std::ostream * os)137 BinArchiver::BinArchiver(std::ostream* os)
138   :
139   Archiver(true),
140   theStringPool(1024, false, false),
141   theFirstBinaryString(0)
142 {
143   this->is = NULL;
144   this->os = os;
145   theLastId = 0;
146   theCurrentByte = 0;
147   theBitfill = 0;
148 
149   theBuffer = NULL;
150 }
151 
152 
153 /*******************************************************************************
154 
155 ********************************************************************************/
serialize_out()156 void BinArchiver::serialize_out()
157 {
158   if (!os)
159   {
160     throw ZORBA_EXCEPTION(zerr::ZCSE0007_INPUT_ARCHIVE_USED_FOR_OUT_SERIALIZATION);
161   }
162 
163   prepare_serialize_out();
164 
165 #ifdef ZORBA_PLAN_SERIALIZER_STATISTICS
166   bytes_saved = 0;
167   objects_saved = 0;
168   nr_ptrs = 0;
169   strings_saved = 0;
170 #endif
171 
172   os->write(ZORBA_BIN_SERIALIZED_PLAN_STRING, sizeof(ZORBA_BIN_SERIALIZED_PLAN_STRING));
173 
174   os->write(theArchiveName.c_str(), (std::streamsize)theArchiveName.length()+1);
175   os->write(theArchiveInfo.c_str(), (std::streamsize)theArchiveInfo.length()+1);
176   write_uint32(theArchiveVersion);
177 
178   write_uint32(theFieldCounter);
179 
180 #ifndef NDEBUG
181   write_uint32(0);//for debug
182 #else
183   write_uint32(1);//for release
184 #endif
185 
186   //first gather all strings in a string pool
187   collect_strings(theRootField);
188 
189   //now serialize the string pool
190   serialize_out_string_pool();
191 
192   //now serialize the fields
193   serialize_compound_fields(theRootField);
194 
195   if (theBitfill)
196   {
197     theCurrentByte <<= (8-theBitfill);
198     os->write((char*)&theCurrentByte, 1);
199 #ifdef ZORBA_PLAN_SERIALIZER_STATISTICS
200     bytes_saved++;
201 #endif
202   }
203 
204 #ifdef ZORBA_PLAN_SERIALIZER_STATISTICS
205   std::ofstream   plan_xml("plan.xml");
206   plan_xml << "<plan objects_saved=\"" << objects_saved << "\""
207            << " nr_ptrs=\"" << nr_ptrs << "\""
208            << " string_count=\"" << strings.size() << "\""
209            << " string_pool_size=\"" << strings_saved << "\" >"
210            << std::endl;
211   output_statistics_archive_field(plan_xml, 2, out_fields);
212   plan_xml << "</plan>" << std::endl;
213   std::cout << "Plan serialized:" << std::endl
214             << "string count = " << strings.size() << std::endl
215             << "size occupied by strings = " << strings_saved  << std::endl
216             << "obj_count = " << objects_saved << std::endl
217             << "objs that are ptr = " << nr_ptrs << std::endl
218             << "size occupied by objects = " << bytes_saved << std::endl;
219   std::cout << "Get some other details in plan.xml" << std::endl;
220 #endif
221 }
222 
223 
224 /*******************************************************************************
225 
226 ********************************************************************************/
collect_strings(archive_field * parent_field)227 void BinArchiver::collect_strings(archive_field* parent_field)
228 {
229   archive_field* field = parent_field->theFirstChild;
230 
231   while (field)
232   {
233     if (field->theIsSimple &&
234         (field->theKind == ARCHIVE_FIELD_NORMAL ||
235          field->theKind == ARCHIVE_FIELD_PTR))
236     {
237       switch (field->theType)
238       {
239       case TYPE_INT64:
240       case TYPE_UINT64:
241       case TYPE_INT32:
242       case TYPE_UINT32:
243       case TYPE_ENUM:
244       case TYPE_INT16:
245       case TYPE_UINT16:
246       case TYPE_CHAR:
247       case TYPE_UCHAR:
248       case TYPE_BOOL:
249       {
250         break;
251       }
252       case TYPE_ZSTRING:
253       case TYPE_STD_STRING:
254       case TYPE_COLLATOR:
255       {
256         field->theValuePosInPool = add_to_string_pool(field->theStringValue);
257         break;
258       }
259       default:
260       {
261         ZORBA_ASSERT(false);
262       }
263       }
264     }
265 
266     if (!field->theIsSimple && field->theKind != ARCHIVE_FIELD_REFERENCING)
267     {
268       collect_strings(field);
269     }
270 
271     field = field->theNextSibling;
272   }
273 }
274 
275 
276 /*******************************************************************************
277 
278 ********************************************************************************/
add_to_string_pool(const zstring & str)279 int BinArchiver::add_to_string_pool(const zstring& str)
280 {
281   csize strPos = 0;
282 
283   if (theStringPool.get(str, strPos))
284   {
285     StringInfo& info = theStrings.at(strPos-1);
286     ++info.count;
287     return strPos;
288   }
289 
290   StringInfo info;
291   info.str = str;
292   info.binary = (str.size() != strlen(str.c_str()));
293   info.count = 1;
294   info.theDiskPos = theStrings.size()+1;
295 
296   theStrings.push_back(info);
297 
298   strPos = theStrings.size();
299 
300   theOrderedStrings.push_back(strPos-1);
301 
302   theStringPool.insert(str, strPos);
303 
304   return strPos;
305 }
306 
307 
308 /*******************************************************************************
309 
310 ********************************************************************************/
serialize_out_string_pool()311 void BinArchiver::serialize_out_string_pool()
312 {
313   //td::cout << "String pool size = " << theStrings.size() << std::endl;
314 
315 #if 1
316   //sort strings based on use count
317   csize i, j;
318   for (i = 0; i < theOrderedStrings.size(); ++i)
319   {
320     for (j = i+1; j < theOrderedStrings.size(); ++j)
321     {
322       if ((theStrings.at(theOrderedStrings[i]).binary &&
323            !theStrings.at(theOrderedStrings[j]).binary) ||
324           (theStrings.at(theOrderedStrings[i]).binary ==
325            theStrings.at(theOrderedStrings[j]).binary &&
326            theStrings.at(theOrderedStrings[i]).count <
327            theStrings.at(theOrderedStrings[j]).count))
328       {
329         unsigned int temp;
330         temp = theOrderedStrings[i];
331         theOrderedStrings[i] = theOrderedStrings[j];
332         theOrderedStrings[j] = temp;
333       }
334     }
335   }
336 
337   //std::cout << std::endl;
338 
339   for (i = 0; i < theOrderedStrings.size(); ++i)
340   {
341     if (theFirstBinaryString == 0 && theStrings.at(theOrderedStrings[i]).binary)
342     {
343       ZORBA_ASSERT(i > 0);
344       theFirstBinaryString = i;
345     }
346 
347     theStrings.at(theOrderedStrings[i]).theDiskPos = i+1;
348 
349     //std::cout << i << ": " << theStrings.at(theOrderedStrings[i]).str << std::endl;
350   }
351 #endif
352 
353   if (theFirstBinaryString == 0)
354     theFirstBinaryString = theStrings.size() + 1;
355 
356   write_uint32((uint32_t)theStrings.size());
357   write_uint32((uint32_t)theFirstBinaryString);
358 
359   if (theBitfill)
360   {
361     theCurrentByte <<= (8-theBitfill);
362     os->write((char*)&theCurrentByte, 1);
363     theBitfill = 0;
364     theCurrentByte = 0;
365 #ifdef ZORBA_PLAN_SERIALIZER_STATISTICS
366     bytes_saved++;
367 #endif
368   }
369 
370   std::vector<csize>::const_iterator ite = theOrderedStrings.begin();
371   std::vector<csize>::const_iterator end = theOrderedStrings.end();
372   for (; ite != end; ++ite)
373   {
374     write_string(theStrings.at(*ite));
375   }
376 }
377 
378 
379 /*******************************************************************************
380 
381 ********************************************************************************/
write_string(const StringInfo & info)382 void BinArchiver::write_string(const StringInfo& info)
383 {
384   if (info.binary)
385   {
386     assert(theBitfill == 0);
387 
388     write_uint64(info.str.size());
389 
390     if (theBitfill)
391     {
392       theCurrentByte <<= (8-theBitfill);
393       os->write((char*)&theCurrentByte, 1);
394       theBitfill = 0;
395       theCurrentByte = 0;
396 #ifdef ZORBA_PLAN_SERIALIZER_STATISTICS
397       bytes_saved++;
398 #endif
399     }
400 
401     os->write(info.str.c_str(), (std::streamsize)info.str.size());
402   }
403   else
404   {
405     os->write(info.str.c_str(), (std::streamsize)info.str.size() + 1);
406   }
407 #ifdef ZORBA_PLAN_SERIALIZER_STATISTICS
408   strings_saved += len;
409 #endif
410 }
411 
412 
413 /*******************************************************************************
414 
415 ********************************************************************************/
serialize_compound_fields(archive_field * parent_field)416 void BinArchiver::serialize_compound_fields(archive_field* parent_field)
417 {
418 #ifdef ZORBA_PLAN_SERIALIZER_STATISTICS
419   unsigned int bytes_saved1 = bytes_saved;
420   unsigned int objects_saved1 = objects_saved;
421 #endif
422   archive_field* field = parent_field->theFirstChild;
423   unsigned char small_treat;
424 
425   while (field)
426   {
427     if (field->theIsSimple)
428     {
429       switch(field->theKind)
430       {
431       case ARCHIVE_FIELD_NORMAL:
432       case ARCHIVE_FIELD_PTR:
433       {
434         if (field->theId != 0)
435         {
436           small_treat = 0;
437           write_bits(small_treat, 2);
438 
439           write_int_exp(field->theId - theLastId);
440           theLastId = field->theId;
441         }
442 
443         switch (field->theType)
444         {
445         case TYPE_INT64:
446         {
447           write_int64(field->theValue.int64v);
448           break;
449         }
450         case TYPE_UINT64:
451         {
452           write_uint64(field->theValue.uint64v);
453           break;
454         }
455         case TYPE_INT32:
456         {
457           write_int32(field->theValue.int32v);
458           break;
459         }
460         case TYPE_UINT32:
461         {
462           write_uint32(field->theValue.uint32v);
463           break;
464         }
465         case TYPE_ENUM:
466         {
467           write_enum(field->theValue.uint32v);
468           break;
469         }
470         case TYPE_INT16:
471         {
472           write_int32(field->theValue.int16v);
473           break;
474         }
475         case TYPE_UINT16:
476         {
477           write_uint32(field->theValue.uint16v);
478           break;
479         }
480         case TYPE_CHAR:
481         {
482           write_bits(field->theValue.charv, 8);
483           break;
484         }
485         case TYPE_UCHAR:
486         {
487           write_bits(field->theValue.ucharv, 8);
488           break;
489         }
490         case TYPE_BOOL:
491         {
492           write_bit(field->theValue.boolv ? 1 : 0);
493           break;
494         }
495         case TYPE_ZSTRING:
496         case TYPE_STD_STRING:
497         case TYPE_COLLATOR:
498         {
499           assert(field->theValuePosInPool);
500           write_int_exp2(theStrings.at(field->theValuePosInPool-1).theDiskPos);
501           break;
502         }
503         default:
504         {
505           ZORBA_ASSERT(false);
506         }
507         }
508 
509         break;
510       }
511       case ARCHIVE_FIELD_NULL:
512       {
513         small_treat = 1;
514         write_bits(small_treat, 2);
515         break;
516       }
517       case ARCHIVE_FIELD_REFERENCING:
518       {
519         small_treat = 2;
520         write_bits(small_treat, 2);
521         assert(field->theReferredField);
522         write_uint32(field->theReferredField->theId);
523         break;
524       }
525       default:
526       {
527         ZORBA_ASSERT(false);
528       }
529       }
530     }
531     else
532     {
533       assert(field->theValuePosInPool == 0);
534 
535       switch (field->theKind)
536       {
537       case ARCHIVE_FIELD_NULL:
538       {
539         small_treat = 1;
540         write_bits(small_treat, 2);
541         assert(field->theId == 0);
542         break;
543       }
544       case ARCHIVE_FIELD_REFERENCING:
545       {
546         small_treat = 2;
547         write_bits(small_treat, 2);
548         assert(field->theReferredField);
549         write_uint32(field->theReferredField->theId);
550         assert(field->theId == 0);
551         break;
552       }
553       case ARCHIVE_FIELD_BASECLASS:
554       {
555         small_treat = 3;
556         write_bits(small_treat, 2);
557         assert(field->theId == 0);
558         break;
559       }
560       case ARCHIVE_FIELD_NORMAL:
561       {
562         small_treat = 0;
563         write_bits(small_treat, 2);
564         assert(field->theId);
565         write_int_exp(field->theId - theLastId);
566         theLastId = field->theId;
567         break;
568       }
569       case ARCHIVE_FIELD_PTR:
570       {
571         small_treat = 0;
572         write_bits(small_treat, 2);
573         assert(field->theId);
574         write_int_exp(field->theId - theLastId);
575         theLastId = field->theId;
576 
577         if (field->theIsClass)
578         {
579           assert(field->theType != TYPE_LAST);
580           write_enum(field->theType);
581         }
582 
583         break;
584       }
585       default:
586       {
587         ZORBA_ASSERT(false);
588       }
589       }
590     }
591 
592 #ifdef ZORBA_PLAN_SERIALIZER_STATISTICS
593     objects_saved++;
594     if (field->theKind == ARCHIVE_FIELD_PTR)
595       nr_ptrs++;
596 #endif
597 
598     if (!field->theIsSimple)
599     {
600       if (field->theKind != ARCHIVE_FIELD_REFERENCING)
601       {
602         serialize_compound_fields(field);
603 #ifndef NDEBUG
604         unsigned char tempbyte;
605         tempbyte = 0xFF;
606         write_bits(tempbyte, 8);
607 #endif
608       }
609     }
610 
611     field = field->theNextSibling;
612   }
613 
614 #ifdef ZORBA_PLAN_SERIALIZER_STATISTICS
615   parent_field->thebytesSaved = bytes_saved - bytes_saved1;
616   parent_field->theObjectsSaved = objects_saved - objects_saved1;
617 #endif
618 }
619 
620 
621 /*******************************************************************************
622   Pack 7 bits of the intval into a byte, and set the high-order bit of that
623   byte to 0, unless it is the last byte to be written, in which case its
624   high-order bit is set to 1, signifying the end of the number.
625 ********************************************************************************/
write_int64(int64_t intval)626 void BinArchiver::write_int64(int64_t intval)
627 {
628   if (intval < 0)
629   {
630     write_bit(1);
631     uint64_t absval = -intval;
632 
633     write_uint64(absval);
634   }
635   else
636   {
637     write_bit(0);
638     write_uint64((uint64_t)intval);
639   }
640 }
641 
642 
write_uint64(uint64_t intval)643 void BinArchiver::write_uint64(uint64_t intval)
644 {
645   uint64_t shifted_int = (intval >> 7);
646   unsigned char tmp;
647 
648   while (shifted_int)
649   {
650     tmp = intval & 0x7F;
651 
652     write_bits(tmp, 8);
653 
654     intval = shifted_int;
655     shifted_int = (intval >> 7);
656   }
657 
658   tmp = (intval & 0x7F) | 0x80;
659 
660   write_bits(tmp, 8);
661 }
662 
663 
write_uint32(uint32_t intval)664 void BinArchiver::write_uint32(uint32_t intval)
665 {
666   uint32_t shifted_int = (intval >> 7);
667   unsigned char tmp;
668 
669   while (shifted_int)
670   {
671     tmp = intval & 0x7F;
672 
673     write_bits(tmp, 8);
674 
675     intval = shifted_int;
676     shifted_int = (intval >> 7);
677   }
678 
679   tmp = (intval & 0x7F) | 0x80;
680 
681   write_bits(tmp, 8);
682 }
683 
684 
write_int32(int32_t intval)685 void BinArchiver::write_int32(int32_t intval)
686 {
687   if (intval < 0)
688   {
689     write_bit(1);
690     uint32_t absval = -intval;
691 
692     write_uint32(absval);
693   }
694   else
695   {
696     write_bit(0);
697     write_uint32((uint32_t)intval);
698   }
699 }
700 
701 
702 /*******************************************************************************
703 
704 ********************************************************************************/
write_int_exp(unsigned int intval)705 void BinArchiver::write_int_exp(unsigned int intval)
706 {
707   if (intval == 1)
708   {
709     write_bit(0);
710   }
711   else if (intval < (1<<4))
712   {
713     write_bit(1);
714     write_bit(0);
715     write_bits(intval, 4);
716   }
717   else if (intval < (1<<13))
718   {
719     write_bit(1);
720     write_bit(1);
721     write_bit(0);
722     write_bits(intval, 13);
723   }
724   else
725   {
726     write_bit(1);
727     write_bit(1);
728     write_bit(1);
729     write_bits(intval, 32);
730   }
731 }
732 
733 
734 /*******************************************************************************
735 
736 ********************************************************************************/
write_int_exp2(unsigned int intval)737 void BinArchiver::write_int_exp2(unsigned int intval)
738 {
739   if (intval < (1<<4))
740   {
741     write_bit(0);
742     write_bits(intval, 4);
743   }
744   else if (intval < (1<<12))
745   {
746     write_bit(1);
747     write_bit(0);
748     write_bits(intval, 12);
749   }
750   else if (intval < (1<<20))
751   {
752     write_bit(1);
753     write_bit(1);
754     write_bit(0);
755     write_bits(intval, 20);
756   }
757   else
758   {
759     write_bit(1);
760     write_bit(1);
761     write_bit(1);
762     write_bits(intval, 32);
763   }
764 }
765 
766 
767 /*******************************************************************************
768 
769 ********************************************************************************/
write_enum(unsigned int intval)770 void BinArchiver::write_enum(unsigned int intval)
771 {
772   if (intval < (1 << 5))
773   {
774     write_bit(0);
775     write_bits(intval, 5);
776   }
777   else if (intval < (1 << 8))
778   {
779     write_bit(1);
780     write_bit(0);
781     write_bits(intval, 8);
782   }
783   else if (intval < (1 << 13))
784   {
785     write_bit(1);
786     write_bit(1);
787     write_bit(0);
788     write_bits(intval, 13);
789   }
790   else
791   {
792     write_bit(1);
793     write_bit(1);
794     write_bit(1);
795     write_bits(intval, 32);
796   }
797 }
798 
799 
800 /*******************************************************************************
801 
802 ********************************************************************************/
write_bits(unsigned int value,unsigned int bits)803 void BinArchiver::write_bits(unsigned int value, unsigned int bits)
804 {
805   while (bits)
806   {
807     write_bit((value >> (bits-1)));
808     --bits;
809   }
810 }
811 
812 
813 /*******************************************************************************
814 
815 ********************************************************************************/
write_bit(unsigned char bit)816 void BinArchiver::write_bit(unsigned char bit)
817 {
818   theCurrentByte <<= 1;
819   theCurrentByte |= (bit & 0x01);
820   ++theBitfill;
821 
822   if (theBitfill == 8)
823   {
824     os->write((char*)&theCurrentByte, 1);
825     theCurrentByte = 0;
826     theBitfill = 0;
827 #ifdef ZORBA_PLAN_SERIALIZER_STATISTICS
828     bytes_saved++;
829 #endif
830   }
831 }
832 
833 
834 ////////////////////////////////////////////////////////////////////////////////
835 //                                                                            //
836 //  reading archive                                                           //
837 //                                                                            //
838 ////////////////////////////////////////////////////////////////////////////////
839 
840 
841 /*******************************************************************************
842   Open archiver for input
843 ********************************************************************************/
BinArchiver(std::istream * is)844 BinArchiver::BinArchiver(std::istream* is)
845   :
846   Archiver(false),
847   theStringPool(false, false)
848 {
849   this->is = is;
850   this->os = NULL;
851   theLastId = 0;
852   theCurrentByte = 0;
853   theBitfill = 8;
854 
855   //read the plan serializer info
856   char preface_string[200];
857   unsigned int preface_len = 0;
858 
859   while (preface_len < sizeof(preface_string))
860   {
861 	  is->read(preface_string + preface_len, 1);
862 	  if (is->gcount() < 1)
863 	  {
864       throw ZORBA_EXCEPTION(zerr::ZCSE0011_INPUT_ARCHIVE_NOT_ZORBA_ARCHIVE);
865 	  }
866 
867     if (preface_string[preface_len] == 0)
868       break;
869 
870     preface_len++;
871   }
872 
873   if (strcmp(preface_string, ZORBA_BIN_SERIALIZED_PLAN_STRING))
874   {
875     throw ZORBA_EXCEPTION(zerr::ZCSE0011_INPUT_ARCHIVE_NOT_ZORBA_ARCHIVE);
876   }
877 
878   theBuffer = (unsigned char*)malloc(BUFFER_SEGMENT_SIZE);
879   size_read = 0;
880 
881   while (1)
882   {
883     is->read((char*)theBuffer + size_read, BUFFER_SEGMENT_SIZE);
884     size_read += (size_t)is->gcount();
885 
886     if (is->gcount() == BUFFER_SEGMENT_SIZE)
887     {
888       theBuffer = (unsigned char*)realloc(theBuffer, size_read + BUFFER_SEGMENT_SIZE);
889     }
890     else
891     {
892       break;
893     }
894   }
895 
896   theCurrentBytePtr = theBuffer;
897 
898   read_string(theArchiveName);
899   read_string(theArchiveInfo);
900   theArchiveVersion = read_uint32();
901 
902   theFieldCounter = read_uint32();
903 
904   unsigned int is_release = read_uint32();
905 
906 #ifndef NDEBUG
907   if(is_release)
908   {
909     throw ZORBA_EXCEPTION(zerr::ZCSE0016_CANNOT_LOAD_FROM_RELEASE_TO_DEBUG);
910   }
911 #else
912   if(!is_release)
913   {
914     throw ZORBA_EXCEPTION(zerr::ZCSE0017_CANNOT_LOAD_FROM_DEBUG_TO_RELEASE);
915   }
916 #endif
917 
918   read_string_pool();
919 
920   root_tag_is_read();
921 }
922 
923 
924 /*******************************************************************************
925 
926 ********************************************************************************/
read_string_pool()927 void BinArchiver::read_string_pool()
928 {
929   theStrings.clear();
930 
931   csize count = read_uint32();
932   csize theFirstBinaryString = read_uint32();
933 
934   if (theBitfill != 8)
935   {
936     ++theCurrentBytePtr;
937     theBitfill = 8;
938   }
939 
940   StringInfo str_pos;
941 
942   for (csize i = 0; i < count; ++i)
943   {
944     if (i < theFirstBinaryString)
945       read_string(str_pos.str);
946     else
947       read_binary_string(str_pos.str);
948 
949     theStrings.push_back(str_pos);
950   }
951 
952   theBitfill = 8;
953 }
954 
955 
956 /*******************************************************************************
  Read a null-terminated string from disk into the string pool (the string is
  copied)
959 ********************************************************************************/
read_string(zstring & str)960 void BinArchiver::read_string(zstring& str)
961 {
962   str = (char*)theCurrentBytePtr;
963 
964   theCurrentBytePtr += str.size();
965 
966   ++theCurrentBytePtr;
967 }
968 
969 
970 /*******************************************************************************
  Read a binary string from disk into the string pool (the string is copied)
972 ********************************************************************************/
read_binary_string(zstring & str)973 void BinArchiver::read_binary_string(zstring& str)
974 {
975   csize size = read_uint64();
976 
977   if (theBitfill != 8)
978   {
979     ++theCurrentBytePtr;
980     theBitfill = 8;
981   }
982 
983   str.assign((char*)theCurrentBytePtr, size);
984 
985   theCurrentBytePtr += size;
986 }
987 
988 
989 /*******************************************************************************
990 
991 ********************************************************************************/
read_int64()992 int64_t BinArchiver::read_int64()
993 {
994   unsigned char sign = read_bit();
995 
996   if (sign == 0)
997   {
998     return static_cast<int64_t>(read_uint64());
999   }
1000   else
1001   {
1002     uint64_t absval = read_uint64();
1003     return -static_cast<int64_t>(absval);
1004   }
1005 }
1006 
1007 
read_uint64()1008 uint64_t BinArchiver::read_uint64()
1009 {
1010   uint64_t outval = 0;
1011   unsigned char tmp;
1012   int i = 0;
1013 
1014   do
1015   {
1016     tmp = read_bits(8);
1017     outval |= ((uint64_t)(tmp & 0x7F) << (7*i));
1018     ++i;
1019   }
1020   while(!(tmp & 0x80));
1021 
1022   return outval;
1023 }
1024 
1025 
1026 /*******************************************************************************
1027   used for reading a referring id
1028 ********************************************************************************/
read_uint32()1029 uint32_t BinArchiver::read_uint32()
1030 {
1031   uint32_t outval = 0;
1032   unsigned char tmp;
1033   int i = 0;
1034 
1035   do
1036   {
1037     tmp = read_bits(8);
1038     outval |= ((unsigned int)(tmp & 0x7F) << (7*i));
1039     ++i;
1040   }
1041   while(!(tmp & 0x80));
1042 
1043   return outval;
1044 }
1045 
1046 
read_int32()1047 int32_t BinArchiver::read_int32()
1048 {
1049   unsigned char sign = read_bit();
1050 
1051   if (sign == 0)
1052   {
1053     return static_cast<int32_t>(read_uint32());
1054   }
1055   else
1056   {
1057     uint32_t absval = read_uint32();
1058     return -static_cast<int32_t>(absval);
1059   }
1060 }
1061 
1062 
1063 /*******************************************************************************
1064 
1065 ********************************************************************************/
read_bit()1066 unsigned char BinArchiver::read_bit()
1067 {
1068   if (theBitfill == 0)
1069   {
1070     ++theCurrentBytePtr;
1071     theBitfill = 8;
1072   }
1073 
1074   --theBitfill;
1075   unsigned char result = (*theCurrentBytePtr & 0x80) ? 1 : 0;
1076   *theCurrentBytePtr <<= 1;
1077   return result;
1078 }
1079 
1080 
1081 /*******************************************************************************
1082 
1083 ********************************************************************************/
read_bits(unsigned int bits)1084 unsigned int BinArchiver::read_bits(unsigned int bits)
1085 {
1086   unsigned int result = 0;
1087 
1088   while (bits)
1089   {
1090     if (!theBitfill)
1091     {
1092       ++theCurrentBytePtr;
1093       theBitfill = 8;
1094     }
1095 
1096     if (theBitfill <= bits)
1097     {
1098       result <<= theBitfill;
1099       result |= *theCurrentBytePtr >> (8-theBitfill);
1100       bits -= theBitfill;
1101       theBitfill = 0;
1102     }
1103     else
1104     {
1105       result <<= bits;
1106       result |= *theCurrentBytePtr >> (8 - bits);
1107       theBitfill -= bits;
1108       *theCurrentBytePtr <<= bits;
1109       bits = 0;
1110     }
1111   }
1112   return result;
1113 }
1114 
1115 
1116 /*******************************************************************************
  Used for reading the field id. Optimized for the number 1
1118 ********************************************************************************/
read_int_exp()1119 unsigned int BinArchiver::read_int_exp()
1120 {
1121   unsigned char bit;
1122 
1123   if (theBitfill == 0)
1124   {
1125     ++theCurrentBytePtr;
1126     theBitfill = 8;
1127   }
1128 
1129   // read bit
1130   bit = (*theCurrentBytePtr & 0x80);
1131 
1132   --theBitfill;
1133   if (theBitfill == 0)
1134   {
1135     ++theCurrentBytePtr;
1136     theBitfill = 8;
1137   }
1138   else
1139   {
1140     *theCurrentBytePtr <<= 1;
1141   }
1142 
1143   if (!bit)
1144     return 1;
1145 
1146   // read bit
1147   bit = (*theCurrentBytePtr & 0x80);
1148 
1149   --theBitfill;
1150   if (theBitfill == 0)
1151   {
1152     ++theCurrentBytePtr;
1153     theBitfill = 8;
1154   }
1155   else
1156   {
1157     *theCurrentBytePtr <<= 1;
1158   }
1159 
1160   if (!bit)
1161     return read_bits(4);
1162 
1163   // read bit
1164   bit = (*theCurrentBytePtr & 0x80);
1165 
1166   --theBitfill;
1167   if (theBitfill == 0)
1168   {
1169     ++theCurrentBytePtr;
1170     theBitfill = 8;
1171   }
1172   else
1173   {
1174     *theCurrentBytePtr <<= 1;
1175   }
1176 
1177   if (!bit)
1178     return read_bits(13);
1179   else
1180     return read_bits(32);
1181 }
1182 
1183 
1184 /*******************************************************************************
1185   used for reading the position of the field value
1186 ********************************************************************************/
read_int_exp2()1187 unsigned int BinArchiver::read_int_exp2()
1188 {
1189   unsigned char bit;
1190 
1191   if (theBitfill == 0)
1192   {
1193     ++theCurrentBytePtr;
1194     theBitfill = 8;
1195   }
1196 
1197   // read bit
1198   bit = (*theCurrentBytePtr & 0x80);
1199 
1200   --theBitfill;
1201   if (theBitfill == 0)
1202   {
1203     ++theCurrentBytePtr;
1204     theBitfill = 8;
1205   }
1206   else
1207   {
1208     *theCurrentBytePtr <<= 1;
1209   }
1210 
1211   if (!bit)
1212     return read_bits(4);
1213 
1214   // read bit
1215   bit = (*theCurrentBytePtr & 0x80);
1216 
1217   --theBitfill;
1218   if (theBitfill == 0)
1219   {
1220     ++theCurrentBytePtr;
1221     theBitfill = 8;
1222   }
1223   else
1224   {
1225     *theCurrentBytePtr <<= 1;
1226   }
1227 
1228   if (!bit)
1229     return read_bits(12);
1230 
1231   // read bit
1232   bit = (*theCurrentBytePtr & 0x80);
1233 
1234   --theBitfill;
1235   if (theBitfill == 0)
1236   {
1237     ++theCurrentBytePtr;
1238     theBitfill = 8;
1239   }
1240   else
1241   {
1242     *theCurrentBytePtr <<= 1;
1243   }
1244 
1245   if (!bit)
1246     return read_bits(20);
1247   else
1248     return read_bits(32);
1249 }
1250 
1251 
1252 /*******************************************************************************
1253 
1254 ********************************************************************************/
read_enum()1255 unsigned int BinArchiver::read_enum()
1256 {
1257   unsigned char bit;
1258 
1259   bit = read_bit();
1260 
1261   if (!bit)
1262     return read_bits(5);
1263 
1264   bit = read_bit();
1265 
1266   if (!bit)
1267     return read_bits(8);
1268 
1269   bit = read_bit();
1270 
1271   if(!bit)
1272     return read_bits(13);
1273   else
1274     return read_bits(32);
1275 }
1276 
1277 
1278 /*******************************************************************************
1279 
1280 ********************************************************************************/
read_next_simple_temp_field_impl(TypeCode type,void * obj)1281 void BinArchiver::read_next_simple_temp_field_impl(TypeCode type, void* obj)
1282 {
1283   if (!is)
1284   {
1285     throw ZORBA_EXCEPTION(zerr::ZCSE0008_OUTPUT_ARCHIVE_USED_FOR_IN_SERIALIZATION);
1286   }
1287 
1288   switch (type)
1289   {
1290   case TYPE_INT64:
1291   {
1292     *static_cast<int64_t*>(obj) = read_int64();
1293     break;
1294   }
1295   case TYPE_UINT64:
1296   {
1297     *static_cast<uint64_t*>(obj) = read_uint64();
1298     break;
1299   }
1300   case TYPE_INT32:
1301   {
1302     *static_cast<int32_t*>(obj) = read_int32();
1303     break;
1304   }
1305   case TYPE_UINT32:
1306   {
1307     *static_cast<uint32_t*>(obj) = read_uint32();
1308     break;
1309   }
1310   case TYPE_ENUM:
1311   {
1312     *static_cast<uint32_t*>(obj) = read_enum();
1313     break;
1314   }
1315   case TYPE_INT16:
1316   {
1317     *static_cast<int16_t*>(obj) = read_int32();
1318     break;
1319   }
1320   case TYPE_UINT16:
1321   {
1322     *static_cast<uint16_t*>(obj) = read_uint32();
1323     break;
1324   }
1325   case TYPE_CHAR:
1326   {
1327     *static_cast<char*>(obj) = read_bits(8);
1328     break;
1329   }
1330   case TYPE_UCHAR:
1331   {
1332     *static_cast<unsigned char*>(obj) = read_bits(8);
1333     break;
1334   }
1335   case TYPE_BOOL:
1336   {
1337     *static_cast<bool*>(obj) = read_bit();
1338     break;
1339   }
1340   case TYPE_ZSTRING:
1341   {
1342     unsigned int value_pos = read_int_exp2();
1343     assert(value_pos);
1344     *static_cast<zstring*>(obj) = theStrings.at(value_pos-1).str;
1345     break;
1346   }
1347   case TYPE_STD_STRING:
1348   {
1349     unsigned int value_pos = read_int_exp2();
1350     assert(value_pos);
1351     *reinterpret_cast<std::string*>(obj) = theStrings.at(value_pos-1).str.c_str();
1352     break;
1353   }
1354   default:
1355   {
1356     ZORBA_ASSERT(false);
1357   }
1358   }
1359 }
1360 
1361 
1362 /*******************************************************************************
1363 
1364 ********************************************************************************/
read_next_simple_ptr_field_impl(TypeCode type,void ** obj)1365 void BinArchiver::read_next_simple_ptr_field_impl(TypeCode type, void** obj)
1366 {
1367   if (!is)
1368   {
1369     throw ZORBA_EXCEPTION(zerr::ZCSE0008_OUTPUT_ARCHIVE_USED_FOR_IN_SERIALIZATION);
1370   }
1371 
1372   unsigned char small_treat = read_bits(2);
1373 
1374   switch (small_treat)
1375   {
1376   case 0: // ARCHIVE_FIELD_PTR
1377   {
1378     int id = read_int_exp() + theLastId;
1379     theLastId = id;
1380 
1381     switch (type)
1382     {
1383     case TYPE_STD_STRING:
1384     {
1385       unsigned int value_pos = read_int_exp2();
1386       assert(value_pos);
1387       *reinterpret_cast<std::string**>(obj) =
1388       new std::string(theStrings.at(value_pos-1).str.c_str());
1389 
1390       break;
1391     }
1392     case TYPE_COLLATOR:
1393     {
1394       unsigned int value_pos = read_int_exp2();
1395       assert(value_pos);
1396       zstring uri = theStrings.at(value_pos-1).str;
1397 
1398       if (!uri.empty())
1399       {
1400         *reinterpret_cast<XQPCollator**>(obj) =
1401         CollationFactory::createCollator(uri.c_str());
1402       }
1403       else
1404       {
1405         *reinterpret_cast<XQPCollator**>(obj) =
1406         CollationFactory::createCollator();
1407       }
1408 
1409       break;
1410     }
1411     default:
1412     {
1413       throw ZORBA_EXCEPTION(zerr::ZCSE0002_INCOMPATIBLE_INPUT_FIELD, ERROR_PARAMS(id));
1414     }
1415     }
1416 
1417     register_reference(id, ARCHIVE_FIELD_PTR, *obj);
1418 
1419     break;
1420   }
1421   case 1: // ARCHIVE_FIELD_NULL
1422   {
1423     obj = NULL;
1424     break;
1425   }
1426   case 2: // ARCHIVE_FIELD_REFERENCING
1427   {
1428     int referencing = read_uint32();
1429 
1430     *obj = get_reference_value(referencing);
1431 
1432     ZORBA_ASSERT(obj);
1433     break;
1434   }
1435   default:
1436   {
1437     ZORBA_ASSERT(false);
1438   }
1439   }
1440 }
1441 
1442 
1443 /*******************************************************************************
1444 
1445 ********************************************************************************/
read_next_compound_field_impl(bool is_class,ArchiveFieldKind & field_treat,TypeCode & type,int & id,int & referencing)1446 void BinArchiver::read_next_compound_field_impl(
1447     bool is_class,
1448     ArchiveFieldKind& field_treat,
1449     TypeCode& type,
1450     int& id,
1451     int& referencing)
1452 {
1453   if (!is)
1454   {
1455     throw ZORBA_EXCEPTION(zerr::ZCSE0008_OUTPUT_ARCHIVE_USED_FOR_IN_SERIALIZATION);
1456   }
1457 
1458   type = TYPE_LAST;
1459   id = -1;
1460   referencing = -1;
1461 
1462   unsigned char small_treat = read_bits(2);
1463 
1464   switch (small_treat)
1465   {
1466   case 0:
1467   {
1468     assert(field_treat == ARCHIVE_FIELD_NORMAL || field_treat == ARCHIVE_FIELD_PTR);
1469 
1470     id = read_int_exp() + theLastId;
1471     theLastId = id;
1472 
1473     if (is_class && (field_treat == ARCHIVE_FIELD_PTR))
1474     {
1475       unsigned int tmp = read_enum();
1476       assert(tmp <= TYPE_LAST);
1477 
1478       type = static_cast<TypeCode>(tmp);
1479     }
1480 
1481     break;
1482   }
1483   case 1:
1484   {
1485     field_treat = ARCHIVE_FIELD_NULL;
1486     break;
1487   }
1488   case 2:
1489   {
1490     field_treat = ARCHIVE_FIELD_REFERENCING;
1491     referencing = read_uint32();
1492     break;
1493   }
1494   case 3:
1495   {
1496     field_treat = ARCHIVE_FIELD_BASECLASS;
1497     break;
1498   }
1499   default:
1500   {
1501     ZORBA_ASSERT(false);
1502   }
1503   }
1504 }
1505 
1506 
1507 /*******************************************************************************
1508 
1509 ********************************************************************************/
read_end_current_level_impl()1510 void BinArchiver::read_end_current_level_impl()
1511 {
1512 #ifndef NDEBUG
1513   unsigned char  tempbyte = 0;
1514 
1515   tempbyte = read_bits(8);
1516   if (tempbyte != 0xFF)
1517   {
1518     throw ZORBA_EXCEPTION(zerr::ZCSE0002_INCOMPATIBLE_INPUT_FIELD,
1519     ERROR_PARAMS(theLastId));
1520   }
1521 #endif
1522 }
1523 
1524 
1525 } // namespace serialization
1526 } // namespace zorba
1527 /* vim:set et sw=2 ts=2: */
1528