1 //
2 //  Copyright (C) 2002-2008 Greg Landrum and Rational Discovery LLC
3 //
4 //  @@ All Rights Reserved @@
5 //  This file is part of the RDKit.
6 //  The contents are covered by the terms of the BSD license
7 //  which is included in the file license.txt, found at the root
8 //  of the RDKit source tree.
9 //
10 //
11 #include <RDGeneral/export.h>
12 #ifndef _RD_STREAMOPS_H
13 #define _RD_STREAMOPS_H
14 
15 #include "types.h"
16 #include "Invariant.h"
17 #include "RDProps.h"
18 #include <string>
19 #include <sstream>
20 #include <iostream>
21 #include <boost/cstdint.hpp>
22 #include <boost/predef.h>
23 
24 namespace RDKit {
25 // this code block for handling endian problems is adapted from :
26 // http://stackoverflow.com/questions/105252/how-do-i-convert-between-big-endian-and-little-endian-values-in-c
27 enum EEndian {
28   LITTLE_ENDIAN_ORDER,
29   BIG_ENDIAN_ORDER,
30 #if defined(BOOST_ENDIAN_LITTLE_BYTE) || defined(BOOST_ENDIAN_LITTLE_WORD)
31   HOST_ENDIAN_ORDER = LITTLE_ENDIAN_ORDER
32 #elif defined(BOOST_ENDIAN_BIG_BYTE)
33   HOST_ENDIAN_ORDER = BIG_ENDIAN_ORDER
34 #elif defined(BOOST_ENDIAN_BIG_WORD)
35 #error "Cannot compile on word-swapped big-endian systems"
36 #else
37 #error "Failed to determine the system endian value"
38 #endif
39 };
40 
//! Reverses the order of the first \c size bytes of \c value.
//!
//! \tparam T     type of the value to swap; it is copied byte by byte
//! \tparam size  number of bytes to reverse, normally \c sizeof(T)
//! \param value  the value whose bytes are reversed
//! \return a copy of \c value with its bytes in reverse order; values of
//!         fewer than two bytes are returned unchanged
template <class T, unsigned int size>
inline T SwapBytes(T value) {
  static_assert(size <= sizeof(T),
                "SwapBytes: size must not exceed sizeof(T)");
  if (size < 2) return value;

  // Go through char pointers instead of a union: accessing an object's bytes
  // via char is allowed by the aliasing rules, whereas reading the inactive
  // member of a union is not.
  T swapped = value;
  const char *src = reinterpret_cast<const char *>(&value);
  char *dst = reinterpret_cast<char *>(&swapped);
  for (unsigned int i = 0; i < size; ++i) {
    dst[i] = src[size - 1 - i];
  }

  return swapped;
}
60 
61 // Here is the function you will use. Again there is two compile-time assertion
62 // that use the boost libraries. You could probably comment them out, but if you
63 // do be cautious not to use this function for anything else than integers
64 // types. This function need to be called like this :
65 //
66 //     int x = someValue;
67 //     int i = EndianSwapBytes<HOST_ENDIAN_ORDER, BIG_ENDIAN_ORDER>(x);
68 //
69 template <EEndian from, EEndian to, class T>
EndianSwapBytes(T value)70 inline T EndianSwapBytes(T value) {
71   // A : La donnée à swapper à une taille de 2, 4 ou 8 octets
72   BOOST_STATIC_ASSERT(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
73                       sizeof(T) == 8);
74   if (sizeof(T) == 1) return value;
75 
76   // A : La donnée à swapper est d'un type arithmetic
77   // BOOST_STATIC_ASSERT(boost::is_arithmetic<T>::value);
78 
79   // Si from et to sont du même type on ne swap pas.
80   if (from == to) return value;
81 
82   return SwapBytes<T, sizeof(T)>(value);
83 }
84 template <EEndian from, EEndian to>
EndianSwapBytes(char value)85 inline char EndianSwapBytes(char value) {
86   return value;
87 }
88 template <EEndian from, EEndian to>
EndianSwapBytes(unsigned char value)89 inline unsigned char EndianSwapBytes(unsigned char value) {
90   return value;
91 }
92 template <EEndian from, EEndian to>
EndianSwapBytes(signed char value)93 inline signed char EndianSwapBytes(signed char value) {
94   return value;
95 }
96 // --------------------------------------
97 
98 //! Packs an integer and outputs it to a stream
appendPackedIntToStream(std::stringstream & ss,boost::uint32_t num)99 inline void appendPackedIntToStream(std::stringstream &ss,
100                                     boost::uint32_t num) {
101   int nbytes, bix;
102   unsigned int val, res;
103   char tc;
104 
105   res = num;
106   while (1) {
107     if (res < (1 << 7)) {
108       val = (res << 1);
109       nbytes = 1;
110       break;
111     }
112     res -= (1 << 7);
113     if (res < (1 << 14)) {
114       val = ((res << 2) | 1);
115       nbytes = 2;
116       break;
117     }
118     res -= (1 << 14);
119     if (res < (1 << 21)) {
120       val = ((res << 3) | 3);
121       nbytes = 3;
122       break;
123     }
124     res -= (1 << 21);
125     if (res < (1 << 29)) {
126       val = ((res << 3) | 7);
127       nbytes = 4;
128       break;
129     } else {
130       CHECK_INVARIANT(0, "ERROR: Integer too big to pack\n");
131     }
132   }
133   // val = EndianSwapBytes<HOST_ENDIAN_ORDER,LITTLE_ENDIAN_ORDER>(val);
134 
135   for (bix = 0; bix < nbytes; bix++) {
136     tc = (char)(val & 255);
137     ss.write(&tc, 1);
138     val >>= 8;
139   }
140 }
141 
142 //! Reads an integer from a stream in packed format and returns the result.
readPackedIntFromStream(std::stringstream & ss)143 inline boost::uint32_t readPackedIntFromStream(std::stringstream &ss) {
144   boost::uint32_t val, num;
145   int shift, offset;
146   char tmp;
147   ss.read(&tmp, sizeof(tmp));
148   if (ss.fail()) {
149     throw std::runtime_error("failed to read from stream");
150   }
151 
152   val = UCHAR(tmp);
153   offset = 0;
154   if ((val & 1) == 0) {
155     shift = 1;
156   } else if ((val & 3) == 1) {
157     ss.read((char *)&tmp, sizeof(tmp));
158     if (ss.fail()) {
159       throw std::runtime_error("failed to read from stream");
160     }
161 
162     val |= (UCHAR(tmp) << 8);
163     shift = 2;
164     offset = (1 << 7);
165   } else if ((val & 7) == 3) {
166     ss.read((char *)&tmp, sizeof(tmp));
167     if (ss.fail()) {
168       throw std::runtime_error("failed to read from stream");
169     }
170 
171     val |= (UCHAR(tmp) << 8);
172     ss.read((char *)&tmp, sizeof(tmp));
173     if (ss.fail()) {
174       throw std::runtime_error("failed to read from stream");
175     }
176 
177     val |= (UCHAR(tmp) << 16);
178     shift = 3;
179     offset = (1 << 7) + (1 << 14);
180   } else {
181     ss.read((char *)&tmp, sizeof(tmp));
182     if (ss.fail()) {
183       throw std::runtime_error("failed to read from stream");
184     }
185 
186     val |= (UCHAR(tmp) << 8);
187     ss.read((char *)&tmp, sizeof(tmp));
188     if (ss.fail()) {
189       throw std::runtime_error("failed to read from stream");
190     }
191 
192     val |= (UCHAR(tmp) << 16);
193     ss.read((char *)&tmp, sizeof(tmp));
194     if (ss.fail()) {
195       throw std::runtime_error("failed to read from stream");
196     }
197 
198     val |= (UCHAR(tmp) << 24);
199     shift = 3;
200     offset = (1 << 7) + (1 << 14) + (1 << 21);
201   }
202   num = (val >> shift) + offset;
203   // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
204   return num;
205 }
206 
207 //! Reads an integer from a char * in packed format and returns the result.
208 //!  The argument is advanced
pullPackedIntFromString(const char * & text)209 inline boost::uint32_t pullPackedIntFromString(const char *&text) {
210   boost::uint32_t val, num;
211   int shift, offset;
212   char tmp;
213   tmp = *text;
214   text++;
215   val = UCHAR(tmp);
216   offset = 0;
217   if ((val & 1) == 0) {
218     shift = 1;
219   } else if ((val & 3) == 1) {
220     tmp = *text;
221     text++;
222     val |= (UCHAR(tmp) << 8);
223     shift = 2;
224     offset = (1 << 7);
225   } else if ((val & 7) == 3) {
226     tmp = *text;
227     text++;
228     val |= (UCHAR(tmp) << 8);
229     tmp = *text;
230     text++;
231     val |= (UCHAR(tmp) << 16);
232     shift = 3;
233     offset = (1 << 7) + (1 << 14);
234   } else {
235     tmp = *text;
236     text++;
237     val |= (UCHAR(tmp) << 8);
238     tmp = *text;
239     text++;
240     val |= (UCHAR(tmp) << 16);
241     tmp = *text;
242     text++;
243     val |= (UCHAR(tmp) << 24);
244     shift = 3;
245     offset = (1 << 7) + (1 << 14) + (1 << 21);
246   }
247   num = (val >> shift) + offset;
248   // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
249   return num;
250 }
251 
252 //! does a binary write of an object to a stream
253 template <typename T>
streamWrite(std::ostream & ss,const T & val)254 void streamWrite(std::ostream &ss, const T &val) {
255   T tval = EndianSwapBytes<HOST_ENDIAN_ORDER, LITTLE_ENDIAN_ORDER>(val);
256   ss.write((const char *)&tval, sizeof(T));
257 }
258 
259 //! special case for string
streamWrite(std::ostream & ss,const std::string & what)260 inline void streamWrite(std::ostream &ss, const std::string &what) {
261   unsigned int l = rdcast<unsigned int>(what.length());
262   ss.write((const char *)&l, sizeof(l));
263   ss.write(what.c_str(), sizeof(char) * l);
264 };
265 
266 template <typename T>
streamWriteVec(std::ostream & ss,const T & val)267 void streamWriteVec(std::ostream &ss, const T &val) {
268   streamWrite(ss, static_cast<boost::uint64_t>(val.size()));
269   for (size_t i = 0; i < val.size(); ++i) streamWrite(ss, val[i]);
270 }
271 
272 //! does a binary read of an object from a stream
273 template <typename T>
streamRead(std::istream & ss,T & loc)274 void streamRead(std::istream &ss, T &loc) {
275   T tloc;
276   ss.read((char *)&tloc, sizeof(T));
277   if (ss.fail()) {
278     throw std::runtime_error("failed to read from stream");
279   }
280   loc = EndianSwapBytes<LITTLE_ENDIAN_ORDER, HOST_ENDIAN_ORDER>(tloc);
281 }
282 
283 //! special case for string
284 template <class T>
streamRead(std::istream & ss,T & obj,int version)285 void streamRead(std::istream &ss, T &obj, int version) {
286   RDUNUSED_PARAM(version);
287   streamRead(ss, obj);
288 }
289 
streamRead(std::istream & ss,std::string & what,int version)290 inline void streamRead(std::istream &ss, std::string &what, int version) {
291   RDUNUSED_PARAM(version);
292   unsigned int l;
293   ss.read((char *)&l, sizeof(l));
294   if (ss.fail()) {
295     throw std::runtime_error("failed to read from stream");
296   }
297   char *buff = new char[l];
298   ss.read(buff, sizeof(char) * l);
299   if (ss.fail()) {
300     throw std::runtime_error("failed to read from stream");
301   }
302   what = std::string(buff, l);
303   delete[] buff;
304 };
305 
306 template <class T>
streamReadVec(std::istream & ss,T & val)307 void streamReadVec(std::istream &ss, T &val) {
308   boost::uint64_t size;
309   streamRead(ss, size);
310   val.resize(size);
311 
312   for (size_t i = 0; i < size; ++i) streamRead(ss, val[i]);
313 }
314 
streamReadStringVec(std::istream & ss,std::vector<std::string> & val,int version)315 inline void streamReadStringVec(std::istream &ss, std::vector<std::string> &val,
316                                 int version) {
317   boost::uint64_t size;
318   streamRead(ss, size);
319   val.resize(size);
320 
321   for (size_t i = 0; i < size; ++i) streamRead(ss, val[i], version);
322 }
323 
//! Reads the next line from \c inStream and returns it without the line
//! terminator. A trailing carriage return (DOS-style line ending) is
//! stripped as well.
inline std::string getLine(std::istream *inStream) {
  std::string line;
  std::getline(*inStream, line);
  if (!line.empty() && line.back() == '\r') {
    line.pop_back();
  }
  return line;
}
333 //! grabs the next line from an instream and returns it.
getLine(std::istream & inStream)334 inline std::string getLine(std::istream &inStream) {
335   return getLine(&inStream);
336 }
337 
//! One-byte type markers written in front of each serialized property value.
//! n.b. RDTypeTag cannot be used directly for this because its values are
//! implementation specific.
namespace DTags {
// scalar values
constexpr unsigned char StringTag = 0;
constexpr unsigned char IntTag = 1;
constexpr unsigned char UnsignedIntTag = 2;
constexpr unsigned char BoolTag = 3;
constexpr unsigned char FloatTag = 4;
constexpr unsigned char DoubleTag = 5;

// vector values
constexpr unsigned char VecStringTag = 6;
constexpr unsigned char VecIntTag = 7;
constexpr unsigned char VecUIntTag = 8;
constexpr unsigned char VecBoolTag = 9;
constexpr unsigned char VecFloatTag = 10;
constexpr unsigned char VecDoubleTag = 11;

// special markers
constexpr unsigned char CustomTag = 0xFE;  // custom data
constexpr unsigned char EndTag = 0xFF;
}  // namespace DTags
357 
358 class CustomPropHandler {
359  public:
~CustomPropHandler()360   virtual ~CustomPropHandler(){};
361   virtual const char *getPropName() const = 0;
362   virtual bool canSerialize(const RDValue &value) const = 0;
363   virtual bool read(std::istream &ss, RDValue &value) const = 0;
364   virtual bool write(std::ostream &ss, const RDValue &value) const = 0;
365   virtual CustomPropHandler *clone() const = 0;
366 };
367 
368 typedef std::vector<std::shared_ptr<const CustomPropHandler>>
369     CustomPropHandlerVec;
370 
371 inline bool isSerializable(const Dict::Pair &pair,
372                            const CustomPropHandlerVec &handlers = {}) {
373   switch (pair.val.getTag()) {
374     case RDTypeTag::StringTag:
375     case RDTypeTag::IntTag:
376     case RDTypeTag::UnsignedIntTag:
377     case RDTypeTag::BoolTag:
378     case RDTypeTag::FloatTag:
379     case RDTypeTag::DoubleTag:
380 
381     case RDTypeTag::VecStringTag:
382     case RDTypeTag::VecIntTag:
383     case RDTypeTag::VecUnsignedIntTag:
384     case RDTypeTag::VecFloatTag:
385     case RDTypeTag::VecDoubleTag:
386       return true;
387     case RDTypeTag::AnyTag:
388       for (auto &handler : handlers) {
389         if (handler->canSerialize(pair.val)) {
390           return true;
391         }
392       }
393       return false;
394     default:
395       return false;
396   }
397 }
398 
399 inline bool streamWriteProp(std::ostream &ss, const Dict::Pair &pair,
400                             const CustomPropHandlerVec &handlers = {}) {
401   if (!isSerializable(pair, handlers)) {
402     return false;
403   }
404 
405   streamWrite(ss, pair.key);
406   switch (pair.val.getTag()) {
407     case RDTypeTag::StringTag:
408       streamWrite(ss, DTags::StringTag);
409       streamWrite(ss, rdvalue_cast<std::string>(pair.val));
410       break;
411     case RDTypeTag::IntTag:
412       streamWrite(ss, DTags::IntTag);
413       streamWrite(ss, rdvalue_cast<int>(pair.val));
414       break;
415     case RDTypeTag::UnsignedIntTag:
416       streamWrite(ss, DTags::UnsignedIntTag);
417       streamWrite(ss, rdvalue_cast<unsigned int>(pair.val));
418       break;
419     case RDTypeTag::BoolTag:
420       streamWrite(ss, DTags::BoolTag);
421       streamWrite(ss, rdvalue_cast<bool>(pair.val));
422       break;
423     case RDTypeTag::FloatTag:
424       streamWrite(ss, DTags::FloatTag);
425       streamWrite(ss, rdvalue_cast<float>(pair.val));
426       break;
427     case RDTypeTag::DoubleTag:
428       streamWrite(ss, DTags::DoubleTag);
429       streamWrite(ss, rdvalue_cast<double>(pair.val));
430       break;
431 
432     case RDTypeTag::VecStringTag:
433       streamWrite(ss, DTags::VecStringTag);
434       streamWriteVec(ss, rdvalue_cast<std::vector<std::string>>(pair.val));
435       break;
436     case RDTypeTag::VecDoubleTag:
437       streamWrite(ss, DTags::VecDoubleTag);
438       streamWriteVec(ss, rdvalue_cast<std::vector<double>>(pair.val));
439       break;
440     case RDTypeTag::VecFloatTag:
441       streamWrite(ss, DTags::VecFloatTag);
442       streamWriteVec(ss, rdvalue_cast<std::vector<float>>(pair.val));
443       break;
444     case RDTypeTag::VecIntTag:
445       streamWrite(ss, DTags::VecIntTag);
446       streamWriteVec(ss, rdvalue_cast<std::vector<int>>(pair.val));
447       break;
448     case RDTypeTag::VecUnsignedIntTag:
449       streamWrite(ss, DTags::VecUIntTag);
450       streamWriteVec(ss, rdvalue_cast<std::vector<unsigned int>>(pair.val));
451       break;
452     default:
453       for (auto &handler : handlers) {
454         if (handler->canSerialize(pair.val)) {
455           // The form of a custom tag is
456           //  CustomTag
457           //  customPropName (must be unique)
458           //  custom serialization
459           streamWrite(ss, DTags::CustomTag);
460           streamWrite(ss, std::string(handler->getPropName()));
461           handler->write(ss, pair.val);
462           return true;
463         }
464       }
465 
466       return false;
467   }
468   return true;
469 }
470 
471 inline bool streamWriteProps(std::ostream &ss, const RDProps &props,
472                              bool savePrivate = false,
473                              bool saveComputed = false,
474                              const CustomPropHandlerVec &handlers = {}) {
475   STR_VECT propsToSave = props.getPropList(savePrivate, saveComputed);
476   std::set<std::string> propnames(propsToSave.begin(), propsToSave.end());
477 
478   const Dict &dict = props.getDict();
479   unsigned int count = 0;
480   for (Dict::DataType::const_iterator it = dict.getData().begin();
481        it != dict.getData().end(); ++it) {
482     if (propnames.find(it->key) != propnames.end()) {
483       if (isSerializable(*it, handlers)) {
484         count++;
485       }
486     }
487   }
488 
489   streamWrite(ss, count);  // packed int?
490 
491   unsigned int writtenCount = 0;
492   for (Dict::DataType::const_iterator it = dict.getData().begin();
493        it != dict.getData().end(); ++it) {
494     if (propnames.find(it->key) != propnames.end()) {
495       if (isSerializable(*it, handlers)) {
496         // note - not all properties are serializable, this may be
497         //  a null op
498         if (streamWriteProp(ss, *it, handlers)) {
499           writtenCount++;
500         }
501       }
502     }
503   }
504   POSTCONDITION(count == writtenCount,
505                 "Estimated property count not equal to written");
506   return true;
507 }
508 
509 template <class T>
readRDValue(std::istream & ss,RDValue & value)510 void readRDValue(std::istream &ss, RDValue &value) {
511   T v;
512   streamRead(ss, v);
513   value = v;
514 }
515 
516 template <class T>
readRDVecValue(std::istream & ss,RDValue & value)517 void readRDVecValue(std::istream &ss, RDValue &value) {
518   std::vector<T> v;
519   streamReadVec(ss, v);
520   value = v;
521 }
522 
readRDValueString(std::istream & ss,RDValue & value)523 inline void readRDValueString(std::istream &ss, RDValue &value) {
524   std::string v;
525   int version = 0;
526   streamRead(ss, v, version);
527   value = v;
528 }
529 
readRDStringVecValue(std::istream & ss,RDValue & value)530 inline void readRDStringVecValue(std::istream &ss, RDValue &value) {
531   std::vector<std::string> v;
532   int version = 0;
533   streamReadStringVec(ss, v, version);
534   value = v;
535 }
536 
537 inline bool streamReadProp(std::istream &ss, Dict::Pair &pair,
538                            bool &dictHasNonPOD,
539                            const CustomPropHandlerVec &handlers = {}) {
540   int version = 0;
541   streamRead(ss, pair.key, version);
542 
543   unsigned char type;
544   streamRead(ss, type);
545   switch (type) {
546     case DTags::IntTag:
547       readRDValue<int>(ss, pair.val);
548       break;
549     case DTags::UnsignedIntTag:
550       readRDValue<unsigned int>(ss, pair.val);
551       break;
552     case DTags::BoolTag:
553       readRDValue<bool>(ss, pair.val);
554       break;
555     case DTags::FloatTag:
556       readRDValue<float>(ss, pair.val);
557       break;
558     case DTags::DoubleTag:
559       readRDValue<double>(ss, pair.val);
560       break;
561 
562     case DTags::StringTag:
563       readRDValueString(ss, pair.val);
564       dictHasNonPOD = true;
565       break;
566     case DTags::VecStringTag:
567       readRDStringVecValue(ss, pair.val);
568       dictHasNonPOD = true;
569       break;
570     case DTags::VecIntTag:
571       readRDVecValue<int>(ss, pair.val);
572       dictHasNonPOD = true;
573       break;
574     case DTags::VecUIntTag:
575       readRDVecValue<unsigned int>(ss, pair.val);
576       dictHasNonPOD = true;
577       break;
578     case DTags::VecFloatTag:
579       readRDVecValue<float>(ss, pair.val);
580       dictHasNonPOD = true;
581       break;
582     case DTags::VecDoubleTag:
583       readRDVecValue<double>(ss, pair.val);
584       dictHasNonPOD = true;
585       break;
586     case DTags::CustomTag: {
587       std::string propType;
588       int version = 0;
589       streamRead(ss, propType, version);
590       for (auto &handler : handlers) {
591         if (propType == handler->getPropName()) {
592           handler->read(ss, pair.val);
593           dictHasNonPOD = true;
594           return true;
595         }
596       }
597       return false;
598     }
599 
600     default:
601       return false;
602   }
603   return true;
604 }
605 
606 inline unsigned int streamReadProps(std::istream &ss, RDProps &props,
607                                     const CustomPropHandlerVec &handlers = {}) {
608   unsigned int count;
609   streamRead(ss, count);
610 
611   Dict &dict = props.getDict();
612   dict.reset();  // Clear data before repopulating
613   dict.getData().resize(count);
614   for (unsigned index = 0; index < count; ++index) {
615     CHECK_INVARIANT(streamReadProp(ss, dict.getData()[index],
616                                    dict.getNonPODStatus(), handlers),
617                     "Corrupted property serialization detected");
618   }
619 
620   return count;
621 }
622 
623 }  // namespace RDKit
624 
625 #endif
626