1 //
2 // Copyright (C) 2002-2008 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 //
11 #include <RDGeneral/export.h>
12 #ifndef _RD_STREAMOPS_H
13 #define _RD_STREAMOPS_H
14
15 #include "types.h"
16 #include "Invariant.h"
17 #include "RDProps.h"
18 #include <string>
19 #include <sstream>
20 #include <iostream>
21 #include <boost/cstdint.hpp>
22 #include <boost/predef.h>
23
24 namespace RDKit {
25 // this code block for handling endian problems is adapted from :
26 // http://stackoverflow.com/questions/105252/how-do-i-convert-between-big-endian-and-little-endian-values-in-c
27 enum EEndian {
28 LITTLE_ENDIAN_ORDER,
29 BIG_ENDIAN_ORDER,
30 #if defined(BOOST_ENDIAN_LITTLE_BYTE) || defined(BOOST_ENDIAN_LITTLE_WORD)
31 HOST_ENDIAN_ORDER = LITTLE_ENDIAN_ORDER
32 #elif defined(BOOST_ENDIAN_BIG_BYTE)
33 HOST_ENDIAN_ORDER = BIG_ENDIAN_ORDER
34 #elif defined(BOOST_ENDIAN_BIG_WORD)
35 #error "Cannot compile on word-swapped big-endian systems"
36 #else
37 #error "Failed to determine the system endian value"
38 #endif
39 };
40
//! Returns \c value with the order of its first \c size bytes reversed.
/*!
  \tparam T     type of the value being swapped
  \tparam size  number of bytes to reverse; the caller in this file passes
                sizeof(T). Must not exceed sizeof(T).

  Values fewer than two bytes wide are returned unchanged.

  The bytes are accessed through unsigned char pointers instead of through a
  union, because reading a union member other than the one most recently
  written is undefined behavior in C++.
*/
template <class T, unsigned int size>
inline T SwapBytes(T value) {
  static_assert(size <= sizeof(T),
                "SwapBytes: size must not exceed sizeof(T)");
  if (size < 2) return value;

  // start from a copy so that any bytes beyond `size` keep their value
  T swapped = value;
  const unsigned char *src = reinterpret_cast<const unsigned char *>(&value);
  unsigned char *dst = reinterpret_cast<unsigned char *>(&swapped);
  for (unsigned int i = 0; i < size; ++i) {
    dst[i] = src[size - 1 - i];
  }
  return swapped;
}
60
61 // Here is the function you will use. Again there is two compile-time assertion
62 // that use the boost libraries. You could probably comment them out, but if you
63 // do be cautious not to use this function for anything else than integers
64 // types. This function need to be called like this :
65 //
66 // int x = someValue;
67 // int i = EndianSwapBytes<HOST_ENDIAN_ORDER, BIG_ENDIAN_ORDER>(x);
68 //
69 template <EEndian from, EEndian to, class T>
EndianSwapBytes(T value)70 inline T EndianSwapBytes(T value) {
71 // A : La donnée à swapper à une taille de 2, 4 ou 8 octets
72 BOOST_STATIC_ASSERT(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
73 sizeof(T) == 8);
74 if (sizeof(T) == 1) return value;
75
76 // A : La donnée à swapper est d'un type arithmetic
77 // BOOST_STATIC_ASSERT(boost::is_arithmetic<T>::value);
78
79 // Si from et to sont du même type on ne swap pas.
80 if (from == to) return value;
81
82 return SwapBytes<T, sizeof(T)>(value);
83 }
84 template <EEndian from, EEndian to>
EndianSwapBytes(char value)85 inline char EndianSwapBytes(char value) {
86 return value;
87 }
88 template <EEndian from, EEndian to>
EndianSwapBytes(unsigned char value)89 inline unsigned char EndianSwapBytes(unsigned char value) {
90 return value;
91 }
92 template <EEndian from, EEndian to>
EndianSwapBytes(signed char value)93 inline signed char EndianSwapBytes(signed char value) {
94 return value;
95 }
96 // --------------------------------------
97
98 //! Packs an integer and outputs it to a stream
appendPackedIntToStream(std::stringstream & ss,boost::uint32_t num)99 inline void appendPackedIntToStream(std::stringstream &ss,
100 boost::uint32_t num) {
101 int nbytes, bix;
102 unsigned int val, res;
103 char tc;
104
105 res = num;
106 while (1) {
107 if (res < (1 << 7)) {
108 val = (res << 1);
109 nbytes = 1;
110 break;
111 }
112 res -= (1 << 7);
113 if (res < (1 << 14)) {
114 val = ((res << 2) | 1);
115 nbytes = 2;
116 break;
117 }
118 res -= (1 << 14);
119 if (res < (1 << 21)) {
120 val = ((res << 3) | 3);
121 nbytes = 3;
122 break;
123 }
124 res -= (1 << 21);
125 if (res < (1 << 29)) {
126 val = ((res << 3) | 7);
127 nbytes = 4;
128 break;
129 } else {
130 CHECK_INVARIANT(0, "ERROR: Integer too big to pack\n");
131 }
132 }
133 // val = EndianSwapBytes<HOST_ENDIAN_ORDER,LITTLE_ENDIAN_ORDER>(val);
134
135 for (bix = 0; bix < nbytes; bix++) {
136 tc = (char)(val & 255);
137 ss.write(&tc, 1);
138 val >>= 8;
139 }
140 }
141
142 //! Reads an integer from a stream in packed format and returns the result.
readPackedIntFromStream(std::stringstream & ss)143 inline boost::uint32_t readPackedIntFromStream(std::stringstream &ss) {
144 boost::uint32_t val, num;
145 int shift, offset;
146 char tmp;
147 ss.read(&tmp, sizeof(tmp));
148 if (ss.fail()) {
149 throw std::runtime_error("failed to read from stream");
150 }
151
152 val = UCHAR(tmp);
153 offset = 0;
154 if ((val & 1) == 0) {
155 shift = 1;
156 } else if ((val & 3) == 1) {
157 ss.read((char *)&tmp, sizeof(tmp));
158 if (ss.fail()) {
159 throw std::runtime_error("failed to read from stream");
160 }
161
162 val |= (UCHAR(tmp) << 8);
163 shift = 2;
164 offset = (1 << 7);
165 } else if ((val & 7) == 3) {
166 ss.read((char *)&tmp, sizeof(tmp));
167 if (ss.fail()) {
168 throw std::runtime_error("failed to read from stream");
169 }
170
171 val |= (UCHAR(tmp) << 8);
172 ss.read((char *)&tmp, sizeof(tmp));
173 if (ss.fail()) {
174 throw std::runtime_error("failed to read from stream");
175 }
176
177 val |= (UCHAR(tmp) << 16);
178 shift = 3;
179 offset = (1 << 7) + (1 << 14);
180 } else {
181 ss.read((char *)&tmp, sizeof(tmp));
182 if (ss.fail()) {
183 throw std::runtime_error("failed to read from stream");
184 }
185
186 val |= (UCHAR(tmp) << 8);
187 ss.read((char *)&tmp, sizeof(tmp));
188 if (ss.fail()) {
189 throw std::runtime_error("failed to read from stream");
190 }
191
192 val |= (UCHAR(tmp) << 16);
193 ss.read((char *)&tmp, sizeof(tmp));
194 if (ss.fail()) {
195 throw std::runtime_error("failed to read from stream");
196 }
197
198 val |= (UCHAR(tmp) << 24);
199 shift = 3;
200 offset = (1 << 7) + (1 << 14) + (1 << 21);
201 }
202 num = (val >> shift) + offset;
203 // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
204 return num;
205 }
206
207 //! Reads an integer from a char * in packed format and returns the result.
208 //! The argument is advanced
pullPackedIntFromString(const char * & text)209 inline boost::uint32_t pullPackedIntFromString(const char *&text) {
210 boost::uint32_t val, num;
211 int shift, offset;
212 char tmp;
213 tmp = *text;
214 text++;
215 val = UCHAR(tmp);
216 offset = 0;
217 if ((val & 1) == 0) {
218 shift = 1;
219 } else if ((val & 3) == 1) {
220 tmp = *text;
221 text++;
222 val |= (UCHAR(tmp) << 8);
223 shift = 2;
224 offset = (1 << 7);
225 } else if ((val & 7) == 3) {
226 tmp = *text;
227 text++;
228 val |= (UCHAR(tmp) << 8);
229 tmp = *text;
230 text++;
231 val |= (UCHAR(tmp) << 16);
232 shift = 3;
233 offset = (1 << 7) + (1 << 14);
234 } else {
235 tmp = *text;
236 text++;
237 val |= (UCHAR(tmp) << 8);
238 tmp = *text;
239 text++;
240 val |= (UCHAR(tmp) << 16);
241 tmp = *text;
242 text++;
243 val |= (UCHAR(tmp) << 24);
244 shift = 3;
245 offset = (1 << 7) + (1 << 14) + (1 << 21);
246 }
247 num = (val >> shift) + offset;
248 // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
249 return num;
250 }
251
252 //! does a binary write of an object to a stream
253 template <typename T>
streamWrite(std::ostream & ss,const T & val)254 void streamWrite(std::ostream &ss, const T &val) {
255 T tval = EndianSwapBytes<HOST_ENDIAN_ORDER, LITTLE_ENDIAN_ORDER>(val);
256 ss.write((const char *)&tval, sizeof(T));
257 }
258
259 //! special case for string
streamWrite(std::ostream & ss,const std::string & what)260 inline void streamWrite(std::ostream &ss, const std::string &what) {
261 unsigned int l = rdcast<unsigned int>(what.length());
262 ss.write((const char *)&l, sizeof(l));
263 ss.write(what.c_str(), sizeof(char) * l);
264 };
265
266 template <typename T>
streamWriteVec(std::ostream & ss,const T & val)267 void streamWriteVec(std::ostream &ss, const T &val) {
268 streamWrite(ss, static_cast<boost::uint64_t>(val.size()));
269 for (size_t i = 0; i < val.size(); ++i) streamWrite(ss, val[i]);
270 }
271
272 //! does a binary read of an object from a stream
273 template <typename T>
streamRead(std::istream & ss,T & loc)274 void streamRead(std::istream &ss, T &loc) {
275 T tloc;
276 ss.read((char *)&tloc, sizeof(T));
277 if (ss.fail()) {
278 throw std::runtime_error("failed to read from stream");
279 }
280 loc = EndianSwapBytes<LITTLE_ENDIAN_ORDER, HOST_ENDIAN_ORDER>(tloc);
281 }
282
283 //! special case for string
284 template <class T>
streamRead(std::istream & ss,T & obj,int version)285 void streamRead(std::istream &ss, T &obj, int version) {
286 RDUNUSED_PARAM(version);
287 streamRead(ss, obj);
288 }
289
streamRead(std::istream & ss,std::string & what,int version)290 inline void streamRead(std::istream &ss, std::string &what, int version) {
291 RDUNUSED_PARAM(version);
292 unsigned int l;
293 ss.read((char *)&l, sizeof(l));
294 if (ss.fail()) {
295 throw std::runtime_error("failed to read from stream");
296 }
297 char *buff = new char[l];
298 ss.read(buff, sizeof(char) * l);
299 if (ss.fail()) {
300 throw std::runtime_error("failed to read from stream");
301 }
302 what = std::string(buff, l);
303 delete[] buff;
304 };
305
306 template <class T>
streamReadVec(std::istream & ss,T & val)307 void streamReadVec(std::istream &ss, T &val) {
308 boost::uint64_t size;
309 streamRead(ss, size);
310 val.resize(size);
311
312 for (size_t i = 0; i < size; ++i) streamRead(ss, val[i]);
313 }
314
streamReadStringVec(std::istream & ss,std::vector<std::string> & val,int version)315 inline void streamReadStringVec(std::istream &ss, std::vector<std::string> &val,
316 int version) {
317 boost::uint64_t size;
318 streamRead(ss, size);
319 val.resize(size);
320
321 for (size_t i = 0; i < size; ++i) streamRead(ss, val[i], version);
322 }
323
//! Grabs the next line from an input stream and returns it.
/*!
  The line terminator is not part of the result; a single trailing carriage
  return (as left behind by CRLF line endings) is removed as well.

  \param inStream  pointer to the stream to read from
  \return the line that was read (empty if nothing could be read)
*/
inline std::string getLine(std::istream *inStream) {
  std::string line;
  std::getline(*inStream, line);
  if (!line.empty() && line.back() == '\r') {
    line.pop_back();
  }
  return line;
}

//! \overload taking the stream by reference
inline std::string getLine(std::istream &inStream) {
  return getLine(&inStream);
}
337
// Type tags as they are written to and read from serialized streams.
// n.b. RDTypeTag can't be used directly for this because its values are
// implementation specific.
namespace DTags {
// single values
const unsigned char StringTag = 0x00;
const unsigned char IntTag = 0x01;
const unsigned char UnsignedIntTag = 0x02;
const unsigned char BoolTag = 0x03;
const unsigned char FloatTag = 0x04;
const unsigned char DoubleTag = 0x05;

// vectors of values
const unsigned char VecStringTag = 0x06;
const unsigned char VecIntTag = 0x07;
const unsigned char VecUIntTag = 0x08;
const unsigned char VecBoolTag = 0x09;
const unsigned char VecFloatTag = 0x0A;
const unsigned char VecDoubleTag = 0x0B;

// special markers
const unsigned char CustomTag = 0xFE;  // custom data
const unsigned char EndTag = 0xFF;
}  // namespace DTags
357
358 class CustomPropHandler {
359 public:
~CustomPropHandler()360 virtual ~CustomPropHandler(){};
361 virtual const char *getPropName() const = 0;
362 virtual bool canSerialize(const RDValue &value) const = 0;
363 virtual bool read(std::istream &ss, RDValue &value) const = 0;
364 virtual bool write(std::ostream &ss, const RDValue &value) const = 0;
365 virtual CustomPropHandler *clone() const = 0;
366 };
367
368 typedef std::vector<std::shared_ptr<const CustomPropHandler>>
369 CustomPropHandlerVec;
370
371 inline bool isSerializable(const Dict::Pair &pair,
372 const CustomPropHandlerVec &handlers = {}) {
373 switch (pair.val.getTag()) {
374 case RDTypeTag::StringTag:
375 case RDTypeTag::IntTag:
376 case RDTypeTag::UnsignedIntTag:
377 case RDTypeTag::BoolTag:
378 case RDTypeTag::FloatTag:
379 case RDTypeTag::DoubleTag:
380
381 case RDTypeTag::VecStringTag:
382 case RDTypeTag::VecIntTag:
383 case RDTypeTag::VecUnsignedIntTag:
384 case RDTypeTag::VecFloatTag:
385 case RDTypeTag::VecDoubleTag:
386 return true;
387 case RDTypeTag::AnyTag:
388 for (auto &handler : handlers) {
389 if (handler->canSerialize(pair.val)) {
390 return true;
391 }
392 }
393 return false;
394 default:
395 return false;
396 }
397 }
398
399 inline bool streamWriteProp(std::ostream &ss, const Dict::Pair &pair,
400 const CustomPropHandlerVec &handlers = {}) {
401 if (!isSerializable(pair, handlers)) {
402 return false;
403 }
404
405 streamWrite(ss, pair.key);
406 switch (pair.val.getTag()) {
407 case RDTypeTag::StringTag:
408 streamWrite(ss, DTags::StringTag);
409 streamWrite(ss, rdvalue_cast<std::string>(pair.val));
410 break;
411 case RDTypeTag::IntTag:
412 streamWrite(ss, DTags::IntTag);
413 streamWrite(ss, rdvalue_cast<int>(pair.val));
414 break;
415 case RDTypeTag::UnsignedIntTag:
416 streamWrite(ss, DTags::UnsignedIntTag);
417 streamWrite(ss, rdvalue_cast<unsigned int>(pair.val));
418 break;
419 case RDTypeTag::BoolTag:
420 streamWrite(ss, DTags::BoolTag);
421 streamWrite(ss, rdvalue_cast<bool>(pair.val));
422 break;
423 case RDTypeTag::FloatTag:
424 streamWrite(ss, DTags::FloatTag);
425 streamWrite(ss, rdvalue_cast<float>(pair.val));
426 break;
427 case RDTypeTag::DoubleTag:
428 streamWrite(ss, DTags::DoubleTag);
429 streamWrite(ss, rdvalue_cast<double>(pair.val));
430 break;
431
432 case RDTypeTag::VecStringTag:
433 streamWrite(ss, DTags::VecStringTag);
434 streamWriteVec(ss, rdvalue_cast<std::vector<std::string>>(pair.val));
435 break;
436 case RDTypeTag::VecDoubleTag:
437 streamWrite(ss, DTags::VecDoubleTag);
438 streamWriteVec(ss, rdvalue_cast<std::vector<double>>(pair.val));
439 break;
440 case RDTypeTag::VecFloatTag:
441 streamWrite(ss, DTags::VecFloatTag);
442 streamWriteVec(ss, rdvalue_cast<std::vector<float>>(pair.val));
443 break;
444 case RDTypeTag::VecIntTag:
445 streamWrite(ss, DTags::VecIntTag);
446 streamWriteVec(ss, rdvalue_cast<std::vector<int>>(pair.val));
447 break;
448 case RDTypeTag::VecUnsignedIntTag:
449 streamWrite(ss, DTags::VecUIntTag);
450 streamWriteVec(ss, rdvalue_cast<std::vector<unsigned int>>(pair.val));
451 break;
452 default:
453 for (auto &handler : handlers) {
454 if (handler->canSerialize(pair.val)) {
455 // The form of a custom tag is
456 // CustomTag
457 // customPropName (must be unique)
458 // custom serialization
459 streamWrite(ss, DTags::CustomTag);
460 streamWrite(ss, std::string(handler->getPropName()));
461 handler->write(ss, pair.val);
462 return true;
463 }
464 }
465
466 return false;
467 }
468 return true;
469 }
470
471 inline bool streamWriteProps(std::ostream &ss, const RDProps &props,
472 bool savePrivate = false,
473 bool saveComputed = false,
474 const CustomPropHandlerVec &handlers = {}) {
475 STR_VECT propsToSave = props.getPropList(savePrivate, saveComputed);
476 std::set<std::string> propnames(propsToSave.begin(), propsToSave.end());
477
478 const Dict &dict = props.getDict();
479 unsigned int count = 0;
480 for (Dict::DataType::const_iterator it = dict.getData().begin();
481 it != dict.getData().end(); ++it) {
482 if (propnames.find(it->key) != propnames.end()) {
483 if (isSerializable(*it, handlers)) {
484 count++;
485 }
486 }
487 }
488
489 streamWrite(ss, count); // packed int?
490
491 unsigned int writtenCount = 0;
492 for (Dict::DataType::const_iterator it = dict.getData().begin();
493 it != dict.getData().end(); ++it) {
494 if (propnames.find(it->key) != propnames.end()) {
495 if (isSerializable(*it, handlers)) {
496 // note - not all properties are serializable, this may be
497 // a null op
498 if (streamWriteProp(ss, *it, handlers)) {
499 writtenCount++;
500 }
501 }
502 }
503 }
504 POSTCONDITION(count == writtenCount,
505 "Estimated property count not equal to written");
506 return true;
507 }
508
509 template <class T>
readRDValue(std::istream & ss,RDValue & value)510 void readRDValue(std::istream &ss, RDValue &value) {
511 T v;
512 streamRead(ss, v);
513 value = v;
514 }
515
516 template <class T>
readRDVecValue(std::istream & ss,RDValue & value)517 void readRDVecValue(std::istream &ss, RDValue &value) {
518 std::vector<T> v;
519 streamReadVec(ss, v);
520 value = v;
521 }
522
readRDValueString(std::istream & ss,RDValue & value)523 inline void readRDValueString(std::istream &ss, RDValue &value) {
524 std::string v;
525 int version = 0;
526 streamRead(ss, v, version);
527 value = v;
528 }
529
readRDStringVecValue(std::istream & ss,RDValue & value)530 inline void readRDStringVecValue(std::istream &ss, RDValue &value) {
531 std::vector<std::string> v;
532 int version = 0;
533 streamReadStringVec(ss, v, version);
534 value = v;
535 }
536
537 inline bool streamReadProp(std::istream &ss, Dict::Pair &pair,
538 bool &dictHasNonPOD,
539 const CustomPropHandlerVec &handlers = {}) {
540 int version = 0;
541 streamRead(ss, pair.key, version);
542
543 unsigned char type;
544 streamRead(ss, type);
545 switch (type) {
546 case DTags::IntTag:
547 readRDValue<int>(ss, pair.val);
548 break;
549 case DTags::UnsignedIntTag:
550 readRDValue<unsigned int>(ss, pair.val);
551 break;
552 case DTags::BoolTag:
553 readRDValue<bool>(ss, pair.val);
554 break;
555 case DTags::FloatTag:
556 readRDValue<float>(ss, pair.val);
557 break;
558 case DTags::DoubleTag:
559 readRDValue<double>(ss, pair.val);
560 break;
561
562 case DTags::StringTag:
563 readRDValueString(ss, pair.val);
564 dictHasNonPOD = true;
565 break;
566 case DTags::VecStringTag:
567 readRDStringVecValue(ss, pair.val);
568 dictHasNonPOD = true;
569 break;
570 case DTags::VecIntTag:
571 readRDVecValue<int>(ss, pair.val);
572 dictHasNonPOD = true;
573 break;
574 case DTags::VecUIntTag:
575 readRDVecValue<unsigned int>(ss, pair.val);
576 dictHasNonPOD = true;
577 break;
578 case DTags::VecFloatTag:
579 readRDVecValue<float>(ss, pair.val);
580 dictHasNonPOD = true;
581 break;
582 case DTags::VecDoubleTag:
583 readRDVecValue<double>(ss, pair.val);
584 dictHasNonPOD = true;
585 break;
586 case DTags::CustomTag: {
587 std::string propType;
588 int version = 0;
589 streamRead(ss, propType, version);
590 for (auto &handler : handlers) {
591 if (propType == handler->getPropName()) {
592 handler->read(ss, pair.val);
593 dictHasNonPOD = true;
594 return true;
595 }
596 }
597 return false;
598 }
599
600 default:
601 return false;
602 }
603 return true;
604 }
605
606 inline unsigned int streamReadProps(std::istream &ss, RDProps &props,
607 const CustomPropHandlerVec &handlers = {}) {
608 unsigned int count;
609 streamRead(ss, count);
610
611 Dict &dict = props.getDict();
612 dict.reset(); // Clear data before repopulating
613 dict.getData().resize(count);
614 for (unsigned index = 0; index < count; ++index) {
615 CHECK_INVARIANT(streamReadProp(ss, dict.getData()[index],
616 dict.getNonPODStatus(), handlers),
617 "Corrupted property serialization detected");
618 }
619
620 return count;
621 }
622
623 } // namespace RDKit
624
625 #endif
626