1 // SPDX-License-Identifier: BSD-3-Clause
2 // Copyright (c) Contributors to the OpenEXR Project.
3
4 //-----------------------------------------------------------------------------
5 //
6 // ID Manifest class implementation
7 //
8 //-----------------------------------------------------------------------------
9
10 #include <ImfIDManifest.h>
11 #include <Iex.h>
12 #include <zlib.h>
13 #include "ImfXdr.h"
14 #include "ImfIO.h"
15
16 #include <stdint.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <algorithm>
20
21 //
22 // debugging only
23 //
24 #ifdef DUMP_TABLE
25 #include <iostream>
26 #endif
27
28 OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_ENTER
29
30
31 using namespace OPENEXR_IMF_INTERNAL_NAMESPACE;
32 using std::make_pair;
33 using std::set;
34 using std::vector;
35 using std::map;
36 using std::string;
37 using std::pair;
38 using std::sort;
39 using std::fill;
40
41 const std::string IDManifest::UNKNOWN = "unknown";
42 const std::string IDManifest::NOTHASHED = "none";
43 const std::string IDManifest::CUSTOMHASH = "custom";
44 const std::string IDManifest::MURMURHASH3_32 = "MurmurHash3_32";
45 const std::string IDManifest::MURMURHASH3_64 = "MurmurHash3_64";
46
47 const std::string IDManifest::ID_SCHEME = "id";
48 const std::string IDManifest::ID2_SCHEME = "id2";
49
50
IDManifest()51 IDManifest::IDManifest() { }
52
53 namespace
54 {
55
56
57 // map of strings to index of string in table
58 typedef std::map<std::string,int> indexedStringSet;
59
60
61
62 // when handling vectors/sets of strings, the string is got by deferencing the pointer/iterator
stringSize(const T & i)63 template<class T> size_t stringSize(const T &i)
64 {
65 return i->size();
66 }
67
cStr(const T & i)68 template<class T> const char* cStr(const T &i)
69 {
70 return i->c_str();
71 }
72
73 /*
74 // but for indexedStringSet the string is the first of the iterator pair
75 size_t stringSize(indexedStringSet::const_iterator &i )
76 {
77 return i->first.size();
78 }
79
80 const char* cStr(indexedStringSet::const_iterator &i)
81 {
82 return i->first.c_str();
83 }
84 */
85
getVariableLengthIntegerSize(uint64_t value)86 size_t getVariableLengthIntegerSize(uint64_t value)
87 {
88
89 if(value < 1llu<<7)
90 {
91 return 1;
92 }
93
94 if(value < 1llu<<14)
95 {
96 return 2;
97 }
98 if(value < 1llu<<21)
99 {
100 return 3;
101 }
102 if(value < 1llu<<28)
103 {
104 return 4;
105 }
106 if(value < 1llu<<35)
107 {
108 return 5;
109 }
110 if(value < 1llu<<42)
111 {
112 return 6;
113 }
114 if(value < 1llu<<49)
115 {
116 return 7;
117 }
118 if(value < 1llu<<56)
119 {
120 return 8;
121 }
122 if(value < 1llu<<63)
123 {
124 return 9;
125 }
126 return 10;
127
128 }
129
readVariableLengthInteger(const char * & readPtr,const char * endPtr)130 uint64_t readVariableLengthInteger(const char*& readPtr,const char* endPtr)
131 {
132 // bytes are stored LSB first, so each byte that is read from the stream must be
133 // shifted before mixing into the existing length
134 int shift=0;
135 unsigned char byte=0;
136 uint64_t value=0;
137 do{
138 if(readPtr>=endPtr)
139 {
140 throw IEX_NAMESPACE::InputExc ("IDManifest too small for variable length integer");
141 }
142 byte = *(unsigned char*)readPtr++;
143 // top bit of byte isn't part of actual number, it just indicates there's more info to come
144 // so take bottom 7 bits, shift them to the right place, and insert them
145 //
146 value|=(uint64_t(byte&127)) << shift;
147 shift+=7;
148 }while(byte&128); //while top bit set on previous byte, there is more to come
149 return value;
150 }
151
writeVariableLengthInteger(char * & outPtr,uint64_t value)152 void writeVariableLengthInteger(char*& outPtr,uint64_t value)
153 {
154 do
155 {
156 unsigned char byte = (unsigned char)(value&127);
157 value>>=7;
158 if(value>0)
159 {
160 byte|=128;
161 }
162 *(unsigned char*) outPtr++ = byte;
163 }
164 while(value>0);
165 }
166
167
168
169 //
170 // read a list of strings into the given container
171 // format is:
172 // numberOfStrings (unless numberOfStrings already passed in)
173 // length of string 0
174 // length of string 1
175 // ...
176 // string 0
177 // string 1
178 // ...
179 // (the sizes come first then the strings because that helps compression performance)
180 // note - updates readPtr to point to first byte after readStrings
181 //
182
183
readStringList(const char * & readPtr,const char * endPtr,T & outputVector,int numberOfStrings=0)184 template<class T> void readStringList(const char*& readPtr,const char* endPtr,T & outputVector,int numberOfStrings=0)
185 {
186 if(numberOfStrings==0)
187 {
188 if(readPtr+4>endPtr)
189 {
190 throw IEX_NAMESPACE::InputExc ("IDManifest too small for string list size");
191 }
192 Xdr::read<CharPtrIO>(readPtr,numberOfStrings);
193 }
194
195 vector<size_t> lengths(numberOfStrings);
196
197 for( int i=0 ; i<numberOfStrings ; ++i )
198 {
199 lengths[i] = readVariableLengthInteger(readPtr,endPtr);
200 }
201 for( int i=0 ; i<numberOfStrings ; ++i )
202 {
203
204 if(readPtr+lengths[i] > endPtr)
205 {
206 throw IEX_NAMESPACE::InputExc ("IDManifest too small for string");
207 }
208 outputVector.insert(outputVector.end(),string(readPtr , lengths[i]));
209 readPtr += lengths[i];
210 }
211 }
212
213 //
214 // computes number of bytes required to serialize vector/set of strings
215 //
getStringListSize(const T & stringList,size_t entries=0)216 template<typename T> int getStringListSize(const T& stringList , size_t entries=0)
217 {
218 int totalSize=0;
219 if(entries==0)
220 {
221 totalSize+=4; // 4 bytes to store number of entries;
222 }
223 else
224 {
225 if(stringList.size()!=entries)
226 {
227 throw IEX_NAMESPACE::InputExc ("Incorrect number of components stored in ID Manifest");
228 }
229 }
230 for(typename T::const_iterator i = stringList.begin() ; i!=stringList.end() ; ++i)
231 {
232 size_t length = stringSize(i);
233 totalSize+=length;
234 // up to five bytes for variable length encoded size
235
236 totalSize+=getVariableLengthIntegerSize(length);
237
238
239
240 }
241 return totalSize;
242 }
243
244 //
245 // write string list to outPtr. if entries nonzero, omits number of entries,
246 // but confirms 'entries' == T.size()
247 //
248 template<typename T> void
writeStringList(char * & outPtr,const T & stringList,int entries=0)249 writeStringList( char*& outPtr,const T& stringList,int entries=0)
250 {
251 int size = stringList.size();
252 if(entries==0)
253 {
254 Xdr::write<CharPtrIO>(outPtr,size);
255 }
256 else
257 {
258 if(size!=entries)
259 {
260 throw IEX_NAMESPACE::InputExc ("Incorrect number of components stored in ID Manifest");
261 }
262 }
263 for(typename T::const_iterator i = stringList.begin() ; i!=stringList.end() ; ++i)
264 {
265 int stringLength = stringSize(i);
266 //
267 // variable length encoding:
268 // values between 0 and 127 inclusive are stored in a single byte
269 // values betwwen 128 and 16384 are encoded with two bytes: 1LLLLLLL 0MMMMMMMM where L and M are the least and most significant bits of the value
270 // in general, values are stored least significant values first, with the top bit of each byte indicating more values follow
271 // the top bit is clear in the last byte of the value
272 // (this scheme requires two bytes to store values above 1<<7, and five bytes to store values above 1<<28)
273 //
274
275 writeVariableLengthInteger(outPtr,stringLength);
276
277
278 }
279
280 for(typename T::const_iterator i = stringList.begin() ; i!=stringList.end() ; ++i)
281 {
282 int stringLength = stringSize(i);
283 Xdr::write<CharPtrIO>(outPtr,(const char*) cStr(i),stringLength);
284 }
285 }
286
getStringSize(const string & str)287 int getStringSize(const string & str)
288 {
289 return 4+str.size();
290 }
291
readPascalString(const char * & readPtr,const char * endPtr,string & outputString)292 void readPascalString(const char*& readPtr, const char* endPtr, string& outputString)
293 {
294
295 if(readPtr+4>endPtr)
296 {
297 throw IEX_NAMESPACE::InputExc ("IDManifest too small for string size");
298
299 }
300 unsigned int length=0;
301 Xdr::read<CharPtrIO>(readPtr,length);
302
303 if(readPtr+length>endPtr)
304 {
305 throw IEX_NAMESPACE::InputExc ("IDManifest too small for string");
306 }
307 outputString = string((const char*) readPtr,length);
308 readPtr+=length;
309 }
310
311 void
writePascalString(char * & outPtr,const string & str)312 writePascalString(char*& outPtr,const string& str)
313 {
314 unsigned int length = str.size();
315 Xdr::write<CharPtrIO>((char*&) outPtr,length);
316 Xdr::write<CharPtrIO>((char*&) outPtr,(const char*) str.c_str(),length);
317 }
318
319
320
321 }
322
IDManifest(const char * data,const char * endOfData)323 IDManifest::IDManifest(const char* data,const char* endOfData)
324 {
325 init(data,endOfData);
326 }
327
init(const char * data,const char * endOfData)328 void IDManifest::init(const char* data, const char* endOfData)
329 {
330
331
332
333 unsigned int version;
334 Xdr::read<CharPtrIO>( data,version);
335 if(version!=0)
336 {
337 throw IEX_NAMESPACE::InputExc ("Unrecognized IDmanifest version");
338 }
339
340 //
341 // first comes list of all strings used in manifest
342 //
343 vector<string> stringList;
344 readStringList(data,endOfData,stringList);
345
346 //
347 // expand the strings in the stringlist
348 // each string begins with number of characters to copy from the previous string
349 // the remainder is the 'new' bit that appears after that
350 //
351
352 for(size_t i=1;i<stringList.size();++i)
353 {
354
355 size_t common; // number of characters in common with previous string
356 int stringStart=1; // first character of string itself;
357 //
358 // previous string had more than 255 characters?
359 //
360 if(stringList[i-1].size()>255)
361 {
362 common = size_t( ((unsigned char) (stringList[i][0])) <<8 ) + size_t( (unsigned char) (stringList[i][1]));
363 stringStart=2;
364 }
365 else
366 {
367 common = (unsigned char) stringList[i][0];
368 }
369 if(common>stringList[i-1].size())
370 {
371 throw IEX_NAMESPACE::InputExc ("Bad common string length in IDmanifest string table");
372 }
373 stringList[i] = stringList[i-1].substr(0,common)+stringList[i].substr(stringStart);
374
375 }
376
377 //
378 // decode mapping table from indices in table to indices in string list
379 // the mapping uses smaller indices for more commonly occuring strings, since these are encoded with fewer bits
380 // comments in serialize function describe the format
381 //
382
383 vector<int> mapping(stringList.size());
384
385
386 //
387 // overlapping sequences: A list [(4,5),(3,6)] expands to 4,5,3,6 - because 4 and 5 are including already
388 // they are not included again
389 // the 'seen' list indicates which values have already been used, so they are not re-referenced
390 //
391
392 vector<char> seen(stringList.size());
393
394 int rleLength;
395 if(endOfData<data+4)
396 {
397 throw IEX_NAMESPACE::InputExc ("IDManifest too small");
398 }
399
400 Xdr::read<CharPtrIO>( data,rleLength);
401
402 int currentIndex=0;
403 for(int i=0;i<rleLength;++i)
404 {
405 int first;
406 int last;
407 if(endOfData<data+8)
408 {
409 throw IEX_NAMESPACE::InputExc ("IDManifest too small");
410 }
411 Xdr::read<CharPtrIO>( data,first);
412 Xdr::read<CharPtrIO>( data,last);
413
414 if(first<0 || last<0 || first>last || first>= int(stringList.size()) || last>=int(stringList.size()))
415 {
416 throw IEX_NAMESPACE::InputExc ("Bad mapping table entry in IDManifest");
417 }
418 for(int entry=first ; entry<=last;entry++)
419 {
420 // don't remap already mapped values
421 if(seen[entry]==0)
422 {
423 mapping[currentIndex]=entry;
424 seen[entry]=1;
425 currentIndex++;
426 }
427 }
428
429 }
430
431 #ifdef DUMP_TABLE
432 //
433 // dump mapping table for debugging
434 //
435 for(size_t i=0;i<mapping.size();++i)
436 {
437 std::cout << i << ' ' << mapping[i] << std::endl;
438 }
439 #endif
440
441
442
443
444 //
445 // number of manifest entries comes after string list
446 //
447 int manifestEntries;
448
449 if(endOfData<data+4)
450 {
451 throw IEX_NAMESPACE::InputExc ("IDManifest too small");
452 }
453
454 Xdr::read<CharPtrIO>( data,manifestEntries);
455
456 _manifest.clear();
457
458 _manifest.resize(manifestEntries);
459
460 for(int manifestEntry = 0 ; manifestEntry < manifestEntries ; ++manifestEntry)
461 {
462
463 ChannelGroupManifest& m = _manifest[manifestEntry];
464
465 //
466 // read header of this manifest entry
467 //
468 readStringList(data,endOfData,m._channels);
469 readStringList(data,endOfData,m._components);
470
471 char lifetime;
472 if(endOfData<data+4)
473 {
474 throw IEX_NAMESPACE::InputExc ("IDManifest too small");
475 }
476 Xdr::read<CharPtrIO>(data,lifetime);
477
478
479
480 m.setLifetime(IdLifetime(lifetime));
481 readPascalString(data,endOfData,m._hashScheme);
482 readPascalString(data,endOfData,m._encodingScheme);
483
484 if(endOfData<data+5)
485 {
486 throw IEX_NAMESPACE::InputExc ("IDManifest too small");
487 }
488 char storageScheme;
489 Xdr::read<CharPtrIO>(data,storageScheme);
490
491 int tableSize;
492 Xdr::read<CharPtrIO>(data,tableSize);
493
494 uint64_t previousId=0;
495
496 for(int entry = 0 ; entry < tableSize ; ++entry)
497 {
498 uint64_t id;
499
500 switch(storageScheme)
501 {
502 case 0 :
503 {
504 if(endOfData<data+8)
505 {
506 throw IEX_NAMESPACE::InputExc ("IDManifest too small");
507 }
508 Xdr::read<CharPtrIO>(data,id);
509 break;
510 }
511 case 1 :
512 {
513 if(endOfData<data+4)
514 {
515 throw IEX_NAMESPACE::InputExc ("IDManifest too small");
516 }
517 unsigned int id32;
518 Xdr::read<CharPtrIO>(data,id32);
519 id = id32;
520 break;
521 }
522 default :
523 {
524 id = readVariableLengthInteger(data,endOfData);
525 }
526
527 }
528
529 id+=previousId;
530 previousId=id;
531
532 //
533 // insert into table - insert tells us if it was already there
534 //
535 pair< map< uint64_t, vector<string> >::iterator,bool> insertion = m._table.insert(make_pair(id,vector<string>()));
536 if(insertion.second==false)
537 {
538 throw IEX_NAMESPACE::InputExc("ID manifest contains multiple entries for the same ID");
539 }
540 (insertion.first)->second.resize(m.getComponents().size());
541 for(size_t i=0;i<m.getComponents().size();++i)
542 {
543 int stringIndex = readVariableLengthInteger(data,endOfData);
544 if(size_t(stringIndex)>stringList.size() || stringIndex<0)
545 {
546 throw IEX_NAMESPACE::InputExc ("Bad string index in IDManifest");
547 }
548 (insertion.first)->second[i]=stringList[ mapping[ stringIndex] ];
549 }
550 }
551 }
552 }
553
554
IDManifest(const CompressedIDManifest & compressed)555 IDManifest::IDManifest(const CompressedIDManifest& compressed)
556 {
557 //
558 // decompress the compressed manifest
559 //
560
561
562 vector<Bytef> uncomp(compressed._uncompressedDataSize);
563 uLongf outSize = compressed._uncompressedDataSize;
564 if(Z_OK != ::uncompress(&uncomp[0] , &outSize , (const Bytef*) compressed._data, compressed._compressedDataSize))
565 {
566 throw IEX_NAMESPACE::InputExc ("IDManifest decompression (zlib) failed.");
567 }
568 if(outSize!=compressed._uncompressedDataSize)
569 {
570 throw IEX_NAMESPACE::InputExc ("IDManifest decompression (zlib) failed: mismatch in decompressed data size");
571 }
572
573 init((const char*) &uncomp[0],(const char*) &uncomp[0] + outSize);
574
575
576
577 }
578
579
serialize(std::vector<char> & data) const580 void IDManifest::serialize(std::vector< char >& data) const
581 {
582
583
584
585 indexedStringSet stringSet;
586
587 //
588 // build string map - this turns unique strings into indices
589 // the manifest stores the string indices - this allows duplicated
590 // strings to point to the same place
591 // grabs all the strings regardless of which manifest/mapping they are in
592 //
593 // at this point we just count the manifest entries
594 //
595 {
596 //
597 // over each channel group
598 //
599 for(size_t m=0;m<_manifest.size();++m)
600 {
601 // over each mapping
602 for( IDManifest::ChannelGroupManifest::IDTable::const_iterator i = _manifest[m]._table.begin(); i!=_manifest[m]._table.end(); ++i)
603 {
604 // over each string in the mapping
605
606 for(size_t s = 0 ; s < i->second.size() ; ++s)
607 {
608 stringSet[ i->second[s] ]++;
609 }
610 }
611 }
612 }
613
614
615
616 //
617 // build compressed string representation - all but first string starts with number of characters to copy from previous string.
618 // max 65535 bytes - use two bytes to store if previous string was more than 255 characters, big endian
619 //
620 vector<string> prefixedStringList(stringSet.size());
621
622 //
623 // also make a sorted list so the most common entry appears first. Keep equally likely entries in numerical order
624 //
625 vector< pair<int,int> > sortedIndices(stringSet.size());
626
627 string prevString;
628 int index=0;
629 for( indexedStringSet::iterator i = stringSet.begin() ; i!= stringSet.end() ; ++i)
630 {
631
632
633 // no prefix on first string - map stores index of each string, so use that rather than a counter;
634 if(index==0)
635 {
636 prefixedStringList[index]=i->first;
637 }
638 else
639 {
640 size_t common=0;
641 while(common<65535
642 && common < prevString.size()
643 && common < i->first.size()
644 && prevString[common] == i->first[common])
645 {
646 ++common;
647 }
648
649 if(prevString.size()>255)
650 {
651 //
652 // long previous string - use two bytes to encode number of common chars
653 //
654 prefixedStringList[index] = string( 1, char(common>>8)) + string(1,char(common&255)) + i->first.substr(common);
655 }
656 else
657 {
658 prefixedStringList[index] = string( 1,char(common)) + i->first.substr(common);
659 }
660
661 }
662 prevString = i->first;
663 sortedIndices[index].first = -i->second; // use negative of count so largest count appears first
664 sortedIndices[index].second = index;
665
666 //
667 // also, repurpose stringSet so that it maps from string names to indices in the string table
668 //
669 i->second = index;
670
671 index++;
672 }
673
674 sort( sortedIndices.begin(),sortedIndices.end());
675
676 //
677 // the first 1<<7 characters will all be encoded with 1 byte, regardless of how common they are
678 // the next 1<<14 characters will be encoded with 2 bytes
679 // (a full huffman encode would do this at the bit level, not the byte level)
680 //
681 // the mapping table can be reduced in size by rewriting the IDs to exploit that
682 // can rearrange the IDs to have more long runs by sorting numbers
683 // that will need the same number of bytes to encode together
684 //
685 {
686 size_t i=0;
687
688 for(;i<sortedIndices.size() && i <1<<7;++i)
689 {
690 sortedIndices[i].first=1;
691 }
692 for(;i<sortedIndices.size() && i <1<<14;++i)
693 {
694 sortedIndices[i].first=2;
695 }
696 for(;i<sortedIndices.size() && i <1<<21;++i)
697 {
698 sortedIndices[i].first=3;
699 }
700 for(;i<sortedIndices.size() && i <1<<28;++i)
701 {
702 sortedIndices[i].first=4;
703 }
704 for(;i<sortedIndices.size();++i)
705 {
706 sortedIndices[i].first=5;
707 }
708 }
709 sort( sortedIndices.begin(),sortedIndices.end());
710
711
712
713 vector<int> stringIndices(sortedIndices.size());
714
715 //
716 // table will be stored with RLE encoding - store pairs of 'start index,end index'
717 // so, the sequence 10,11,12,1,2,3,4 is stored as [ (10,12) , (1,4)]
718 //
719 // sequential IDs ignore already referenced IDs, so the sequence 11,9,10,12,13 can be stored as [ (11,11) , (9,13)]
720 // on reading, don't reference an entry that has already been seen
721 // on writing, need to track which entries have already been stored to allow this overlapping to occur
722 //
723
724 vector< pair<int,int> > RLEmapping;
725
726 if(sortedIndices.size()>0)
727 {
728 RLEmapping.resize(1);
729 RLEmapping[0].first = sortedIndices[0].second;
730 RLEmapping[0].second = sortedIndices[0].second;
731
732 fill(stringIndices.begin(),stringIndices.end(),-1);
733
734 stringIndices[sortedIndices[0].second]=0;
735
736 //
737 // as the loop below runs, nextToInclude tracks the value that can be merged with the current run length
738 // (RLWmapping.back()) - generally this is on more than the current length, but it jumps forward
739 // over values already seen
740 //
741 int nextToInclude=stringIndices[sortedIndices[0].second]+1;
742
743
744 for( size_t i=1;i<sortedIndices.size() ; ++i)
745 {
746 if( sortedIndices[i].second == nextToInclude)
747 {
748 //
749 // this index can be treated as part of the current run, so extend the run to include it
750 //
751 RLEmapping.back().second=sortedIndices[i].second;
752
753 }
754 else
755 {
756 pair<int,int> newEntry(sortedIndices[i].second,sortedIndices[i].second);
757 RLEmapping.push_back(newEntry);
758 }
759 // build mapping for this entry
760 stringIndices[sortedIndices[i].second]=i;
761
762 // what would the next entry have to be to be included in this run
763 // skip over already mapped strings
764 nextToInclude=sortedIndices[i].second+1;
765
766
767 while ( nextToInclude < int(stringIndices.size()) && stringIndices[nextToInclude]>=0)
768 {
769 nextToInclude++;
770 }
771 }
772 }
773 #ifdef DUMP_TABLE
774 // dump RLE table for debugging
775 for( size_t i=1;i<sortedIndices.size() ; ++i)
776 {
777 std::cout << i << ' ' << sortedIndices[i].second << std::endl;
778 }
779 #endif
780
781
782
783 // now compute size of uncompressed memory block for serialization
784
785 int outputSize = 8; // at least need four bytes for integer to store number of channel manifests, plus four bytes to indicate version pattern
786
787 outputSize += getStringListSize(prefixedStringList);
788
789 //
790 // RLE mapping table size - number of entries followed by eight bytes for each run length
791 //
792 outputSize+= RLEmapping.size()*8+4;
793
794 //
795 // track which storage scheme is optimal for storing the IDs of each type
796 // ID storage scheme: 0 = 8 bytes per ID, 1 = 4 bytes per ID, 2 = variable
797 //
798
799 std::vector<char> storageSchemes;
800
801 for(size_t groupNumber = 0 ; groupNumber < _manifest.size() ; ++groupNumber)
802 {
803 const ChannelGroupManifest& m = _manifest[groupNumber];
804 outputSize += getStringListSize(m._channels); //size of channel group
805 outputSize += getStringListSize(m._components); //size of component list
806 outputSize += 1; //size of lifetime enum
807 outputSize += getStringSize(m._hashScheme);
808 outputSize += getStringSize(m._encodingScheme);
809
810 outputSize += 1; // ID scheme
811 outputSize += 4; // size of storage for number of 32 bit entries in ID table
812
813 uint64_t previousId=0;
814 uint64_t IdStorageForVariableScheme = 0;
815 bool canUse32Bits = true;
816 for( IDManifest::ChannelGroupManifest::IDTable::const_iterator i = m._table.begin(); i!=m._table.end(); ++i)
817 {
818
819 uint64_t idToStore = i->first-previousId;
820 IdStorageForVariableScheme+=getVariableLengthIntegerSize(idToStore);
821 if(idToStore >= 1llu<<32)
822 {
823 canUse32Bits = false;
824 }
825 previousId=i->first;
826
827 for(size_t s=0;s<m._components.size();++s)
828 {
829 int stringID = stringSet[ i->second[s] ];
830 int idToWrite = stringIndices[stringID];
831 outputSize+= getVariableLengthIntegerSize(idToWrite);
832 }
833 }
834 // pick best scheme to use to store IDs
835 if(canUse32Bits)
836 {
837 if(IdStorageForVariableScheme < m._table.size()*4)
838 {
839 //
840 // variable storage smaller than fixed 32 bit, so use that
841 //
842 storageSchemes.push_back(2);
843 outputSize+=IdStorageForVariableScheme;
844 }
845 else
846 {
847 //
848 // variable scheme bigger than fixed 32 bit, but all ID differences fit into 32 bits
849 //
850 storageSchemes.push_back(1);
851 outputSize+=m._table.size()*4;
852 }
853 }
854 else
855 {
856 if(IdStorageForVariableScheme < m._table.size()*8)
857 {
858 //
859 // variable storage smaller than fixed 64 bit, so use that
860 //
861 storageSchemes.push_back(2);
862 outputSize+=IdStorageForVariableScheme;
863 }
864 else
865 {
866 //
867 // variable scheme bigger than fixed 64 bit, and some ID differences bigger than 32 bit
868 //
869 storageSchemes.push_back(0);
870 outputSize+=m._table.size()*8;
871 }
872 }
873
874 }
875
876 //
877 // resize output array
878 //
879 data.resize(outputSize);
880
881
882
883 //
884 // populate output array
885 //
886 char* outPtr = &data[0];
887
888 //
889 // zeroes to indicate this is version 0 of the header
890 //
891 Xdr::write<CharPtrIO>(outPtr, int(0));
892
893 //
894 // table of strings
895 //
896 writeStringList(outPtr,prefixedStringList);
897
898
899 //
900 // RLE block
901 //
902 Xdr::write<CharPtrIO>(outPtr, int(RLEmapping.size()));
903 for(size_t i=0;i<RLEmapping.size();++i)
904 {
905 Xdr::write<CharPtrIO>(outPtr, RLEmapping[i].first);
906 Xdr::write<CharPtrIO>(outPtr, RLEmapping[i].second);
907 }
908
909
910
911 //
912 // number of manifests
913 //
914 Xdr::write<CharPtrIO>(outPtr, int(_manifest.size()));
915 int manifestIndex=0;
916
917 for(size_t groupNumber = 0 ; groupNumber < _manifest.size() ; ++groupNumber)
918 {
919 const ChannelGroupManifest& m = _manifest[groupNumber];
920 //
921 // manifest header
922 //
923 writeStringList(outPtr,m._channels);
924 writeStringList(outPtr,m._components);
925 Xdr::write<CharPtrIO>(outPtr,char(m._lifeTime));
926 writePascalString(outPtr,m._hashScheme);
927 writePascalString(outPtr,m._encodingScheme);
928
929 char scheme = storageSchemes[manifestIndex];
930 Xdr::write<CharPtrIO>(outPtr,scheme);
931
932 Xdr::write<CharPtrIO>(outPtr,int(m._table.size()));
933
934 uint64_t previousId=0;
935 //
936 // table
937 //
938 for( IDManifest::ChannelGroupManifest::IDTable::const_iterator i = m._table.begin(); i!=m._table.end(); ++i)
939 {
940
941 uint64_t idToWrite = i->first-previousId;
942 switch(scheme)
943 {
944 case 0 : Xdr::write<CharPtrIO>(outPtr, idToWrite);break;
945 case 1 : Xdr::write<CharPtrIO>(outPtr,(unsigned int) idToWrite);break;
946 case 2 : writeVariableLengthInteger(outPtr,idToWrite);
947 }
948
949 previousId=i->first;
950
951 for(size_t s=0;s<m._components.size();++s)
952 {
953 int stringID = stringSet[ i->second[s] ];
954 int idToWrite = stringIndices[stringID];
955 writeVariableLengthInteger(outPtr , idToWrite );
956 }
957 }
958 manifestIndex++;
959 }
960 //
961 // check we've written the ID manifest correctly
962 //
963 if(outPtr!=&data[0]+data.size())
964 {
965 throw IEX_NAMESPACE::ArgExc("Error - IDManifest size error");
966 }
967 }
968
969 bool
operator ==(const IDManifest & other) const970 IDManifest::operator==(const IDManifest& other) const
971 {
972 return other._manifest == _manifest;
973 }
974
975 bool
operator !=(const IDManifest & other) const976 IDManifest::operator!=(const IDManifest& other) const
977 {
978 return !(*this==other);
979 }
980
981 bool
merge(const IDManifest & other)982 IDManifest::merge(const IDManifest& other)
983 {
984 bool conflict = false;
985 for(size_t otherManifest = 0 ; otherManifest < other._manifest.size() ; ++ otherManifest)
986 {
987 bool merged = false;
988 for(size_t thisManifest = 0 ; thisManifest < _manifest.size() ; ++ thisManifest)
989 {
990 if( _manifest[thisManifest]._channels == other._manifest[otherManifest]._channels)
991 {
992 // found same channels
993
994 merged = true;
995
996 if(other._manifest[otherManifest]._components != _manifest[thisManifest]._components)
997 {
998 // cannot merge if components are different
999 conflict = true;
1000 }
1001 else
1002 {
1003
1004 // if(other._manifest[otherManifest]._encodingScheme != _manifest[thisManifest]._encodingScheme ||
1005 // other._manifest[otherManifest]._hashScheme != _manifest[thisManifest]._hashScheme ||
1006 // other._manifest[otherManifest]._hashScheme != _manifest[thisManifest]._hashScheme ||
1007 // other._manifest[otherManifest]._lifeTime != _manifest[thisManifest]._lifeTime)
1008 // {
1009 // conflict = true;
1010 // }
1011
1012 for( IDManifest::ChannelGroupManifest::ConstIterator it = other._manifest[otherManifest].begin() ; it!= other._manifest[otherManifest].end() ; ++it)
1013 {
1014 IDManifest::ChannelGroupManifest::ConstIterator ours = _manifest[thisManifest].find( it.id());
1015 if( ours == _manifest[thisManifest].end())
1016 {
1017 _manifest[thisManifest].insert( it.id() , it.text());
1018 }
1019 else
1020 {
1021 if(ours.text() != it.text())
1022 {
1023 conflict = true;
1024 }
1025 }
1026 }
1027 }
1028 }
1029 }
1030
1031 if(!merged)
1032 {
1033 _manifest.push_back(other._manifest[otherManifest]);
1034 }
1035
1036
1037 }
1038
1039 return conflict;
1040
1041 }
1042
1043
CompressedIDManifest()1044 CompressedIDManifest::CompressedIDManifest() : _compressedDataSize(0) , _uncompressedDataSize(0) , _data(NULL) {}
1045
1046
CompressedIDManifest(const CompressedIDManifest & other)1047 CompressedIDManifest::CompressedIDManifest(const CompressedIDManifest& other)
1048 : _compressedDataSize(other._compressedDataSize) ,
1049 _uncompressedDataSize(other._uncompressedDataSize) ,
1050 _data((unsigned char*) malloc(other._compressedDataSize) )
1051 {
1052 memcpy(_data,other._data,_compressedDataSize);
1053 }
1054
1055 CompressedIDManifest&
operator =(const CompressedIDManifest & other)1056 CompressedIDManifest::operator=(const CompressedIDManifest& other)
1057 {
1058 if(this!=&other)
1059 {
1060 if(_data)
1061 {
1062 free(_data);
1063 }
1064 _data = (unsigned char*) malloc( other._compressedDataSize);
1065 _compressedDataSize = other._compressedDataSize;
1066 _uncompressedDataSize = other._uncompressedDataSize;
1067 memcpy(_data,other._data,_compressedDataSize);
1068 }
1069 return *this;
1070 }
1071
1072
1073
~CompressedIDManifest()1074 CompressedIDManifest::~CompressedIDManifest()
1075 {
1076 if(_data)
1077 {
1078 free(_data);
1079 }
1080 _data = NULL;
1081 _compressedDataSize=0;
1082 }
1083
1084
1085
CompressedIDManifest(const IDManifest & manifest)1086 CompressedIDManifest::CompressedIDManifest(const IDManifest& manifest)
1087 {
1088 //
1089 // make a compressed copy of the manifest by serializing the data into contiguous memory,
1090 // then calling zlib to compress
1091 //
1092
1093
1094 std::vector<char> serial;
1095
1096 manifest.serialize(serial);
1097
1098 uLong outputSize = serial.size();
1099
1100 //
1101 // allocate a buffer which is guaranteed to be big enough for compression
1102 //
1103 uLongf compressedDataSize = compressBound(outputSize);
1104 _data = (unsigned char*) malloc(compressedDataSize);
1105 if(Z_OK != ::compress(_data,&compressedDataSize,(Bytef*) &serial[0],outputSize))
1106 {
1107 throw IEX_NAMESPACE::InputExc("ID manifest compression failed");
1108 }
1109
1110 // now call realloc to reallocate the buffer to a smaller size - this might free up memory
1111 _data = (unsigned char*) realloc(_data,compressedDataSize);
1112
1113 _uncompressedDataSize = outputSize;
1114 _compressedDataSize = compressedDataSize;
1115
1116 }
1117
ChannelGroupManifest()1118 IDManifest::ChannelGroupManifest::ChannelGroupManifest() : _lifeTime(IDManifest::LIFETIME_STABLE) , _hashScheme(IDManifest::UNKNOWN) , _encodingScheme(IDManifest::UNKNOWN) , _insertingEntry(false)
1119 {
1120
1121 }
1122
getComponents() const1123 const vector< string >& IDManifest::ChannelGroupManifest::getComponents() const
1124 {
1125 return _components;
1126 }
1127
getChannels()1128 set< string >& IDManifest::ChannelGroupManifest::getChannels()
1129 {
1130 return _channels;
1131 }
1132
getChannels() const1133 const set< string >& IDManifest::ChannelGroupManifest::getChannels() const
1134 {
1135 return _channels;
1136 }
1137
setChannel(const string & channel)1138 void IDManifest::ChannelGroupManifest::setChannel(const string& channel)
1139 {
1140 _channels.clear();
1141 _channels.insert(channel);
1142 }
1143
setChannels(const set<string> & channels)1144 void IDManifest::ChannelGroupManifest::setChannels(const set< string >& channels)
1145 {
1146 _channels = channels;
1147 }
1148
1149
1150
1151
1152
1153
1154 //
1155 // set number of components of table
1156 //
setComponents(const std::vector<std::string> & components)1157 void IDManifest::ChannelGroupManifest::setComponents(const std::vector< std::string >& components)
1158 {
1159
1160 // if there are already entries in the table, cannot change the number of components
1161 if(_table.size()!=0 && components.size()!=_components.size())
1162 {
1163 THROW (IEX_NAMESPACE::ArgExc, "attempt to change number of components in manifest once entries have been added");
1164 }
1165 _components = components;
1166 }
1167
setComponent(const std::string & component)1168 void IDManifest::ChannelGroupManifest::setComponent(const std::string & component)
1169 {
1170 vector<string> components(1);
1171 components[0] = component;
1172 setComponents(components);
1173 }
1174
1175 IDManifest::ChannelGroupManifest::ConstIterator
begin() const1176 IDManifest::ChannelGroupManifest::begin() const
1177 {
1178 return IDManifest::ChannelGroupManifest::ConstIterator(_table.begin());
1179 }
1180
1181 IDManifest::ChannelGroupManifest::Iterator
begin()1182 IDManifest::ChannelGroupManifest::begin()
1183 {
1184 return IDManifest::ChannelGroupManifest::Iterator(_table.begin());
1185 }
1186
1187 IDManifest::ChannelGroupManifest::ConstIterator
end() const1188 IDManifest::ChannelGroupManifest::end() const
1189 {
1190 return IDManifest::ChannelGroupManifest::ConstIterator(_table.end());
1191 }
1192
end()1193 IDManifest::ChannelGroupManifest::Iterator IDManifest::ChannelGroupManifest::end()
1194 {
1195 return IDManifest::ChannelGroupManifest::Iterator(_table.end());
1196 }
1197
1198 IDManifest::ChannelGroupManifest::ConstIterator
find(uint64_t idValue) const1199 IDManifest::ChannelGroupManifest::find(uint64_t idValue) const
1200 {
1201 return IDManifest::ChannelGroupManifest::ConstIterator(_table.find(idValue));
1202 }
1203
1204 void
erase(uint64_t idValue)1205 IDManifest::ChannelGroupManifest::erase(uint64_t idValue)
1206 {
1207 _table.erase(idValue);
1208 }
1209 size_t
size() const1210 IDManifest::ChannelGroupManifest::size() const
1211 {
1212 return _table.size();
1213 }
1214
1215
1216
find(uint64_t idValue)1217 IDManifest::ChannelGroupManifest::Iterator IDManifest::ChannelGroupManifest::find(uint64_t idValue)
1218 {
1219 return IDManifest::ChannelGroupManifest::Iterator(_table.find(idValue));
1220 }
1221
operator [](uint64_t idValue)1222 std::vector< std::string >& IDManifest::ChannelGroupManifest::operator[](uint64_t idValue)
1223 {
1224 return _table[idValue];
1225 }
1226
1227
1228
1229 IDManifest::ChannelGroupManifest::Iterator
insert(uint64_t idValue,const std::string & text)1230 IDManifest::ChannelGroupManifest::insert(uint64_t idValue, const std::string& text)
1231 {
1232 if(_components.size()!=1)
1233 {
1234 THROW (IEX_NAMESPACE::ArgExc, "Cannot insert single component attribute into manifest with multiple components");
1235 }
1236 vector<string> tempVector(1);
1237 tempVector[0] = text;
1238 return IDManifest::ChannelGroupManifest::Iterator(_table.insert(make_pair(idValue,tempVector)).first);
1239 }
1240
1241
1242 IDManifest::ChannelGroupManifest::Iterator
insert(uint64_t idValue,const std::vector<std::string> & text)1243 IDManifest::ChannelGroupManifest::insert(uint64_t idValue, const std::vector< std::string >& text)
1244 {
1245 if(_components.size()!=text.size())
1246 {
1247 THROW (IEX_NAMESPACE::ArgExc, "mismatch between number of components in manifest and number of components in inserted entry");
1248 }
1249 return IDManifest::ChannelGroupManifest::Iterator(_table.insert(make_pair(idValue,text)).first);
1250 }
1251
1252
1253
1254 uint64_t
insert(const std::vector<std::string> & text)1255 IDManifest::ChannelGroupManifest::insert(const std::vector< std::string >& text)
1256 {
1257 uint64_t hash;
1258 if(_hashScheme == MURMURHASH3_32)
1259 {
1260 hash = MurmurHash32(text);
1261 }
1262 else if(_hashScheme == MURMURHASH3_64)
1263 {
1264 hash = MurmurHash64(text);
1265 }
1266 else
1267 {
1268 THROW (IEX_NAMESPACE::ArgExc, "Cannot compute hash: unknown hashing scheme");
1269 }
1270 insert(hash,text);
1271 return hash;
1272
1273 }
1274
1275 uint64_t
insert(const std::string & text)1276 IDManifest::ChannelGroupManifest::insert(const std::string & text)
1277 {
1278 uint64_t hash;
1279 if(_hashScheme == MURMURHASH3_32)
1280 {
1281 hash = MurmurHash32(text);
1282 }
1283 else if(_hashScheme == MURMURHASH3_64)
1284 {
1285 hash = MurmurHash64(text);
1286 }
1287 else
1288 {
1289 THROW (IEX_NAMESPACE::ArgExc, "Cannot compute hash: unknown hashing scheme");
1290 }
1291 insert(hash,text);
1292 return hash;
1293 }
1294
1295
1296
1297 IDManifest::ChannelGroupManifest&
operator <<(uint64_t idValue)1298 IDManifest::ChannelGroupManifest::operator<<(uint64_t idValue)
1299 {
1300 if(_insertingEntry)
1301 {
1302 THROW (IEX_NAMESPACE::ArgExc,"not enough components inserted into previous entry in ID table before inserting new entry");
1303 }
1304
1305 _insertionIterator = _table.insert( make_pair(idValue,std::vector<std::string>() )).first;
1306
1307 //
1308 // flush out previous entry: reinserting an attribute overwrites previous entry
1309 //
1310 _insertionIterator->second.resize(0);
1311
1312
1313 //
1314 // curious edge-case: it's possible to have an ID table with no strings, just a list of IDs
1315 // There's little purpose to this, but it means that this entry is now 'complete'
1316 //
1317 if(_components.size()==0)
1318 {
1319 _insertingEntry = false;
1320 }
1321 else
1322 {
1323 _insertingEntry = true;
1324 }
1325 return *this;
1326 }
1327
1328 IDManifest::ChannelGroupManifest&
operator <<(const std::string & text)1329 IDManifest::ChannelGroupManifest::operator<<(const std::string& text)
1330 {
1331 if(!_insertingEntry)
1332 {
1333 THROW (IEX_NAMESPACE::ArgExc,"attempt to insert too many strings into entry, or attempt to insert text before ID integer");
1334 }
1335 if(_insertionIterator->second.size() >= _components.size())
1336 {
1337 THROW (IEX_NAMESPACE::ArgExc,"Internal error: too many strings in component");
1338 }
1339 _insertionIterator->second.push_back(text);
1340
1341 //
1342 // if the last component has been inserted, switch off insertingEntry, to mark all entries as complete
1343 //
1344 if(_insertionIterator->second.size() == _components.size())
1345 {
1346 _insertingEntry = false;
1347 }
1348 return *this;
1349 }
1350
1351
1352 bool
operator ==(const IDManifest::ChannelGroupManifest & other) const1353 IDManifest::ChannelGroupManifest::operator==(const IDManifest::ChannelGroupManifest& other) const
1354 {
1355 return (_lifeTime == other._lifeTime &&
1356 _components == other._components &&
1357 _hashScheme == other._hashScheme &&
1358 _components == other._components &&
1359 _table == other._table);
1360
1361 }
1362
1363
1364 size_t
size() const1365 IDManifest::size() const
1366 {
1367 return _manifest.size();
1368 }
1369
1370 size_t
find(const string & channel) const1371 IDManifest::find(const string& channel) const
1372 {
1373 // search the set of channels for each ChannelGroupManifest searching for
1374 // one that contains 'channel'
1375 for( size_t i = 0 ; i < _manifest.size() ; ++i )
1376 {
1377
1378 if( _manifest[i].getChannels().find(channel) != _manifest[i].getChannels().end())
1379 {
1380 return i;
1381 }
1382 }
1383 // not find, return size()
1384 return _manifest.size();
1385
1386 }
1387
1388 IDManifest::ChannelGroupManifest&
add(const set<string> & group)1389 IDManifest::add(const set< string >& group)
1390 {
1391 _manifest.push_back(ChannelGroupManifest());
1392 ChannelGroupManifest& mfst = _manifest.back();
1393 mfst._channels = group;
1394 return mfst;
1395 }
1396
1397 IDManifest::ChannelGroupManifest&
add(const string & channel)1398 IDManifest::add(const string& channel)
1399 {
1400 _manifest.push_back(ChannelGroupManifest());
1401 ChannelGroupManifest& mfst = _manifest.back();
1402 mfst._channels.insert(channel);
1403 return mfst;
1404 }
1405
1406 IDManifest::ChannelGroupManifest&
add(const IDManifest::ChannelGroupManifest & table)1407 IDManifest::add(const IDManifest::ChannelGroupManifest& table)
1408 {
1409 _manifest.push_back(table);
1410 return _manifest.back();
1411 }
1412
operator [](size_t index)1413 IDManifest::ChannelGroupManifest& IDManifest::operator[](size_t index)
1414 {
1415 return _manifest[index];
1416 }
1417
operator [](size_t index) const1418 const IDManifest::ChannelGroupManifest& IDManifest::operator[](size_t index) const
1419 {
1420 return _manifest[index];
1421 }
1422
1423
1424
1425
1426
1427 namespace
1428 {
1429
1430 //-----------------------------------------------------------------------------
1431 // MurmurHash3 was written by Austin Appleby, and is placed in the public
1432 // domain. The author hereby disclaims copyright to this source code.
1433 //
1434 // smhasher provides two different 128 bit hash schemes, optimised for either
1435 // 32 or 64 bit architectures. IDManifest uses only the 64 bit optimised version
1436 // of the 128 bit hash function to generate '64 bit hashes'
1437 //-----------------------------------------------------------------------------
1438 // Platform-specific functions and macros
1439 // Microsoft Visual Studio
1440 #if defined(_MSC_VER)
1441 #define FORCE_INLINE __forceinline
1442 #define ROTL32(x,y) _rotl(x,y)
1443 #define ROTL64(x,y) _rotl64(x,y)
1444 #define BIG_CONSTANT(x) (x)
1445 // Other compilers
1446 #else // defined(_MSC_VER)
1447 #define FORCE_INLINE inline __attribute__((always_inline))
1448 inline uint32_t rotl32 ( uint32_t x, int8_t r )
1449 {
1450 return (x << r) | (x >> (32 - r));
1451 }
1452 inline uint64_t rotl64 ( uint64_t x, int8_t r )
1453 {
1454 return (x << r) | (x >> (64 - r));
1455 }
1456 #define ROTL32(x,y) rotl32(x,y)
1457 #define ROTL64(x,y) rotl64(x,y)
1458 #define BIG_CONSTANT(x) (x##LLU)
1459 #endif // !defined(_MSC_VER)
1460 //-----------------------------------------------------------------------------
1461 // Block read - if your platform needs to do endian-swapping or can only
1462 // handle aligned reads, do the conversion here
getblock32(const uint32_t * p,int i)1463 FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i )
1464 {
1465 return p[i];
1466
1467 }
getblock64(const uint64_t * p,int i)1468 FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
1469 {
1470 return p[i];
1471 }
1472 //-----------------------------------------------------------------------------
1473 // Finalization mix - force all bits of a hash block to avalanche
fmix32(uint32_t h)1474 FORCE_INLINE uint32_t fmix32 ( uint32_t h )
1475 {
1476 h ^= h >> 16;
1477 h *= 0x85ebca6b;
1478 h ^= h >> 13;
1479 h *= 0xc2b2ae35;
1480 h ^= h >> 16;
1481 return h;
1482 }
1483 //----------
fmix64(uint64_t k)1484 FORCE_INLINE uint64_t fmix64 ( uint64_t k )
1485 {
1486 k ^= k >> 33;
1487 k *= BIG_CONSTANT(0xff51afd7ed558ccd);
1488 k ^= k >> 33;
1489 k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
1490 k ^= k >> 33;
1491 return k;
1492 }
1493 //-----------------------------------------------------------------------------
MurmurHash3_x86_32(const void * key,int len,uint32_t seed,void * out)1494 void MurmurHash3_x86_32 ( const void * key, int len,
1495 uint32_t seed, void * out )
1496 {
1497 const uint8_t * data = (const uint8_t*)key;
1498 const int nblocks = len / 4;
1499 uint32_t h1 = seed;
1500 const uint32_t c1 = 0xcc9e2d51;
1501 const uint32_t c2 = 0x1b873593;
1502 //----------
1503 // body
1504 const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
1505 for(int i = -nblocks; i; i++)
1506 {
1507 uint32_t k1 = getblock32(blocks,i);
1508 k1 *= c1;
1509 k1 = ROTL32(k1,15);
1510 k1 *= c2;
1511
1512 h1 ^= k1;
1513 h1 = ROTL32(h1,13);
1514 h1 = h1*5+0xe6546b64;
1515 }
1516 //----------
1517 // tail
1518 const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
1519 uint32_t k1 = 0;
1520 switch(len & 3)
1521 {
1522 case 3: k1 ^= tail[2] << 16;
1523 case 2: k1 ^= tail[1] << 8;
1524 case 1: k1 ^= tail[0];
1525 k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
1526 };
1527 //----------
1528 // finalization
1529 h1 ^= len;
1530 h1 = fmix32(h1);
1531 *(uint32_t*)out = h1;
1532 }
1533
1534 //-----------------------------------------------------------------------------
MurmurHash3_x64_128(const void * key,const int len,const uint32_t seed,void * out)1535 void MurmurHash3_x64_128 ( const void * key, const int len,
1536 const uint32_t seed, void * out )
1537 {
1538 const uint8_t * data = (const uint8_t*)key;
1539 const int nblocks = len / 16;
1540 uint64_t h1 = seed;
1541 uint64_t h2 = seed;
1542 const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
1543 const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
1544 //----------
1545 // body
1546 const uint64_t * blocks = (const uint64_t *)(data);
1547 for(int i = 0; i < nblocks; i++)
1548 {
1549 uint64_t k1 = getblock64(blocks,i*2+0);
1550 uint64_t k2 = getblock64(blocks,i*2+1);
1551 k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
1552 h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
1553 k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
1554 h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
1555 }
1556 //----------
1557 // tail
1558 const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
1559 uint64_t k1 = 0;
1560 uint64_t k2 = 0;
1561 switch(len & 15)
1562 {
1563 case 15: k2 ^= ((uint64_t)tail[14]) << 48;
1564 case 14: k2 ^= ((uint64_t)tail[13]) << 40;
1565 case 13: k2 ^= ((uint64_t)tail[12]) << 32;
1566 case 12: k2 ^= ((uint64_t)tail[11]) << 24;
1567 case 11: k2 ^= ((uint64_t)tail[10]) << 16;
1568 case 10: k2 ^= ((uint64_t)tail[ 9]) << 8;
1569 case 9: k2 ^= ((uint64_t)tail[ 8]) << 0;
1570 k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
1571 case 8: k1 ^= ((uint64_t)tail[ 7]) << 56;
1572 case 7: k1 ^= ((uint64_t)tail[ 6]) << 48;
1573 case 6: k1 ^= ((uint64_t)tail[ 5]) << 40;
1574 case 5: k1 ^= ((uint64_t)tail[ 4]) << 32;
1575 case 4: k1 ^= ((uint64_t)tail[ 3]) << 24;
1576 case 3: k1 ^= ((uint64_t)tail[ 2]) << 16;
1577 case 2: k1 ^= ((uint64_t)tail[ 1]) << 8;
1578 case 1: k1 ^= ((uint64_t)tail[ 0]) << 0;
1579 k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
1580 };
1581 //----------
1582 // finalization
1583 h1 ^= len; h2 ^= len;
1584 h1 += h2;
1585 h2 += h1;
1586 h1 = fmix64(h1);
1587 h2 = fmix64(h2);
1588 h1 += h2;
1589 h2 += h1;
1590 ((uint64_t*)out)[0] = h1;
1591 ((uint64_t*)out)[1] = h2;
1592 }
1593 //-----------------------------------------------------------------------------
1594
1595 //
1596 // combine the idStrings into a single string, separating each with a ; character
1597 // (use of the ; character is discouraged, though not prohibited)
1598 //
catString(const vector<string> & idString,std::string & str)1599 void catString(const vector< string >& idString, std::string & str)
1600 {
1601 str =idString[0];
1602 for(size_t i = 1 ; i < idString.size() ; ++i)
1603 {
1604 str+=";";
1605 str+=idString[i];
1606 }
1607
1608 }
1609 }
1610
MurmurHash32(const std::string & idString)1611 unsigned int IDManifest::MurmurHash32(const std::string & idString)
1612 {
1613 unsigned int out;
1614 MurmurHash3_x86_32(idString.c_str(),idString.size(),0 , (void*) &out);
1615 return out;
1616 }
1617
MurmurHash64(const std::string & idString)1618 uint64_t IDManifest::MurmurHash64(const std::string& idString)
1619 {
1620
1621 uint64_t out[2];
1622 MurmurHash3_x64_128(idString.c_str(),idString.size(),0 , out);
1623 return out[0];
1624 }
1625
MurmurHash32(const vector<string> & idString)1626 unsigned int IDManifest::MurmurHash32(const vector< string >& idString)
1627 {
1628 if(idString.size()==0)
1629 {
1630 return 0;
1631 }
1632 std::string str;
1633 catString(idString,str);
1634 return MurmurHash32(str);
1635 }
1636
1637
1638
1639
1640
MurmurHash64(const vector<string> & idString)1641 uint64_t IDManifest::MurmurHash64(const vector< string >& idString)
1642 {
1643 if(idString.size()==0)
1644 {
1645 return 0;
1646 }
1647 std::string str;
1648 catString(idString,str);
1649 return MurmurHash64(str);
1650 }
1651
1652
1653
1654 OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_EXIT
1655