1 // SPDX-License-Identifier: BSD-3-Clause
2 // Copyright (c) Contributors to the OpenEXR Project.
3 
4 //-----------------------------------------------------------------------------
5 //
6 //        ID Manifest class implementation
7 //
8 //-----------------------------------------------------------------------------
9 
10 #include <ImfIDManifest.h>
11 #include <Iex.h>
12 #include <zlib.h>
13 #include "ImfXdr.h"
14 #include "ImfIO.h"
15 
16 #include <stdint.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <algorithm>
20 
21 //
22 // debugging only
23 //
24 #ifdef DUMP_TABLE
25 #include <iostream>
26 #endif
27 
28 OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_ENTER
29 
30 
31 using namespace OPENEXR_IMF_INTERNAL_NAMESPACE;
32 using std::make_pair;
33 using std::set;
34 using std::vector;
35 using std::map;
36 using std::string;
37 using std::pair;
38 using std::sort;
39 using std::fill;
40 
41 const std::string IDManifest::UNKNOWN = "unknown";
42 const std::string IDManifest::NOTHASHED = "none";
43 const std::string IDManifest::CUSTOMHASH = "custom";
44 const std::string IDManifest::MURMURHASH3_32 = "MurmurHash3_32";
45 const std::string IDManifest::MURMURHASH3_64 = "MurmurHash3_64";
46 
47 const std::string IDManifest::ID_SCHEME = "id";
48 const std::string IDManifest::ID2_SCHEME = "id2";
49 
50 
IDManifest()51 IDManifest::IDManifest() { }
52 
53 namespace
54 {
55 
56 
57 // map of strings to index of string in table
58     typedef std::map<std::string,int> indexedStringSet;
59 
60 
61 
62     // when handling vectors/sets of strings, the string is got by deferencing the pointer/iterator
stringSize(const T & i)63     template<class T> size_t stringSize(const T &i)
64     {
65        return i->size();
66     }
67 
cStr(const T & i)68     template<class T> const char* cStr(const T &i)
69     {
70        return i->c_str();
71     }
72 
73     /*
74     // but for indexedStringSet the string is the first of the iterator pair
75     size_t stringSize(indexedStringSet::const_iterator &i )
76     {
77         return i->first.size();
78     }
79 
80     const char* cStr(indexedStringSet::const_iterator &i)
81     {
82        return i->first.c_str();
83     }
84     */
85 
getVariableLengthIntegerSize(uint64_t value)86     size_t getVariableLengthIntegerSize(uint64_t value)
87     {
88 
89        if(value < 1llu<<7)
90        {
91            return 1;
92        }
93 
94        if(value < 1llu<<14)
95        {
96            return 2;
97        }
98        if(value < 1llu<<21)
99        {
100            return 3;
101        }
102        if(value < 1llu<<28)
103        {
104            return 4;
105        }
106        if(value < 1llu<<35)
107        {
108            return 5;
109        }
110        if(value < 1llu<<42)
111        {
112            return 6;
113        }
114        if(value < 1llu<<49)
115        {
116            return 7;
117        }
118        if(value < 1llu<<56)
119        {
120            return 8;
121        }
122        if(value < 1llu<<63)
123        {
124            return 9;
125        }
126        return 10;
127 
128     }
129 
readVariableLengthInteger(const char * & readPtr,const char * endPtr)130     uint64_t readVariableLengthInteger(const char*& readPtr,const char* endPtr)
131     {
132         // bytes are stored LSB first, so each byte that is read from the stream must be
133         // shifted before mixing into the existing length
134         int shift=0;
135         unsigned char byte=0;
136         uint64_t value=0;
137         do{
138             if(readPtr>=endPtr)
139             {
140                 throw IEX_NAMESPACE::InputExc ("IDManifest too small for variable length integer");
141             }
142             byte =  *(unsigned char*)readPtr++;
143             // top bit of byte isn't part of actual number, it just indicates there's more info to come
144             // so take bottom 7 bits, shift them to the right place, and insert them
145             //
146             value|=(uint64_t(byte&127)) << shift;
147             shift+=7;
148         }while(byte&128); //while top bit set on previous byte, there is more to come
149         return value;
150     }
151 
writeVariableLengthInteger(char * & outPtr,uint64_t value)152     void writeVariableLengthInteger(char*& outPtr,uint64_t value)
153     {
154         do
155         {
156             unsigned char byte = (unsigned char)(value&127);
157             value>>=7;
158             if(value>0)
159             {
160                 byte|=128;
161             }
162             *(unsigned char*) outPtr++ = byte;
163         }
164         while(value>0);
165     }
166 
167 
168 
169     //
170     // read a list of strings into the given container
171     // format is:
172     // numberOfStrings (unless numberOfStrings already passed in)
173     //  length of string 0
174     //  length of string 1
175     //  ...
176     //  string 0
177     //  string 1
178     //  ...
179     //  (the sizes come first then the strings because that helps compression performance)
180     //  note - updates readPtr to point to first byte after readStrings
181     //
182 
183 
readStringList(const char * & readPtr,const char * endPtr,T & outputVector,int numberOfStrings=0)184     template<class T> void readStringList(const char*& readPtr,const char* endPtr,T & outputVector,int numberOfStrings=0)
185     {
186         if(numberOfStrings==0)
187         {
188             if(readPtr+4>endPtr)
189             {
190                 throw IEX_NAMESPACE::InputExc ("IDManifest too small for string list size");
191             }
192             Xdr::read<CharPtrIO>(readPtr,numberOfStrings);
193         }
194 
195         vector<size_t> lengths(numberOfStrings);
196 
197         for( int i=0 ; i<numberOfStrings ; ++i )
198         {
199             lengths[i] = readVariableLengthInteger(readPtr,endPtr);
200         }
201         for( int i=0 ; i<numberOfStrings ; ++i )
202         {
203 
204             if(readPtr+lengths[i]  > endPtr)
205             {
206                 throw IEX_NAMESPACE::InputExc ("IDManifest too small for string");
207             }
208             outputVector.insert(outputVector.end(),string(readPtr , lengths[i]));
209             readPtr += lengths[i];
210         }
211     }
212 
213     //
214     // computes number of bytes required to serialize vector/set of strings
215     //
getStringListSize(const T & stringList,size_t entries=0)216     template<typename T> int getStringListSize(const T& stringList , size_t entries=0)
217     {
218         int totalSize=0;
219         if(entries==0)
220         {
221             totalSize+=4; // 4 bytes to store number of entries;
222         }
223         else
224         {
225             if(stringList.size()!=entries)
226             {
227                   throw IEX_NAMESPACE::InputExc ("Incorrect number of components stored in ID Manifest");
228             }
229         }
230         for(typename T::const_iterator i = stringList.begin() ; i!=stringList.end() ; ++i)
231         {
232             size_t length = stringSize(i);
233             totalSize+=length;
234             // up to five bytes for variable length encoded size
235 
236             totalSize+=getVariableLengthIntegerSize(length);
237 
238 
239 
240         }
241         return totalSize;
242     }
243 
244     //
245     // write string list to outPtr. if entries nonzero, omits number of entries,
246     // but confirms 'entries' == T.size()
247     //
248     template<typename  T> void
writeStringList(char * & outPtr,const T & stringList,int entries=0)249     writeStringList( char*& outPtr,const T& stringList,int entries=0)
250     {
251         int size = stringList.size();
252         if(entries==0)
253         {
254             Xdr::write<CharPtrIO>(outPtr,size);
255         }
256         else
257         {
258             if(size!=entries)
259             {
260                   throw IEX_NAMESPACE::InputExc ("Incorrect number of components stored in ID Manifest");
261             }
262         }
263         for(typename T::const_iterator i = stringList.begin() ; i!=stringList.end() ; ++i)
264         {
265             int stringLength = stringSize(i);
266             //
267             // variable length encoding:
268             // values between 0 and 127 inclusive are stored in a single byte
269             // values betwwen 128 and 16384 are encoded with two bytes: 1LLLLLLL 0MMMMMMMM where L and M are the least and most significant bits of the value
270             // in general, values are stored least significant values first, with the top bit of each byte indicating more values follow
271             // the top bit is clear in the last byte of the value
272             // (this scheme requires two bytes to store values above 1<<7, and five bytes to store values above 1<<28)
273             //
274 
275             writeVariableLengthInteger(outPtr,stringLength);
276 
277 
278         }
279 
280         for(typename T::const_iterator i = stringList.begin() ; i!=stringList.end() ; ++i)
281         {
282             int stringLength = stringSize(i);
283             Xdr::write<CharPtrIO>(outPtr,(const char*) cStr(i),stringLength);
284         }
285     }
286 
getStringSize(const string & str)287     int getStringSize(const string & str)
288     {
289         return 4+str.size();
290     }
291 
readPascalString(const char * & readPtr,const char * endPtr,string & outputString)292     void readPascalString(const char*& readPtr, const char* endPtr, string& outputString)
293     {
294 
295         if(readPtr+4>endPtr)
296         {
297             throw IEX_NAMESPACE::InputExc ("IDManifest too small for string size");
298 
299         }
300         unsigned int length=0;
301         Xdr::read<CharPtrIO>(readPtr,length);
302 
303         if(readPtr+length>endPtr)
304         {
305              throw IEX_NAMESPACE::InputExc ("IDManifest too small for string");
306         }
307         outputString = string((const char*) readPtr,length);
308         readPtr+=length;
309     }
310 
311     void
writePascalString(char * & outPtr,const string & str)312     writePascalString(char*& outPtr,const string& str)
313     {
314         unsigned int length = str.size();
315         Xdr::write<CharPtrIO>((char*&) outPtr,length);
316         Xdr::write<CharPtrIO>((char*&) outPtr,(const char*) str.c_str(),length);
317     }
318 
319 
320 
321 }
322 
IDManifest(const char * data,const char * endOfData)323 IDManifest::IDManifest(const char* data,const char* endOfData)
324 {
325     init(data,endOfData);
326 }
327 
init(const char * data,const char * endOfData)328 void IDManifest::init(const char* data, const char* endOfData)
329 {
330 
331 
332 
333    unsigned int version;
334    Xdr::read<CharPtrIO>( data,version);
335    if(version!=0)
336    {
337        throw IEX_NAMESPACE::InputExc ("Unrecognized IDmanifest version");
338    }
339 
340    //
341    // first comes list of all strings used in manifest
342    //
343    vector<string> stringList;
344    readStringList(data,endOfData,stringList);
345 
346    //
347    // expand the strings in the stringlist
348    // each string begins with number of characters to copy from the previous string
349    // the remainder is the 'new' bit that appears after that
350    //
351 
352    for(size_t i=1;i<stringList.size();++i)
353    {
354 
355       size_t common; // number of characters in common with previous string
356       int stringStart=1; // first character of string itself;
357       //
358       // previous string had more than 255 characters?
359       //
360       if(stringList[i-1].size()>255)
361       {
362           common = size_t( ((unsigned char) (stringList[i][0])) <<8 ) + size_t( (unsigned char) (stringList[i][1]));
363           stringStart=2;
364       }
365       else
366       {
367           common = (unsigned char) stringList[i][0];
368       }
369       if(common>stringList[i-1].size())
370       {
371           throw IEX_NAMESPACE::InputExc ("Bad common string length in IDmanifest string table");
372       }
373       stringList[i] = stringList[i-1].substr(0,common)+stringList[i].substr(stringStart);
374 
375    }
376 
377    //
378    // decode mapping table from indices in table to indices in string list
379    // the mapping uses smaller indices for more commonly occuring strings, since these are encoded with fewer bits
380    // comments in serialize function describe the format
381    //
382 
383    vector<int> mapping(stringList.size());
384 
385 
386    //
387    // overlapping sequences: A list [(4,5),(3,6)] expands to 4,5,3,6 - because 4 and 5 are including already
388    // they are not included again
389    // the 'seen' list indicates which values have already been used, so they are not re-referenced
390    //
391 
392    vector<char> seen(stringList.size());
393 
394    int rleLength;
395    if(endOfData<data+4)
396    {
397        throw IEX_NAMESPACE::InputExc ("IDManifest too small");
398    }
399 
400    Xdr::read<CharPtrIO>( data,rleLength);
401 
402    int currentIndex=0;
403    for(int i=0;i<rleLength;++i)
404    {
405        int first;
406        int last;
407        if(endOfData<data+8)
408        {
409            throw IEX_NAMESPACE::InputExc ("IDManifest too small");
410        }
411        Xdr::read<CharPtrIO>( data,first);
412        Xdr::read<CharPtrIO>( data,last);
413 
414        if(first<0 || last<0 || first>last || first>= int(stringList.size()) || last>=int(stringList.size()))
415        {
416            throw IEX_NAMESPACE::InputExc ("Bad mapping table entry in IDManifest");
417        }
418        for(int entry=first ; entry<=last;entry++)
419        {
420            // don't remap already mapped values
421            if(seen[entry]==0)
422            {
423                mapping[currentIndex]=entry;
424                seen[entry]=1;
425                currentIndex++;
426            }
427        }
428 
429    }
430 
431 #ifdef DUMP_TABLE
432    //
433    // dump mapping table for debugging
434    //
435    for(size_t i=0;i<mapping.size();++i)
436    {
437        std::cout << i << ' ' << mapping[i] << std::endl;
438    }
439 #endif
440 
441 
442 
443 
444    //
445    // number of manifest entries comes after string list
446    //
447    int manifestEntries;
448 
449    if(endOfData<data+4)
450    {
451        throw IEX_NAMESPACE::InputExc ("IDManifest too small");
452    }
453 
454    Xdr::read<CharPtrIO>( data,manifestEntries);
455 
456    _manifest.clear();
457 
458    _manifest.resize(manifestEntries);
459 
460    for(int manifestEntry = 0 ; manifestEntry < manifestEntries ; ++manifestEntry)
461    {
462 
463        ChannelGroupManifest& m = _manifest[manifestEntry];
464 
465        //
466        // read header of this manifest entry
467        //
468        readStringList(data,endOfData,m._channels);
469        readStringList(data,endOfData,m._components);
470 
471        char lifetime;
472        if(endOfData<data+4)
473        {
474            throw IEX_NAMESPACE::InputExc ("IDManifest too small");
475        }
476        Xdr::read<CharPtrIO>(data,lifetime);
477 
478 
479 
480        m.setLifetime(IdLifetime(lifetime));
481        readPascalString(data,endOfData,m._hashScheme);
482        readPascalString(data,endOfData,m._encodingScheme);
483 
484        if(endOfData<data+5)
485        {
486            throw IEX_NAMESPACE::InputExc ("IDManifest too small");
487        }
488        char storageScheme;
489        Xdr::read<CharPtrIO>(data,storageScheme);
490 
491        int tableSize;
492        Xdr::read<CharPtrIO>(data,tableSize);
493 
494        uint64_t previousId=0;
495 
496        for(int entry = 0 ; entry < tableSize ; ++entry)
497        {
498            uint64_t id;
499 
500            switch(storageScheme)
501            {
502                case 0 :
503                {
504                    if(endOfData<data+8)
505                    {
506                        throw IEX_NAMESPACE::InputExc ("IDManifest too small");
507                    }
508                    Xdr::read<CharPtrIO>(data,id);
509                    break;
510                }
511                case 1 :
512                {
513                    if(endOfData<data+4)
514                    {
515                        throw IEX_NAMESPACE::InputExc ("IDManifest too small");
516                    }
517                    unsigned int id32;
518                    Xdr::read<CharPtrIO>(data,id32);
519                    id = id32;
520                    break;
521                }
522                default :
523                {
524                    id = readVariableLengthInteger(data,endOfData);
525                }
526 
527            }
528 
529            id+=previousId;
530            previousId=id;
531 
532            //
533            // insert into table - insert tells us if it was already there
534            //
535            pair< map< uint64_t, vector<string> >::iterator,bool> insertion = m._table.insert(make_pair(id,vector<string>()));
536            if(insertion.second==false)
537            {
538                throw IEX_NAMESPACE::InputExc("ID manifest contains multiple entries for the same ID");
539            }
540            (insertion.first)->second.resize(m.getComponents().size());
541            for(size_t i=0;i<m.getComponents().size();++i)
542            {
543                int stringIndex = readVariableLengthInteger(data,endOfData);
544                if(size_t(stringIndex)>stringList.size() || stringIndex<0)
545                {
546                    throw IEX_NAMESPACE::InputExc ("Bad string index in IDManifest");
547                }
548                (insertion.first)->second[i]=stringList[ mapping[ stringIndex] ];
549            }
550        }
551    }
552 }
553 
554 
IDManifest(const CompressedIDManifest & compressed)555 IDManifest::IDManifest(const CompressedIDManifest& compressed)
556 {
557    //
558    // decompress the compressed manifest
559    //
560 
561 
562    vector<Bytef> uncomp(compressed._uncompressedDataSize);
563    uLongf outSize = compressed._uncompressedDataSize;
564    if(Z_OK != ::uncompress(&uncomp[0] , &outSize , (const Bytef*) compressed._data, compressed._compressedDataSize))
565    {
566        throw IEX_NAMESPACE::InputExc ("IDManifest decompression (zlib) failed.");
567    }
568    if(outSize!=compressed._uncompressedDataSize)
569    {
570        throw IEX_NAMESPACE::InputExc ("IDManifest decompression (zlib) failed: mismatch in decompressed data size");
571    }
572 
573    init((const char*) &uncomp[0],(const char*) &uncomp[0] + outSize);
574 
575 
576 
577 }
578 
579 
serialize(std::vector<char> & data) const580 void IDManifest::serialize(std::vector< char >& data) const
581 {
582 
583 
584 
585    indexedStringSet stringSet;
586 
587    //
588    // build string map - this turns unique strings into indices
589    // the manifest stores the string indices - this allows duplicated
590    // strings to point to the same place
591    // grabs all the strings regardless of which manifest/mapping they are in
592    //
593    // at this point we just count the manifest entries
594    //
595    {
596         //
597         // over each channel group
598         //
599        for(size_t m=0;m<_manifest.size();++m)
600        {
601            // over each mapping
602            for( IDManifest::ChannelGroupManifest::IDTable::const_iterator i = _manifest[m]._table.begin(); i!=_manifest[m]._table.end(); ++i)
603            {
604                // over each string in the mapping
605 
606                for(size_t s = 0 ; s < i->second.size() ; ++s)
607                {
608                    stringSet[ i->second[s] ]++;
609                }
610            }
611        }
612    }
613 
614 
615 
616    //
617    // build compressed string representation - all but first string starts with number of characters to copy from previous string.
618    // max 65535 bytes - use two bytes to store if previous string was more than 255 characters, big endian
619    //
620    vector<string> prefixedStringList(stringSet.size());
621 
622    //
623    // also make a sorted list so the most common entry appears first. Keep equally likely entries in numerical order
624    //
625    vector< pair<int,int> > sortedIndices(stringSet.size());
626 
627    string prevString;
628    int index=0;
629    for( indexedStringSet::iterator i = stringSet.begin() ; i!= stringSet.end() ; ++i)
630    {
631 
632 
633        // no prefix on first string - map stores index of each string, so use that rather than a counter;
634        if(index==0)
635        {
636            prefixedStringList[index]=i->first;
637        }
638        else
639        {
640            size_t common=0;
641            while(common<65535
642                && common < prevString.size()
643                && common < i->first.size()
644                && prevString[common] == i->first[common])
645            {
646                ++common;
647            }
648 
649            if(prevString.size()>255)
650            {
651                //
652                // long previous string - use two bytes to encode number of common chars
653                //
654                prefixedStringList[index] = string( 1, char(common>>8)) + string(1,char(common&255)) + i->first.substr(common);
655            }
656            else
657            {
658                prefixedStringList[index] = string( 1,char(common)) + i->first.substr(common);
659            }
660 
661        }
662        prevString = i->first;
663        sortedIndices[index].first = -i->second; // use negative of count so largest count appears first
664        sortedIndices[index].second = index;
665 
666        //
667        // also, repurpose stringSet so that it maps from string names to indices in the string table
668        //
669        i->second = index;
670 
671        index++;
672    }
673 
674    sort( sortedIndices.begin(),sortedIndices.end());
675 
676    //
677    // the first 1<<7 characters will all be encoded with 1 byte, regardless of how common they are
678    // the next 1<<14 characters will be encoded with 2 bytes
679    // (a full huffman encode would do this at the bit level, not the byte level)
680    //
681    // the mapping table can be reduced in size by rewriting the IDs to exploit that
682    // can rearrange the IDs to have more long runs by sorting numbers
683    // that will need the same number of bytes to encode together
684    //
685    {
686        size_t i=0;
687 
688        for(;i<sortedIndices.size() && i <1<<7;++i)
689        {
690            sortedIndices[i].first=1;
691        }
692        for(;i<sortedIndices.size() && i <1<<14;++i)
693        {
694            sortedIndices[i].first=2;
695        }
696        for(;i<sortedIndices.size() && i <1<<21;++i)
697        {
698            sortedIndices[i].first=3;
699        }
700        for(;i<sortedIndices.size() && i <1<<28;++i)
701        {
702            sortedIndices[i].first=4;
703        }
704        for(;i<sortedIndices.size();++i)
705        {
706            sortedIndices[i].first=5;
707        }
708    }
709    sort( sortedIndices.begin(),sortedIndices.end());
710 
711 
712 
713    vector<int> stringIndices(sortedIndices.size());
714 
715    //
716    // table will be stored with RLE encoding - store pairs of 'start index,end index'
717    // so, the sequence 10,11,12,1,2,3,4  is stored as [ (10,12) , (1,4)]
718    //
719    // sequential IDs ignore already referenced IDs, so the sequence  11,9,10,12,13 can be stored as [ (11,11) , (9,13)]
720    // on reading, don't reference an entry that has already been seen
721    // on writing, need to track which entries have already been stored to allow this overlapping to occur
722    //
723 
724    vector< pair<int,int> > RLEmapping;
725 
726    if(sortedIndices.size()>0)
727    {
728        RLEmapping.resize(1);
729        RLEmapping[0].first = sortedIndices[0].second;
730        RLEmapping[0].second = sortedIndices[0].second;
731 
732        fill(stringIndices.begin(),stringIndices.end(),-1);
733 
734        stringIndices[sortedIndices[0].second]=0;
735 
736        //
737        // as the loop below runs, nextToInclude tracks the value that can be merged with the current run length
738        // (RLWmapping.back()) - generally this is on more than the current length, but it jumps forward
739        // over values already seen
740        //
741        int nextToInclude=stringIndices[sortedIndices[0].second]+1;
742 
743 
744        for( size_t i=1;i<sortedIndices.size() ; ++i)
745        {
746            if( sortedIndices[i].second == nextToInclude)
747            {
748                //
749                // this index can be treated as part of the current run, so extend the run to include it
750                //
751                RLEmapping.back().second=sortedIndices[i].second;
752 
753            }
754            else
755            {
756                pair<int,int> newEntry(sortedIndices[i].second,sortedIndices[i].second);
757                RLEmapping.push_back(newEntry);
758            }
759            // build mapping for this entry
760            stringIndices[sortedIndices[i].second]=i;
761 
762            // what would the next entry have to be to be included in this run
763            // skip over already mapped strings
764            nextToInclude=sortedIndices[i].second+1;
765 
766 
767            while ( nextToInclude < int(stringIndices.size()) && stringIndices[nextToInclude]>=0)
768            {
769                nextToInclude++;
770            }
771        }
772    }
773 #ifdef DUMP_TABLE
774    // dump RLE table for debugging
775    for( size_t i=1;i<sortedIndices.size() ; ++i)
776    {
777        std::cout << i << ' ' << sortedIndices[i].second << std::endl;
778    }
779 #endif
780 
781 
782 
783     // now compute size of uncompressed memory block for serialization
784 
785    int outputSize = 8; // at least need four bytes for integer to store number of channel manifests, plus four bytes to indicate version pattern
786 
787     outputSize += getStringListSize(prefixedStringList);
788 
789     //
790     // RLE mapping table size - number of entries followed by eight bytes for each run length
791     //
792     outputSize+= RLEmapping.size()*8+4;
793 
794     //
795     // track which storage scheme is optimal for storing the IDs of each type
796     // ID storage scheme: 0 = 8 bytes per ID, 1 = 4 bytes per ID, 2 = variable
797     //
798 
799     std::vector<char> storageSchemes;
800 
801    for(size_t groupNumber = 0 ; groupNumber < _manifest.size() ; ++groupNumber)
802    {
803        const ChannelGroupManifest& m = _manifest[groupNumber];
804        outputSize += getStringListSize(m._channels); //size of channel group
805        outputSize += getStringListSize(m._components); //size of component list
806        outputSize += 1; //size of lifetime enum
807        outputSize += getStringSize(m._hashScheme);
808        outputSize += getStringSize(m._encodingScheme);
809 
810        outputSize += 1; // ID scheme
811        outputSize += 4; // size of storage for number of 32 bit entries in ID table
812 
813        uint64_t previousId=0;
814        uint64_t IdStorageForVariableScheme = 0;
815        bool canUse32Bits = true;
816        for( IDManifest::ChannelGroupManifest::IDTable::const_iterator i = m._table.begin(); i!=m._table.end(); ++i)
817        {
818 
819            uint64_t idToStore = i->first-previousId;
820            IdStorageForVariableScheme+=getVariableLengthIntegerSize(idToStore);
821            if(idToStore >= 1llu<<32)
822            {
823                canUse32Bits = false;
824            }
825            previousId=i->first;
826 
827            for(size_t s=0;s<m._components.size();++s)
828            {
829                int stringID = stringSet[ i->second[s] ];
830                int idToWrite = stringIndices[stringID];
831                outputSize+= getVariableLengthIntegerSize(idToWrite);
832            }
833        }
834        // pick best scheme to use to store IDs
835        if(canUse32Bits)
836        {
837            if(IdStorageForVariableScheme < m._table.size()*4)
838            {
839                //
840                // variable storage smaller than fixed 32 bit, so use that
841                //
842                storageSchemes.push_back(2);
843                outputSize+=IdStorageForVariableScheme;
844            }
845            else
846            {
847                //
848                // variable scheme bigger than fixed 32 bit, but all ID differences fit into 32 bits
849                //
850                storageSchemes.push_back(1);
851                outputSize+=m._table.size()*4;
852            }
853        }
854        else
855        {
856            if(IdStorageForVariableScheme < m._table.size()*8)
857            {
858                //
859                // variable storage smaller than fixed 64 bit, so use that
860                //
861                storageSchemes.push_back(2);
862                outputSize+=IdStorageForVariableScheme;
863            }
864            else
865            {
866                //
867                // variable scheme bigger than fixed 64 bit, and some ID differences bigger than 32 bit
868                //
869                storageSchemes.push_back(0);
870                outputSize+=m._table.size()*8;
871            }
872        }
873 
874    }
875 
876    //
877    // resize output array
878    //
879    data.resize(outputSize);
880 
881 
882 
883    //
884    // populate output array
885    //
886    char* outPtr = &data[0];
887 
888    //
889    // zeroes to indicate this is version 0 of the header
890    //
891    Xdr::write<CharPtrIO>(outPtr, int(0));
892 
893    //
894    // table of strings
895    //
896     writeStringList(outPtr,prefixedStringList);
897 
898 
899    //
900    // RLE block
901    //
902    Xdr::write<CharPtrIO>(outPtr, int(RLEmapping.size()));
903    for(size_t i=0;i<RLEmapping.size();++i)
904    {
905        Xdr::write<CharPtrIO>(outPtr, RLEmapping[i].first);
906        Xdr::write<CharPtrIO>(outPtr, RLEmapping[i].second);
907    }
908 
909 
910 
911    //
912    // number of manifests
913    //
914    Xdr::write<CharPtrIO>(outPtr, int(_manifest.size()));
915    int manifestIndex=0;
916 
917   for(size_t groupNumber = 0 ; groupNumber < _manifest.size() ; ++groupNumber)
918    {
919        const ChannelGroupManifest& m = _manifest[groupNumber];
920        //
921        // manifest header
922        //
923        writeStringList(outPtr,m._channels);
924        writeStringList(outPtr,m._components);
925        Xdr::write<CharPtrIO>(outPtr,char(m._lifeTime));
926        writePascalString(outPtr,m._hashScheme);
927        writePascalString(outPtr,m._encodingScheme);
928 
929        char scheme = storageSchemes[manifestIndex];
930        Xdr::write<CharPtrIO>(outPtr,scheme);
931 
932        Xdr::write<CharPtrIO>(outPtr,int(m._table.size()));
933 
934        uint64_t previousId=0;
935        //
936        // table
937        //
938        for( IDManifest::ChannelGroupManifest::IDTable::const_iterator i = m._table.begin(); i!=m._table.end(); ++i)
939        {
940 
941            uint64_t idToWrite = i->first-previousId;
942            switch(scheme)
943            {
944                case 0 : Xdr::write<CharPtrIO>(outPtr, idToWrite);break;
945                case 1 : Xdr::write<CharPtrIO>(outPtr,(unsigned int) idToWrite);break;
946                case 2 : writeVariableLengthInteger(outPtr,idToWrite);
947            }
948 
949            previousId=i->first;
950 
951            for(size_t s=0;s<m._components.size();++s)
952            {
953                int stringID = stringSet[ i->second[s] ];
954                int idToWrite = stringIndices[stringID];
955                writeVariableLengthInteger(outPtr , idToWrite );
956            }
957        }
958        manifestIndex++;
959    }
960    //
961    // check we've written the ID manifest correctly
962    //
963    if(outPtr!=&data[0]+data.size())
964    {
965        throw IEX_NAMESPACE::ArgExc("Error - IDManifest size error");
966    }
967 }
968 
969 bool
operator ==(const IDManifest & other) const970 IDManifest::operator==(const IDManifest& other) const
971 {
972    return other._manifest == _manifest;
973 }
974 
975 bool
operator !=(const IDManifest & other) const976 IDManifest::operator!=(const IDManifest& other) const
977 {
978    return !(*this==other);
979 }
980 
981 bool
merge(const IDManifest & other)982 IDManifest::merge(const IDManifest& other)
983 {
984    bool conflict = false;
985    for(size_t otherManifest = 0 ; otherManifest < other._manifest.size() ; ++ otherManifest)
986    {
987        bool merged = false;
988        for(size_t thisManifest = 0 ; thisManifest < _manifest.size() ; ++ thisManifest)
989        {
990            if( _manifest[thisManifest]._channels == other._manifest[otherManifest]._channels)
991            {
992                // found same channels
993 
994                merged = true;
995 
996                if(other._manifest[otherManifest]._components !=  _manifest[thisManifest]._components)
997                {
998                    // cannot merge if components are different
999                    conflict = true;
1000                }
1001                else
1002                {
1003 
1004 //                    if(other._manifest[otherManifest]._encodingScheme !=  _manifest[thisManifest]._encodingScheme ||
1005 //                        other._manifest[otherManifest]._hashScheme !=  _manifest[thisManifest]._hashScheme ||
1006 //                        other._manifest[otherManifest]._hashScheme !=  _manifest[thisManifest]._hashScheme ||
1007 //                        other._manifest[otherManifest]._lifeTime !=  _manifest[thisManifest]._lifeTime)
1008 //                    {
1009 //                        conflict = true;
1010 //                    }
1011 
1012                    for( IDManifest::ChannelGroupManifest::ConstIterator it = other._manifest[otherManifest].begin() ; it!= other._manifest[otherManifest].end() ; ++it)
1013                    {
1014                       IDManifest::ChannelGroupManifest::ConstIterator ours = _manifest[thisManifest].find( it.id());
1015                       if( ours ==  _manifest[thisManifest].end())
1016                       {
1017                           _manifest[thisManifest].insert( it.id() , it.text());
1018                       }
1019                       else
1020                       {
1021                           if(ours.text() != it.text())
1022                           {
1023                               conflict = true;
1024                           }
1025                       }
1026                    }
1027                }
1028            }
1029        }
1030 
1031        if(!merged)
1032        {
1033            _manifest.push_back(other._manifest[otherManifest]);
1034        }
1035 
1036 
1037    }
1038 
1039    return conflict;
1040 
1041 }
1042 
1043 
CompressedIDManifest()1044 CompressedIDManifest::CompressedIDManifest() : _compressedDataSize(0) ,  _uncompressedDataSize(0) , _data(NULL) {}
1045 
1046 
CompressedIDManifest(const CompressedIDManifest & other)1047 CompressedIDManifest::CompressedIDManifest(const CompressedIDManifest& other)
1048    : _compressedDataSize(other._compressedDataSize) ,
1049      _uncompressedDataSize(other._uncompressedDataSize) ,
1050      _data((unsigned char*) malloc(other._compressedDataSize) )
1051 {
1052    memcpy(_data,other._data,_compressedDataSize);
1053 }
1054 
1055 CompressedIDManifest&
operator =(const CompressedIDManifest & other)1056 CompressedIDManifest::operator=(const CompressedIDManifest& other)
1057 {
1058     if(this!=&other)
1059     {
1060         if(_data)
1061         {
1062             free(_data);
1063         }
1064         _data = (unsigned char*)  malloc( other._compressedDataSize);
1065         _compressedDataSize = other._compressedDataSize;
1066         _uncompressedDataSize = other._uncompressedDataSize;
1067         memcpy(_data,other._data,_compressedDataSize);
1068     }
1069     return *this;
1070 }
1071 
1072 
1073 
~CompressedIDManifest()1074 CompressedIDManifest::~CompressedIDManifest()
1075 {
1076     if(_data)
1077     {
1078         free(_data);
1079     }
1080     _data = NULL;
1081    _compressedDataSize=0;
1082 }
1083 
1084 
1085 
CompressedIDManifest(const IDManifest & manifest)1086 CompressedIDManifest::CompressedIDManifest(const IDManifest& manifest)
1087 {
1088    //
1089    // make a compressed copy of the manifest by serializing the data into contiguous memory,
1090    // then calling zlib to compress
1091    //
1092 
1093 
1094     std::vector<char> serial;
1095 
1096     manifest.serialize(serial);
1097 
1098     uLong outputSize = serial.size();
1099 
1100     //
1101     // allocate a buffer which is guaranteed to be big enough for compression
1102     //
1103    uLongf compressedDataSize = compressBound(outputSize);
1104    _data = (unsigned char*) malloc(compressedDataSize);
1105    if(Z_OK != ::compress(_data,&compressedDataSize,(Bytef*) &serial[0],outputSize))
1106    {
1107        throw IEX_NAMESPACE::InputExc("ID manifest compression failed");
1108    }
1109 
1110    // now call realloc to reallocate the buffer to a smaller size - this might free up memory
1111    _data = (unsigned char*) realloc(_data,compressedDataSize);
1112 
1113    _uncompressedDataSize = outputSize;
1114    _compressedDataSize = compressedDataSize;
1115 
1116 }
1117 
ChannelGroupManifest()1118 IDManifest::ChannelGroupManifest::ChannelGroupManifest() : _lifeTime(IDManifest::LIFETIME_STABLE) , _hashScheme(IDManifest::UNKNOWN) , _encodingScheme(IDManifest::UNKNOWN) , _insertingEntry(false)
1119 {
1120 
1121 }
1122 
getComponents() const1123 const vector< string >& IDManifest::ChannelGroupManifest::getComponents() const
1124 {
1125    return _components;
1126 }
1127 
getChannels()1128 set< string >& IDManifest::ChannelGroupManifest::getChannels()
1129 {
1130   return _channels;
1131 }
1132 
getChannels() const1133 const set< string >& IDManifest::ChannelGroupManifest::getChannels() const
1134 {
1135    return _channels;
1136 }
1137 
setChannel(const string & channel)1138 void IDManifest::ChannelGroupManifest::setChannel(const string& channel)
1139 {
1140    _channels.clear();
1141    _channels.insert(channel);
1142 }
1143 
setChannels(const set<string> & channels)1144 void IDManifest::ChannelGroupManifest::setChannels(const set< string >& channels)
1145 {
1146    _channels = channels;
1147 }
1148 
1149 
1150 
1151 
1152 
1153 
1154 //
1155 // set number of components of table
1156 //
setComponents(const std::vector<std::string> & components)1157 void IDManifest::ChannelGroupManifest::setComponents(const std::vector< std::string >& components)
1158 {
1159 
1160     // if there are already entries in the table, cannot change the number of components
1161     if(_table.size()!=0 && components.size()!=_components.size())
1162     {
1163         THROW (IEX_NAMESPACE::ArgExc, "attempt to change number of components in manifest once entries have been added");
1164     }
1165     _components = components;
1166 }
1167 
setComponent(const std::string & component)1168 void IDManifest::ChannelGroupManifest::setComponent(const std::string & component)
1169 {
1170     vector<string> components(1);
1171     components[0] = component;
1172     setComponents(components);
1173 }
1174 
1175 IDManifest::ChannelGroupManifest::ConstIterator
begin() const1176 IDManifest::ChannelGroupManifest::begin() const
1177 {
1178     return IDManifest::ChannelGroupManifest::ConstIterator(_table.begin());
1179 }
1180 
1181 IDManifest::ChannelGroupManifest::Iterator
begin()1182 IDManifest::ChannelGroupManifest::begin()
1183 {
1184   return IDManifest::ChannelGroupManifest::Iterator(_table.begin());
1185 }
1186 
1187 IDManifest::ChannelGroupManifest::ConstIterator
end() const1188 IDManifest::ChannelGroupManifest::end() const
1189 {
1190    return IDManifest::ChannelGroupManifest::ConstIterator(_table.end());
1191 }
1192 
end()1193 IDManifest::ChannelGroupManifest::Iterator IDManifest::ChannelGroupManifest::end()
1194 {
1195     return IDManifest::ChannelGroupManifest::Iterator(_table.end());
1196 }
1197 
1198 IDManifest::ChannelGroupManifest::ConstIterator
find(uint64_t idValue) const1199 IDManifest::ChannelGroupManifest::find(uint64_t idValue) const
1200 {
1201     return IDManifest::ChannelGroupManifest::ConstIterator(_table.find(idValue));
1202 }
1203 
1204 void
erase(uint64_t idValue)1205 IDManifest::ChannelGroupManifest::erase(uint64_t idValue)
1206 {
1207    _table.erase(idValue);
1208 }
1209 size_t
size() const1210 IDManifest::ChannelGroupManifest::size() const
1211 {
1212    return _table.size();
1213 }
1214 
1215 
1216 
find(uint64_t idValue)1217 IDManifest::ChannelGroupManifest::Iterator IDManifest::ChannelGroupManifest::find(uint64_t idValue)
1218 {
1219     return IDManifest::ChannelGroupManifest::Iterator(_table.find(idValue));
1220 }
1221 
operator [](uint64_t idValue)1222 std::vector< std::string >& IDManifest::ChannelGroupManifest::operator[](uint64_t idValue)
1223 {
1224      return _table[idValue];
1225 }
1226 
1227 
1228 
1229 IDManifest::ChannelGroupManifest::Iterator
insert(uint64_t idValue,const std::string & text)1230 IDManifest::ChannelGroupManifest::insert(uint64_t idValue, const std::string& text)
1231 {
1232     if(_components.size()!=1)
1233     {
1234         THROW (IEX_NAMESPACE::ArgExc, "Cannot insert single component attribute into manifest with multiple components");
1235     }
1236     vector<string> tempVector(1);
1237     tempVector[0] = text;
1238     return IDManifest::ChannelGroupManifest::Iterator(_table.insert(make_pair(idValue,tempVector)).first);
1239 }
1240 
1241 
1242 IDManifest::ChannelGroupManifest::Iterator
insert(uint64_t idValue,const std::vector<std::string> & text)1243 IDManifest::ChannelGroupManifest::insert(uint64_t idValue, const std::vector< std::string >& text)
1244 {
1245    if(_components.size()!=text.size())
1246    {
1247        THROW (IEX_NAMESPACE::ArgExc, "mismatch between number of components in manifest and number of components in inserted entry");
1248    }
1249     return IDManifest::ChannelGroupManifest::Iterator(_table.insert(make_pair(idValue,text)).first);
1250 }
1251 
1252 
1253 
1254 uint64_t
insert(const std::vector<std::string> & text)1255 IDManifest::ChannelGroupManifest::insert(const std::vector< std::string >& text)
1256 {
1257     uint64_t hash;
1258     if(_hashScheme == MURMURHASH3_32)
1259     {
1260         hash = MurmurHash32(text);
1261     }
1262     else if(_hashScheme == MURMURHASH3_64)
1263     {
1264         hash = MurmurHash64(text);
1265     }
1266     else
1267     {
1268          THROW (IEX_NAMESPACE::ArgExc, "Cannot compute hash: unknown hashing scheme");
1269     }
1270     insert(hash,text);
1271     return hash;
1272 
1273 }
1274 
1275 uint64_t
insert(const std::string & text)1276 IDManifest::ChannelGroupManifest::insert(const std::string & text)
1277 {
1278     uint64_t hash;
1279     if(_hashScheme == MURMURHASH3_32)
1280     {
1281         hash = MurmurHash32(text);
1282     }
1283     else if(_hashScheme == MURMURHASH3_64)
1284     {
1285         hash = MurmurHash64(text);
1286     }
1287     else
1288     {
1289          THROW (IEX_NAMESPACE::ArgExc, "Cannot compute hash: unknown hashing scheme");
1290     }
1291     insert(hash,text);
1292     return hash;
1293 }
1294 
1295 
1296 
1297 IDManifest::ChannelGroupManifest&
operator <<(uint64_t idValue)1298 IDManifest::ChannelGroupManifest::operator<<(uint64_t idValue)
1299 {
1300    if(_insertingEntry)
1301    {
1302         THROW (IEX_NAMESPACE::ArgExc,"not enough components inserted into previous entry in ID table before inserting new entry");
1303    }
1304 
1305    _insertionIterator = _table.insert( make_pair(idValue,std::vector<std::string>() )).first;
1306 
1307    //
1308    // flush out previous entry: reinserting an attribute overwrites previous entry
1309    //
1310    _insertionIterator->second.resize(0);
1311 
1312 
1313    //
1314    // curious edge-case: it's possible to have an ID table with no strings, just a list of IDs
1315    // There's little purpose to this, but it means that this entry is now 'complete'
1316    //
1317    if(_components.size()==0)
1318    {
1319        _insertingEntry = false;
1320    }
1321    else
1322    {
1323        _insertingEntry = true;
1324    }
1325      return *this;
1326 }
1327 
1328 IDManifest::ChannelGroupManifest&
operator <<(const std::string & text)1329 IDManifest::ChannelGroupManifest::operator<<(const std::string& text)
1330 {
1331     if(!_insertingEntry)
1332     {
1333         THROW (IEX_NAMESPACE::ArgExc,"attempt to insert too many strings into entry, or attempt to insert text before ID integer");
1334     }
1335     if(_insertionIterator->second.size() >= _components.size())
1336     {
1337         THROW (IEX_NAMESPACE::ArgExc,"Internal error: too many strings in component");
1338     }
1339     _insertionIterator->second.push_back(text);
1340 
1341     //
1342     // if the last component has been inserted, switch off insertingEntry, to mark all entries as complete
1343     //
1344     if(_insertionIterator->second.size() == _components.size())
1345     {
1346         _insertingEntry = false;
1347     }
1348     return *this;
1349 }
1350 
1351 
1352 bool
operator ==(const IDManifest::ChannelGroupManifest & other) const1353 IDManifest::ChannelGroupManifest::operator==(const IDManifest::ChannelGroupManifest& other) const
1354 {
1355     return (_lifeTime == other._lifeTime &&
1356            _components == other._components &&
1357            _hashScheme == other._hashScheme &&
1358            _components == other._components &&
1359            _table == other._table);
1360 
1361 }
1362 
1363 
1364 size_t
size() const1365 IDManifest::size() const
1366 {
1367   return _manifest.size();
1368 }
1369 
1370 size_t
find(const string & channel) const1371 IDManifest::find(const string& channel) const
1372 {
1373     // search the set of channels for each ChannelGroupManifest searching for
1374     // one that contains 'channel'
1375     for( size_t i = 0 ; i < _manifest.size() ; ++i )
1376     {
1377 
1378         if( _manifest[i].getChannels().find(channel) != _manifest[i].getChannels().end())
1379         {
1380             return i;
1381         }
1382     }
1383     //  not find, return size()
1384     return _manifest.size();
1385 
1386 }
1387 
1388 IDManifest::ChannelGroupManifest&
add(const set<string> & group)1389 IDManifest::add(const set< string >& group)
1390 {
1391    _manifest.push_back(ChannelGroupManifest());
1392    ChannelGroupManifest& mfst = _manifest.back();
1393    mfst._channels = group;
1394    return mfst;
1395 }
1396 
1397 IDManifest::ChannelGroupManifest&
add(const string & channel)1398 IDManifest::add(const string& channel)
1399 {
1400    _manifest.push_back(ChannelGroupManifest());
1401    ChannelGroupManifest& mfst = _manifest.back();
1402    mfst._channels.insert(channel);
1403    return mfst;
1404 }
1405 
1406 IDManifest::ChannelGroupManifest&
add(const IDManifest::ChannelGroupManifest & table)1407 IDManifest::add(const IDManifest::ChannelGroupManifest& table)
1408 {
1409    _manifest.push_back(table);
1410    return _manifest.back();
1411 }
1412 
operator [](size_t index)1413 IDManifest::ChannelGroupManifest& IDManifest::operator[](size_t index)
1414 {
1415    return _manifest[index];
1416 }
1417 
operator [](size_t index) const1418 const IDManifest::ChannelGroupManifest& IDManifest::operator[](size_t index) const
1419 {
1420   return _manifest[index];
1421 }
1422 
1423 
1424 
1425 
1426 
1427 namespace
1428 {
1429 
1430 //-----------------------------------------------------------------------------
1431 // MurmurHash3 was written by Austin Appleby, and is placed in the public
1432 // domain. The author hereby disclaims copyright to this source code.
1433 //
1434 // smhasher provides two different 128 bit hash schemes, optimised for either
1435 // 32 or 64 bit architectures. IDManifest uses only the 64 bit optimised version
1436 // of the 128 bit hash function to generate '64 bit hashes'
1437 //-----------------------------------------------------------------------------
1438 // Platform-specific functions and macros
1439 // Microsoft Visual Studio
1440 #if defined(_MSC_VER)
1441 #define FORCE_INLINE          __forceinline
1442 #define ROTL32(x,y) _rotl(x,y)
1443 #define ROTL64(x,y) _rotl64(x,y)
1444 #define BIG_CONSTANT(x) (x)
1445 // Other compilers
1446 #else     // defined(_MSC_VER)
1447 #define   FORCE_INLINE inline __attribute__((always_inline))
1448 inline uint32_t rotl32 ( uint32_t x, int8_t r )
1449 {
1450   return (x << r) | (x >> (32 - r));
1451 }
1452 inline uint64_t rotl64 ( uint64_t x, int8_t r )
1453 {
1454   return (x << r) | (x >> (64 - r));
1455 }
1456 #define   ROTL32(x,y)         rotl32(x,y)
1457 #define ROTL64(x,y) rotl64(x,y)
1458 #define BIG_CONSTANT(x) (x##LLU)
1459 #endif // !defined(_MSC_VER)
1460 //-----------------------------------------------------------------------------
1461 // Block read - if your platform needs to do endian-swapping or can only
1462 // handle aligned reads, do the conversion here
getblock32(const uint32_t * p,int i)1463 FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i )
1464 {
1465   return p[i];
1466 
1467 }
getblock64(const uint64_t * p,int i)1468 FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
1469 {
1470   return p[i];
1471 }
1472 //-----------------------------------------------------------------------------
1473 // Finalization mix - force all bits of a hash block to avalanche
fmix32(uint32_t h)1474 FORCE_INLINE uint32_t fmix32 ( uint32_t h )
1475 {
1476   h ^= h >> 16;
1477   h *= 0x85ebca6b;
1478   h ^= h >> 13;
1479   h *= 0xc2b2ae35;
1480   h ^= h >> 16;
1481   return h;
1482 }
1483 //----------
fmix64(uint64_t k)1484 FORCE_INLINE uint64_t fmix64 ( uint64_t k )
1485 {
1486   k ^= k >> 33;
1487   k *= BIG_CONSTANT(0xff51afd7ed558ccd);
1488   k ^= k >> 33;
1489   k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
1490   k ^= k >> 33;
1491   return k;
1492 }
1493 //-----------------------------------------------------------------------------
MurmurHash3_x86_32(const void * key,int len,uint32_t seed,void * out)1494 void MurmurHash3_x86_32 ( const void * key, int len,
1495                           uint32_t seed, void * out )
1496 {
1497   const uint8_t * data = (const uint8_t*)key;
1498   const int nblocks = len / 4;
1499   uint32_t h1 = seed;
1500   const uint32_t c1 = 0xcc9e2d51;
1501   const uint32_t c2 = 0x1b873593;
1502   //----------
1503   // body
1504   const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
1505   for(int i = -nblocks; i; i++)
1506   {
1507     uint32_t k1 = getblock32(blocks,i);
1508     k1 *= c1;
1509     k1 = ROTL32(k1,15);
1510     k1 *= c2;
1511 
1512     h1 ^= k1;
1513     h1 = ROTL32(h1,13);
1514     h1 = h1*5+0xe6546b64;
1515   }
1516   //----------
1517   // tail
1518   const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
1519   uint32_t k1 = 0;
1520   switch(len & 3)
1521   {
1522   case 3: k1 ^= tail[2] << 16;
1523   case 2: k1 ^= tail[1] << 8;
1524   case 1: k1 ^= tail[0];
1525           k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
1526   };
1527   //----------
1528   // finalization
1529   h1 ^= len;
1530   h1 = fmix32(h1);
1531   *(uint32_t*)out = h1;
1532 }
1533 
1534 //-----------------------------------------------------------------------------
MurmurHash3_x64_128(const void * key,const int len,const uint32_t seed,void * out)1535 void MurmurHash3_x64_128 ( const void * key, const int len,
1536                            const uint32_t seed, void * out )
1537 {
1538   const uint8_t * data = (const uint8_t*)key;
1539   const int nblocks = len / 16;
1540   uint64_t h1 = seed;
1541   uint64_t h2 = seed;
1542   const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
1543   const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
1544   //----------
1545   // body
1546   const uint64_t * blocks = (const uint64_t *)(data);
1547   for(int i = 0; i < nblocks; i++)
1548   {
1549     uint64_t k1 = getblock64(blocks,i*2+0);
1550     uint64_t k2 = getblock64(blocks,i*2+1);
1551     k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
1552     h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
1553     k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
1554     h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
1555   }
1556   //----------
1557   // tail
1558   const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
1559   uint64_t k1 = 0;
1560   uint64_t k2 = 0;
1561   switch(len & 15)
1562   {
1563       case 15: k2 ^= ((uint64_t)tail[14]) << 48;
1564       case 14: k2 ^= ((uint64_t)tail[13]) << 40;
1565       case 13: k2 ^= ((uint64_t)tail[12]) << 32;
1566       case 12: k2 ^= ((uint64_t)tail[11]) << 24;
1567       case 11: k2 ^= ((uint64_t)tail[10]) << 16;
1568       case 10: k2 ^= ((uint64_t)tail[ 9]) << 8;
1569       case  9: k2 ^= ((uint64_t)tail[ 8]) << 0;
1570                k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
1571       case  8: k1 ^= ((uint64_t)tail[ 7]) << 56;
1572       case  7: k1 ^= ((uint64_t)tail[ 6]) << 48;
1573       case  6: k1 ^= ((uint64_t)tail[ 5]) << 40;
1574       case  5: k1 ^= ((uint64_t)tail[ 4]) << 32;
1575       case  4: k1 ^= ((uint64_t)tail[ 3]) << 24;
1576       case  3: k1 ^= ((uint64_t)tail[ 2]) << 16;
1577       case  2: k1 ^= ((uint64_t)tail[ 1]) << 8;
1578       case  1: k1 ^= ((uint64_t)tail[ 0]) << 0;
1579                k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
1580   };
1581   //----------
1582   // finalization
1583   h1 ^= len; h2 ^= len;
1584   h1 += h2;
1585   h2 += h1;
1586   h1 = fmix64(h1);
1587   h2 = fmix64(h2);
1588   h1 += h2;
1589   h2 += h1;
1590   ((uint64_t*)out)[0] = h1;
1591   ((uint64_t*)out)[1] = h2;
1592 }
1593 //-----------------------------------------------------------------------------
1594 
1595 //
1596 // combine the idStrings into a single string, separating each with a ; character
1597 // (use of the ; character is discouraged, though not prohibited)
1598 //
catString(const vector<string> & idString,std::string & str)1599 void catString(const vector< string >& idString, std::string & str)
1600 {
1601     str =idString[0];
1602     for(size_t i = 1 ; i < idString.size() ; ++i)
1603     {
1604         str+=";";
1605         str+=idString[i];
1606     }
1607 
1608 }
1609 }
1610 
MurmurHash32(const std::string & idString)1611 unsigned int IDManifest::MurmurHash32(const std::string & idString)
1612 {
1613     unsigned int out;
1614     MurmurHash3_x86_32(idString.c_str(),idString.size(),0 , (void*) &out);
1615     return out;
1616 }
1617 
MurmurHash64(const std::string & idString)1618 uint64_t IDManifest::MurmurHash64(const std::string& idString)
1619 {
1620 
1621     uint64_t out[2];
1622     MurmurHash3_x64_128(idString.c_str(),idString.size(),0 , out);
1623     return out[0];
1624 }
1625 
MurmurHash32(const vector<string> & idString)1626 unsigned int IDManifest::MurmurHash32(const vector< string >& idString)
1627 {
1628     if(idString.size()==0)
1629     {
1630         return 0;
1631     }
1632     std::string str;
1633     catString(idString,str);
1634     return MurmurHash32(str);
1635 }
1636 
1637 
1638 
1639 
1640 
MurmurHash64(const vector<string> & idString)1641 uint64_t IDManifest::MurmurHash64(const vector< string >& idString)
1642 {
1643     if(idString.size()==0)
1644     {
1645         return 0;
1646     }
1647     std::string str;
1648     catString(idString,str);
1649     return MurmurHash64(str);
1650 }
1651 
1652 
1653 
1654 OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_EXIT
1655