1 /* Copyright (C) 2014 InfiniDB, Inc.
2 
3    This program is free software; you can redistribute it and/or
4    modify it under the terms of the GNU General Public License
5    as published by the Free Software Foundation; version 2 of
6    the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
16    MA 02110-1301, USA. */
17 
18 /******************************************************************************
19  * $Id: elementcompression.h 9655 2013-06-25 23:08:13Z xlou $
20  *
21  *****************************************************************************/
22 
23 /** @file */
24 
25 #ifndef ELEMENTCOMPRESSION_H_
26 #define ELEMENTCOMPRESSION_H_
27 
28 #include <fstream>
29 #include <stdexcept>
30 #include <stdint.h>
31 #include <vector>
32 
33 #include "elementtype.h"
34 
35 #ifndef __GNUC__
36 #  ifndef __attribute__
37 #    define __attribute__(x)
38 #  endif
39 #endif
40 
41 namespace joblist
42 {
43 
44 //------------------------------------------------------------------------------
45 // Structs used for storing compressed element types externally to disk.
46 // Note the use of "packed" to keep the compiler from padding the struct.
47 // This is done so that a vector of these element types will be contiguous,
48 // and can thus be read and written with a single block read/write.
49 //------------------------------------------------------------------------------
// Compressed element holding a 64 bit "first" (RID) and a 32 bit "second"
// (value).  Declared packed so that no padding follows the 32 bit member.
// NOTE(review): on compilers that do not define __GNUC__, the fallback macro
// near the top of this file expands __attribute__(x) to nothing, so the
// packing request is dropped there -- confirm the layout on such compilers.
struct CompElement64Rid32Val
{
    uint64_t first;   // 64 bit RID
    uint32_t second;  // 32 bit value
} __attribute__((__packed__));
55 
// Compressed element holding a 32 bit "first" (RID) and a 64 bit "second"
// (value).  Declared packed so that no padding is inserted between the 32 bit
// and the 64 bit member.
// NOTE(review): on compilers that do not define __GNUC__, the fallback macro
// near the top of this file expands __attribute__(x) to nothing, so the
// packing request is dropped there -- confirm the layout on such compilers.
struct CompElement32Rid64Val
{
    uint32_t first;   // 32 bit RID
    uint64_t second;  // 64 bit value
} __attribute__((__packed__));
61 
// Compressed element holding a 32 bit "first" (RID) and a 32 bit "second"
// (value).  No packing attribute is applied to this struct.
struct CompElement32Rid32Val
{
    uint32_t first;   // 32 bit RID
    uint32_t second;  // 32 bit value
};
67 
// Compressed element holding only a 32 bit "first" (RID); there is no
// "second" (value) member.  No packing attribute is applied to this struct.
struct CompElement32RidOnly
{
    uint32_t first;   // 32 bit RID
};
72 
73 //------------------------------------------------------------------------------
74 /** @brief Utilities to compress/expand various element type datalists.
75  *
 * The purpose of these utilities is to compress elementtypes as they are saved to disk,
77  * and conversely, to expand them when they are read back from disk.
78  */
79 //------------------------------------------------------------------------------
80 class ElementCompression
81 {
82 public:
83     //
84     //...Utilities to compress from 64 bit to 32 bit for RID and/or Value
85     //
86     template <typename DestType>
87     static void compress(
88         std::vector<ElementType>&             vIn,
89         std::vector<DestType>&                vOut);
compress(std::vector<ElementType> & vIn,std::vector<CompElement32RidOnly> & vOut)90     static void compress(
91         std::vector<ElementType>&             vIn,
92         std::vector<CompElement32RidOnly>&    vOut)
93     {
94         throw std::logic_error(
95             "Compression of ElementType to 32RidOnly not supported");
96     }
97 
compress(std::vector<StringElementType> & vIn,std::vector<CompElement64Rid32Val> & vOut)98     static void compress(
99         std::vector<StringElementType>&       vIn,
100         std::vector<CompElement64Rid32Val>&   vOut)
101     {
102         throw std::logic_error(
103             "Compression of StringElementType to 64/32 not supported");
104     }
compress(std::vector<StringElementType> & vIn,std::vector<CompElement32Rid64Val> & vOut)105     static void compress(
106         std::vector<StringElementType>&       vIn,
107         std::vector<CompElement32Rid64Val>&   vOut)
108     {
109         throw std::logic_error(
110             "Compression of StringElementType to 32/64 not supported");
111     }
compress(std::vector<StringElementType> & vIn,std::vector<CompElement32Rid32Val> & vOut)112     static void compress(
113         std::vector<StringElementType>&       vIn,
114         std::vector<CompElement32Rid32Val>&   vOut)
115     {
116         throw std::logic_error(
117             "Compression of StringElementType to 32/32 not supported");
118     }
compress(std::vector<StringElementType> & vIn,std::vector<CompElement32RidOnly> & vOut)119     static void compress(
120         std::vector<StringElementType>&       vIn,
121         std::vector<CompElement32RidOnly>&    vOut)
122     {
123         throw std::logic_error(
124             "Compression of StringElementType to 32RidOnly not supported");
125     }
126 
compress(std::vector<DoubleElementType> & vIn,std::vector<CompElement64Rid32Val> & vOut)127     static void compress(
128         std::vector<DoubleElementType>&       vIn,
129         std::vector<CompElement64Rid32Val>&   vOut)
130     {
131         throw std::logic_error(
132             "Compression of DoubleElementType to 64/32 not supported");
133     }
compress(std::vector<DoubleElementType> & vIn,std::vector<CompElement32Rid64Val> & vOut)134     static void compress(
135         std::vector<DoubleElementType>&       vIn,
136         std::vector<CompElement32Rid64Val>&   vOut)
137     {
138         throw std::logic_error(
139             "Compression of DoubleElementType to 32/64 not supported");
140     }
compress(std::vector<DoubleElementType> & vIn,std::vector<CompElement32Rid32Val> & vOut)141     static void compress(
142         std::vector<DoubleElementType>&       vIn,
143         std::vector<CompElement32Rid32Val>&   vOut)
144     {
145         throw std::logic_error(
146             "Compression of DoubleElementType to 32/32 not supported");
147     }
compress(std::vector<DoubleElementType> & vIn,std::vector<CompElement32RidOnly> & vOut)148     static void compress(
149         std::vector<DoubleElementType>&       vIn,
150         std::vector<CompElement32RidOnly>&    vOut)
151     {
152         throw std::logic_error(
153             "Compression of DoubleElementType to 32RidOnly not supported");
154     }
155 
compress(std::vector<RIDElementType> & vIn,std::vector<CompElement64Rid32Val> & vOut)156     static void compress(
157         std::vector<RIDElementType>&          vIn,
158         std::vector<CompElement64Rid32Val>&   vOut)
159     {
160         throw std::logic_error(
161             "Compression of RIDElementType to 64/32 not supported");
162     }
compress(std::vector<RIDElementType> & vIn,std::vector<CompElement32Rid64Val> & vOut)163     static void compress(
164         std::vector<RIDElementType>&          vIn,
165         std::vector<CompElement32Rid64Val>&   vOut)
166     {
167         throw std::logic_error(
168             "Compression of RIDElementType to 32/64 not supported");
169     }
compress(std::vector<RIDElementType> & vIn,std::vector<CompElement32Rid32Val> & vOut)170     static void compress(
171         std::vector<RIDElementType>&          vIn,
172         std::vector<CompElement32Rid32Val>&   vOut)
173     {
174         throw std::logic_error(
175             "Compression of RIDElementType to 32/32 not supported");
176     }
177     static void compress(
178         std::vector<RIDElementType>&          vIn,
179         std::vector<CompElement32RidOnly>&    vOut);
180 
compress(std::vector<TupleType> & vIn,std::vector<CompElement64Rid32Val> & vOut)181     static void compress(
182         std::vector<TupleType>&          vIn,
183         std::vector<CompElement64Rid32Val>&   vOut)
184     {
185         throw std::logic_error(
186             "Compression of TupleType to 64/32 not supported");
187     }
compress(std::vector<TupleType> & vIn,std::vector<CompElement32Rid64Val> & vOut)188     static void compress(
189         std::vector<TupleType>&          vIn,
190         std::vector<CompElement32Rid64Val>&   vOut)
191     {
192         throw std::logic_error(
193             "Compression of TupleType to 32/64 not supported");
194     }
compress(std::vector<TupleType> & vIn,std::vector<CompElement32Rid32Val> & vOut)195     static void compress(
196         std::vector<TupleType>&          vIn,
197         std::vector<CompElement32Rid32Val>&   vOut)
198     {
199         throw std::logic_error(
200             "Compression of TupleType to 32/32 not supported");
201     }
compress(std::vector<TupleType> & vIn,std::vector<CompElement32RidOnly> & vOut)202     static void compress(
203         std::vector<TupleType>&          vIn,
204         std::vector<CompElement32RidOnly>&    vOut)
205     {
206         throw std::logic_error(
207             "Compression of TupleType to 32/32 not supported");
208     }
209 
210     //
211     //...Utilities to expand from 32 bit to 64 bit for RID and/or Value
212     //
213     template <typename SrcType>
214     static void expand(
215         std::vector<SrcType>&                 vIn,
216         ElementType*                          vOut);
expand(std::vector<CompElement32RidOnly> & vIn,ElementType * vOut)217     static void expand(
218         std::vector<CompElement32RidOnly>&    vIn,
219         ElementType*                          vOut)
220     {
221         throw std::logic_error(
222             "Expansion to ElementType from 32RidOnly not supported");
223     }
224 
expand(std::vector<CompElement64Rid32Val> & vIn,StringElementType * vOut)225     static void expand(
226         std::vector<CompElement64Rid32Val>&   vIn,
227         StringElementType*                    vOut)
228     {
229         throw std::logic_error(
230             "Expansion to StringElementType from 64/32 not supported");
231     }
expand(std::vector<CompElement32Rid64Val> & vIn,StringElementType * vOut)232     static void expand(
233         std::vector<CompElement32Rid64Val>&   vIn,
234         StringElementType*                    vOut)
235     {
236         throw std::logic_error(
237             "Expansion to StringElementType from 32/64 not supported");
238     }
expand(std::vector<CompElement32Rid32Val> & vIn,StringElementType * vOut)239     static void expand(
240         std::vector<CompElement32Rid32Val>&   vIn,
241         StringElementType*                    vOut)
242     {
243         throw std::logic_error(
244             "Expansion to StringElementType from 32/32 not supported");
245     }
expand(std::vector<CompElement32RidOnly> & vIn,StringElementType * vOut)246     static void expand(
247         std::vector<CompElement32RidOnly>&    vIn,
248         StringElementType*                    vOut)
249     {
250         throw std::logic_error(
251             "Expansion to StringElementType from 32RidOnly not supported");
252     }
253 
expand(std::vector<CompElement64Rid32Val> & vIn,DoubleElementType * vOut)254     static void expand(
255         std::vector<CompElement64Rid32Val>&   vIn,
256         DoubleElementType*                    vOut)
257     {
258         throw std::logic_error(
259             "Expansion to DoubleElementType from 64/32 not supported");
260     }
expand(std::vector<CompElement32Rid64Val> & vIn,DoubleElementType * vOut)261     static void expand(
262         std::vector<CompElement32Rid64Val>&   vIn,
263         DoubleElementType*                    vOut)
264     {
265         throw std::logic_error(
266             "Expansion to DoubleElementType from 32/64 not supported");
267     }
expand(std::vector<CompElement32Rid32Val> & vIn,DoubleElementType * vOut)268     static void expand(
269         std::vector<CompElement32Rid32Val>&   vIn,
270         DoubleElementType*                    vOut)
271     {
272         throw std::logic_error(
273             "Expansion to DoubleElementType from 32/32 not supported");
274     }
expand(std::vector<CompElement32RidOnly> & vIn,DoubleElementType * vOut)275     static void expand(
276         std::vector<CompElement32RidOnly>&    vIn,
277         DoubleElementType*                    vOut)
278     {
279         throw std::logic_error(
280             "Expansion to DoubleElementType from 32RidOnly not supported");
281     }
282 
expand(std::vector<CompElement64Rid32Val> & vIn,RIDElementType * vOut)283     static void expand(
284         std::vector<CompElement64Rid32Val>&   vIn,
285         RIDElementType*                       vOut)
286     {
287         throw std::logic_error(
288             "Expansion to RIDElementType from 64/32 not supported");
289     }
expand(std::vector<CompElement32Rid64Val> & vIn,RIDElementType * vOut)290     static void expand(
291         std::vector<CompElement32Rid64Val>&   vIn,
292         RIDElementType*                       vOut)
293     {
294         throw std::logic_error(
295             "Expansion to RIDElementType from 32/64 not supported");
296     }
expand(std::vector<CompElement32Rid32Val> & vIn,RIDElementType * vOut)297     static void expand(
298         std::vector<CompElement32Rid32Val>&   vIn,
299         RIDElementType*                       vOut)
300     {
301         throw std::logic_error(
302             "Expansion to RIDElementType from 32/32 not supported");
303     }
304     static void expand(
305         std::vector<CompElement32RidOnly>&    vIn,
306         RIDElementType*                       vOut);
307 
expand(std::vector<CompElement64Rid32Val> & vIn,TupleType * vOut)308     static void expand(
309         std::vector<CompElement64Rid32Val>&   vIn,
310         TupleType*                       vOut)
311     {
312         throw std::logic_error(
313             "Expansion to TupleType from 64/32 not supported");
314     }
expand(std::vector<CompElement32Rid64Val> & vIn,TupleType * vOut)315     static void expand(
316         std::vector<CompElement32Rid64Val>&   vIn,
317         TupleType*                       vOut)
318     {
319         throw std::logic_error(
320             "Expansion to TupleType from 32/64 not supported");
321     }
expand(std::vector<CompElement32Rid32Val> & vIn,TupleType * vOut)322     static void expand(
323         std::vector<CompElement32Rid32Val>&   vIn,
324         TupleType*                       vOut)
325     {
326         throw std::logic_error(
327             "Expansion to TupleType from 32/32 not supported");
328     }
expand(std::vector<CompElement32RidOnly> & vIn,TupleType * vOut)329     static void expand(
330         std::vector<CompElement32RidOnly>&    vIn,
331         TupleType*                       vOut)
332     {
333         throw std::logic_error(
334             "Expansion to TupleType from 32/32 not supported");
335     }
336 
337     //
338     //...Utilities to write a single element with a compressed 32 bit RID.
339     //
340     static void writeWith32Rid(
341         const ElementType& e,
342         std::fstream&      fFile);
writeWith32Rid(const DoubleElementType & e,std::fstream & fFile)343     static void writeWith32Rid(
344         const DoubleElementType& e,
345         std::fstream&            fFile)
346     {
347         throw std::logic_error(
348             "Compress/Write of 32 RID DoubleElementType not supported");
349     }
350     static void writeWith32Rid(
351         const StringElementType& e,
352         std::fstream&            fFile);
353     static void writeWith32Rid(
354         const RIDElementType& e,
355         std::fstream&         fFile);
writeWith32Rid(const TupleType & e,std::fstream & fFile)356     static void writeWith32Rid(
357         const TupleType& e,
358         std::fstream&         fFile)
359     {
360         throw std::logic_error(
361             "Compress/Write of 32 RID TupleType not supported");
362     }
363 
364     //
365     //...Utilities to read a single element with a compressed 32 bit RID.
366     //
367     static void readWith32Rid(
368         ElementType&  e,
369         std::fstream& fFile);
readWith32Rid(DoubleElementType & e,std::fstream & fFile)370     static void readWith32Rid(
371         DoubleElementType& e,
372         std::fstream&      fFile)
373     {
374         throw std::logic_error(
375             "Read/Expand of 32 RID DoubleElementType not supported");
376     }
377     static void readWith32Rid(
378         StringElementType& e,
379         std::fstream&      fFile);
380     static void readWith32Rid(
381         RIDElementType& e,
382         std::fstream&   fFile);
readWith32Rid(TupleType & e,std::fstream & fFile)383     static void readWith32Rid(
384         TupleType& e,
385         std::fstream&   fFile)
386     {
387         throw std::logic_error(
388             "Read/Expand of 32 RID TupleType not supported");
389     }
390 };
391 
392 //------------------------------------------------------------------------------
393 // Inline utilities to compress from 64 bit to 32 bit for RID and/or Value
394 //------------------------------------------------------------------------------
395 
396 //
397 //...Compress RID/Value to 64 bit RID, 32 bit value
398 //...Compress RID/Value to 32 bit RID, 64 bit value
399 //...Compress RID/Value to 32 bit RID, 32 bit value
400 //
401 template <typename DestType>
402 /* static */ inline void
compress(std::vector<ElementType> & vIn,std::vector<DestType> & vOut)403 ElementCompression::compress(
404     std::vector<ElementType>&          vIn,
405     std::vector<DestType>&             vOut)
406 {
407     uint64_t count = vIn.size();
408 
409     for (unsigned int i = 0; i < count; i++)
410     {
411         vOut[i].first = vIn[i].first;
412         vOut[i].second = vIn[i].second;
413     }
414 }
415 
416 //
417 //...Compress RID only to 32 bit RID
418 //...This method has to be defined, rather than relying on the template ver-
419 //...sion of compress() because this version does not access or copy the
420 //..."second" data member.
421 //
422 /* static */ inline void
compress(std::vector<RIDElementType> & vIn,std::vector<CompElement32RidOnly> & vOut)423 ElementCompression::compress(
424     std::vector<RIDElementType>&       vIn,
425     std::vector<CompElement32RidOnly>& vOut)
426 {
427     uint64_t count = vIn.size();
428 
429     for (unsigned int i = 0; i < count; i++)
430     {
431         vOut[i].first = vIn[i].first;
432     }
433 }
434 
435 //------------------------------------------------------------------------------
436 // Inline utilities to expand from 32 bit to 64 bit for RID and/or Value
437 //------------------------------------------------------------------------------
438 
439 //
440 //...Expand RID/Value from 64 bit RID, 32 bit value
441 //...Expand RID/Value from 32 bit RID, 64 bit value
442 //...Expand RID/Value from 32 bit RID, 32 bit value
443 //
444 template <typename SrcType>
445 /* static */ inline void
expand(std::vector<SrcType> & vIn,ElementType * vOut)446 ElementCompression::expand(
447     std::vector<SrcType>&              vIn,
448     ElementType*                       vOut)
449 {
450     uint64_t count = vIn.size();
451 
452     for (unsigned int i = 0; i < count; i++)
453     {
454         vOut[i].first = vIn[i].first;
455         vOut[i].second = vIn[i].second;
456     }
457 }
458 
459 //
460 //...Expand RID only from 32 bit RID
461 //...This method has to be defined, rather than relying on the template ver-
462 //...sion of expand() because this version does not access or copy the
463 //..."second" data member.
464 //
465 /* static */ inline void
expand(std::vector<CompElement32RidOnly> & vIn,RIDElementType * vOut)466 ElementCompression::expand(
467     std::vector<CompElement32RidOnly>& vIn,
468     RIDElementType*                    vOut)
469 {
470     uint64_t count = vIn.size();
471 
472     for (unsigned int i = 0; i < count; i++)
473     {
474         vOut[i].first = vIn[i].first;
475     }
476 }
477 
478 //------------------------------------------------------------------------------
479 // Inline utilities to write a single element with a compressed 32 bit RID.
480 //------------------------------------------------------------------------------
481 
482 /* static */ inline void
writeWith32Rid(const ElementType & e,std::fstream & fFile)483 ElementCompression::writeWith32Rid(
484     const ElementType& e,
485     std::fstream&      fFile)
486 {
487     CompElement32Rid64Val eCompressed;
488     eCompressed.first  = e.first;
489     eCompressed.second = e.second;
490     fFile.write((char*) &eCompressed, sizeof(CompElement32Rid64Val));
491 }
492 
493 /* static */ inline void
writeWith32Rid(const StringElementType & e,std::fstream & fFile)494 ElementCompression::writeWith32Rid(
495     const StringElementType& e,
496     std::fstream&            fFile)
497 {
498     uint32_t rid  = e.first;
499     uint16_t dlen = e.second.length();
500 
501     fFile.write((char*)&rid, sizeof(rid) );
502     fFile.write((char*)&dlen, sizeof(dlen));
503     fFile.write( e.second.c_str(),  dlen );
504 }
505 
506 /* static */ inline void
writeWith32Rid(const RIDElementType & e,std::fstream & fFile)507 ElementCompression::writeWith32Rid(
508     const RIDElementType& e,
509     std::fstream&         fFile)
510 {
511     CompElement32RidOnly eCompressed;
512     eCompressed.first  = e.first;
513     fFile.write((char*) &eCompressed, sizeof(CompElement32RidOnly));
514 }
515 
516 //------------------------------------------------------------------------------
517 // Inline utilities to read a single element with a compressed 32 bit RID.
518 //------------------------------------------------------------------------------
519 
520 /* static */ inline void
readWith32Rid(ElementType & e,std::fstream & fFile)521 ElementCompression::readWith32Rid(
522     ElementType&  e,
523     std::fstream& fFile)
524 {
525     CompElement32Rid64Val eCompressed;
526     fFile.read((char*) &eCompressed, sizeof(CompElement32Rid64Val));
527     e.first  = eCompressed.first;
528     e.second = eCompressed.second;
529 }
530 
531 /* static */ inline void
readWith32Rid(StringElementType & e,std::fstream & fFile)532 ElementCompression::readWith32Rid(
533     StringElementType& e,
534     std::fstream&      fFile)
535 {
536     uint32_t rid  = 0;
537     uint16_t dlen = 0;
538     char d[32768];
539 
540     fFile.read((char*)&rid, sizeof(rid) );
541     fFile.read((char*)&dlen, sizeof(dlen));
542     fFile.read( d,  dlen );
543 
544     e.first  = rid;
545     e.second.assign(d, dlen);
546 }
547 
548 /* static */ inline void
readWith32Rid(RIDElementType & e,std::fstream & fFile)549 ElementCompression::readWith32Rid(
550     RIDElementType& e,
551     std::fstream&   fFile)
552 {
553     CompElement32RidOnly eCompressed;
554     fFile.read((char*) &eCompressed, sizeof(CompElement32RidOnly));
555     e.first  = eCompressed.first;
556 }
557 
558 } // end of joblist namespace
559 
560 #endif
561