1 /*
2    Copyright (C) 2014 InfiniDB, Inc.
3    Copyright (c) 2019 MariaDB Corporation
4 
5    This program is free software; you can redistribute it and/or
6    modify it under the terms of the GNU General Public License
7    as published by the Free Software Foundation; version 2 of
8    the License.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software
17    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18    MA 02110-1301, USA.
19 */
20 
21 //
22 // C++ Interface: rowgroup
23 //
24 // Description:
25 //
26 // Author: Patrick LeBlanc <pleblanc@calpont.com>, (C) 2008
27 
28 #ifndef ROWGROUP_H_
29 #define ROWGROUP_H_
30 
31 #include <vector>
32 #include <string>
33 #include <stdexcept>
34 //#define NDEBUG
35 #include <cassert>
36 #include <boost/shared_ptr.hpp>
37 #include <boost/shared_array.hpp>
38 #include <boost/thread/mutex.hpp>
39 #include <cmath>
40 #include <cfloat>
41 #ifdef __linux__
42 #include <execinfo.h>
43 #endif
44 
45 #if defined(_MSC_VER) && !defined(isnan)
46 #define isnan _isnan
47 #endif
48 
49 #include "hasher.h"
50 
51 #include "joblisttypes.h"
52 #include "bytestream.h"
53 #include "calpontsystemcatalog.h"
54 #include "exceptclasses.h"
55 #include "mcsv1_udaf.h"
56 
57 #include "branchpred.h"
58 
59 #include "../winport/winport.h"
60 
61 #include "collation.h"
62 #include "common/hashfamily.h"
63 
64 
65 // Workaround for my_global.h #define of isnan(X) causing a std::std namespace
66 
67 namespace rowgroup
68 {
69 
// Shared size constant (8192).  NOTE(review): presumably the row count of one
// 'logical block' as described in the overview comment below — confirm at the use sites.
const int16_t rgCommonSize = 8 * 1024;
71 
72 /*
73     The RowGroup family of classes encapsulate the data moved through the
74     system.
75 
76      - RowGroup specifies the format of the data primarily (+ some other metadata),
77      - RGData (aka RowGroup Data) encapsulates the data,
78      - Row is used to extract fields from the data and iterate.
79 
80     JobListFactory instantiates the RowGroups to be used by each stage of processing.
81     RGDatas are passed between stages, and their RowGroup instances are used
82     to interpret them.
83 
84     Historically, row data was just a chunk of contiguous memory, a uint8_t *.
85     Every field had a fixed width, which allowed for quick offset
86     calculation when assigning or retrieving individual fields.  That worked
87     well for a few years, but at some point it became common to declare
88     all strings as max-length, and to manipulate them in queries.
89 
90     Having fixed-width fields, even for strings, required an unreasonable
91     amount of memory.  RGData & StringStore were introduced to handle strings
92     more efficiently, at least with respect to memory.  The row data would
93     still be a uint8_t *, and columns would be fixed-width, but string fields
94     above a certain width would contain a 'Pointer' that referenced a string in
95     StringStore.  Strings are stored efficiently in StringStore, so there is
96     no longer wasted space.
97 
98     StringStore comes with a different inefficiency however.  When a value
99     is overwritten, the original string cannot be freed independently of the
100     others, so it continues to use space.  If values are only set once, as is
101     the typical case, then StringStore is efficient.  When it is necessary
102     to overwrite string fields, it is possible to configure these classes
103     to use the original data format so that old string fields do not accumulate
104     in memory.  Of course, be careful, because blobs and text fields in CS are
105     declared as 2GB strings!
106 
107     A single RGData contains up to one 'logical block' worth of data,
108     which is 8192 rows.  One RGData is usually treated as one unit of work by
109     PrimProc and the JobSteps, but the rows an RGData contains and how many are
110     treated as a work unit depend on the operation being done.
111 
112     For example, PrimProc works in units of 8192 contiguous rows
113     that come from disk.  If half of the rows were filtered out, then the
114     RGData it passes to the next stage would only contain 4096 rows.
115 
116     Others build results incrementally before passing them along, such as
117     group-by.  If one group contains 11111 values, then group-by will
118     return 2 RGDatas for that group, one with 8192 rows, and one with 2919.
119 
120     Note: There is no synchronization in any of these classes for obvious
121     performance reasons.  Likewise, although it's technically safe for many
122     readers to access an RGData simultaneously, that would not be an
123     efficient thing to do.  Try to stick to designs where a single RGData
124     is used by a single thread at a time.
125 */
126 
// VS'08 carps that struct MemChunk is not default copyable because of the zero-length array.
// This may be so, and we'll get link errors if someone tries, but so far no one has.
129 #ifdef _MSC_VER
130 #pragma warning (push)
131 #pragma warning (disable : 4200)
132 #endif
133 
134 class StringStore
135 {
136 public:
137     StringStore();
138     virtual ~StringStore();
139 
140     inline std::string getString(uint64_t offset) const;
141     uint64_t storeString(const uint8_t* data, uint32_t length);  //returns the offset
142     inline const uint8_t* getPointer(uint64_t offset) const;
143     inline uint32_t getStringLength(uint64_t offset) const;
getConstString(uint64_t offset)144     inline utils::ConstString getConstString(uint64_t offset) const
145     {
146       return utils::ConstString((const char *) getPointer(offset),
147                                 getStringLength(offset));
148     }
149     inline bool isEmpty() const;
150     inline uint64_t getSize() const;
151     inline bool isNullValue(uint64_t offset) const;
152     bool equals(const std::string& str, uint64_t offset, CHARSET_INFO* cs) const;
153 
154     void clear();
155 
156     void serialize(messageqcpp::ByteStream&) const;
157     void deserialize(messageqcpp::ByteStream&);
158 
159     //@bug6065, make StringStore::storeString() thread safe
useStoreStringMutex(bool b)160     void useStoreStringMutex(bool b)
161     {
162         fUseStoreStringMutex = b;
163     }
useStoreStringMutex()164     bool useStoreStringMutex() const
165     {
166         return fUseStoreStringMutex;
167     }
168 
169 private:
170     std::string empty_str;
171 
172     StringStore(const StringStore&);
173     StringStore& operator=(const StringStore&);
174     static const uint32_t CHUNK_SIZE = 64 * 1024;  // allocators like powers of 2
175 
176     // This is an overlay b/c the underlying data needs to be any size,
177     // and alloc'd in one chunk.  data can't be a separate dynamic chunk.
178     struct MemChunk
179     {
180         uint32_t currentSize;
181         uint32_t capacity;
182         uint8_t data[];
183     };
184 
185     std::vector<boost::shared_array<uint8_t> > mem;
186 
187     // To store strings > 64KB (BLOB/TEXT)
188     std::vector<boost::shared_array<uint8_t> > longStrings;
189     bool empty;
190     bool fUseStoreStringMutex; //@bug6065, make StringStore::storeString() thread safe
191     boost::mutex fMutex;
192 };
193 
194 // Where we store user data for UDA(n)F
195 class UserDataStore
196 {
197     // length represents the fixed portion length of userData.
198     // There may be variable length data in containers or other
199     // user created structures.
200     struct StoreData
201     {
202         int32_t length;
203         std::string functionName;
204         boost::shared_ptr<mcsv1sdk::UserData> userData;
StoreDataStoreData205         StoreData() : length(0) { }
StoreDataStoreData206         StoreData(const StoreData& rhs)
207         {
208             length = rhs.length;
209             functionName = rhs.functionName;
210             userData = rhs.userData;
211         }
212     };
213 
214 public:
215     UserDataStore();
216     virtual ~UserDataStore();
217 
218     void serialize(messageqcpp::ByteStream&) const;
219     void deserialize(messageqcpp::ByteStream&);
220 
221     //Set to make UserDataStore thread safe
useUserDataMutex(bool b)222     void useUserDataMutex(bool b)
223     {
224         fUseUserDataMutex = b;
225     }
useUserDataMutex()226     bool useUserDataMutex() const
227     {
228         return fUseUserDataMutex;
229     }
230 
231     // Returns the offset
232     uint32_t storeUserData(mcsv1sdk::mcsv1Context& context,
233                            boost::shared_ptr<mcsv1sdk::UserData> data,
234                            uint32_t length);
235 
236     boost::shared_ptr<mcsv1sdk::UserData>  getUserData(uint32_t offset) const;
237 
238 private:
239     UserDataStore(const UserDataStore&);
240     UserDataStore& operator=(const UserDataStore&);
241 
242     std::vector<StoreData> vStoreData;
243 
244     bool fUseUserDataMutex;
245     boost::mutex fMutex;
246 };
247 
248 #ifdef _MSC_VER
249 #pragma warning (pop)
250 #endif
251 
252 class RowGroup;
253 class Row;
254 
255 /* TODO: OO the rowgroup data to the extent there's no measurable performance hit. */
256 class RGData
257 {
258 public:
259     RGData();   // useless unless followed by an = or a deserialize operation
260     RGData(const RowGroup& rg, uint32_t rowCount);   // allocates memory for rowData
261     explicit RGData(const RowGroup& rg);
262     RGData(const RGData&);
263     virtual ~RGData();
264 
265     inline RGData& operator=(const RGData&);
266 
267     // amount should be the # returned by RowGroup::getDataSize()
268     void serialize(messageqcpp::ByteStream&, uint32_t amount) const;
269 
270     // the 'hasLengthField' is there b/c PM aggregation (and possibly others) currently sends
271     // inline data with a length field.  Once that's converted to string table format, that
272     // option can go away.
273     void deserialize(messageqcpp::ByteStream&, uint32_t amount = 0); // returns the # of bytes read
274 
275     inline uint64_t getStringTableMemUsage();
276     void clear();
277     void reinit(const RowGroup& rg);
278     void reinit(const RowGroup& rg, uint32_t rowCount);
setStringStore(boost::shared_ptr<StringStore> & ss)279     inline void setStringStore(boost::shared_ptr<StringStore>& ss)
280     {
281         strings = ss;
282     }
283 
284     // this will use the pre-configured Row to figure out where row # num is, then set the Row
285     // to point to it.  It's a shortcut around using a RowGroup to do the same thing for cases
286     // where it's inconvenient to instantiate one.
287     inline void getRow(uint32_t num, Row* row);
288 
289     //@bug6065, make StringStore::storeString() thread safe
useStoreStringMutex(bool b)290     void useStoreStringMutex(bool b)
291     {
292         if (strings) strings->useStoreStringMutex(b);
293     }
useStoreStringMutex()294     bool useStoreStringMutex() const
295     {
296         return (strings ? (strings->useStoreStringMutex()) : false);
297     }
298 
299     UserDataStore* getUserDataStore();
300     // make UserDataStore::storeData() thread safe
useUserDataMutex(bool b)301     void useUserDataMutex(bool b)
302     {
303         if (userDataStore) userDataStore->useUserDataMutex(b);
304     }
useUserDataMutex()305     bool useUserDataMutex() const
306     {
307         return (userDataStore ? (userDataStore->useUserDataMutex()) : false);
308     }
309 
310     boost::shared_array<uint8_t> rowData;
311     boost::shared_ptr<StringStore> strings;
312     boost::shared_ptr<UserDataStore> userDataStore;
313 private:
314     //boost::shared_array<uint8_t> rowData;
315     //boost::shared_ptr<StringStore> strings;
316 
317     // Need sig to support backward compat.  RGData can deserialize both forms.
318     static const uint32_t RGDATA_SIG = 0xffffffff;  //won't happen for 'old' Rowgroup data
319 
320     friend class RowGroup;
321 };
322 
323 
324 class Row
325 {
326 public:
327     struct Pointer
328     {
PointerPointer329         inline Pointer() : data(NULL), strings(NULL), userDataStore(NULL) { }
330 
331         // Pointer(uint8_t*) implicitly makes old code compatible with the string table impl;
PointerPointer332         inline Pointer(uint8_t* d) : data(d), strings(NULL), userDataStore(NULL) { }
PointerPointer333         inline Pointer(uint8_t* d, StringStore* s) : data(d), strings(s), userDataStore(NULL) { }
PointerPointer334         inline Pointer(uint8_t* d, StringStore* s, UserDataStore* u) :
335             data(d), strings(s), userDataStore(u) { }
336         uint8_t* data;
337         StringStore* strings;
338         UserDataStore* userDataStore;
339     };
340 
341     Row();
342     Row(const Row&);
343     ~Row();
344 
345     Row& operator=(const Row&);
346     bool operator==(const Row&) const;
347 
348     //void setData(uint8_t *rowData, StringStore *ss);
349     inline void setData(const Pointer&);    // convenience fcn, can go away
350     inline uint8_t* getData() const;
351 
352     inline void setPointer(const Pointer&);
353     inline Pointer getPointer() const;
354 
355     inline void nextRow();
356     inline uint32_t getColumnWidth(uint32_t colIndex) const;
357     inline uint32_t getColumnCount() const;
358     inline uint32_t getSize() const;		// this is only accurate if there is no string table
359     // if a string table is being used, getRealSize() takes into account variable-length strings
360     inline uint32_t getRealSize() const;
361     inline uint32_t getOffset(uint32_t colIndex) const;
362     inline uint32_t getScale(uint32_t colIndex) const;
363     inline uint32_t getPrecision(uint32_t colIndex) const;
364     inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
365     inline execplan::CalpontSystemCatalog::ColDataType* getColTypes();
366     inline const execplan::CalpontSystemCatalog::ColDataType* getColTypes() const;
367     inline uint32_t getCharsetNumber(uint32_t colIndex) const;
368 
369     // this returns true if the type is not CHAR or VARCHAR
370     inline bool isCharType(uint32_t colIndex) const;
371     inline bool isUnsigned(uint32_t colIndex) const;
372     inline bool isShortString(uint32_t colIndex) const;
373     inline bool isLongString(uint32_t colIndex) const;
374 
colHasCollation(uint32_t colIndex)375     bool colHasCollation(uint32_t colIndex) const
376     {
377         return execplan::typeHasCollation(getColType(colIndex));
378     }
379 
380     template<int len> inline uint64_t getUintField(uint32_t colIndex) const;
381     inline uint64_t getUintField(uint32_t colIndex) const;
382     template<int len> inline int64_t getIntField(uint32_t colIndex) const;
383     inline int64_t getIntField(uint32_t colIndex) const;
384     template<int len> inline bool equals(uint64_t val, uint32_t colIndex) const;
385     inline bool equals(long double val, uint32_t colIndex) const;
386     bool equals(const std::string& val, uint32_t colIndex) const;
387 
388     inline double getDoubleField(uint32_t colIndex) const;
389     inline float getFloatField(uint32_t colIndex) const;
getDecimalField(uint32_t colIndex)390     inline double getDecimalField(uint32_t colIndex) const
391     {
392         return 0.0;   // TODO: Do something here
393     }
394     inline long double getLongDoubleField(uint32_t colIndex) const;
395 
396     inline uint64_t getBaseRid() const;
397     inline uint64_t getRid() const;
398     inline uint16_t getRelRid() const;   // returns a rid relative to this logical block
399     inline uint64_t getExtentRelativeRid() const;   // returns a rid relative to the extent it's in
400     inline uint64_t getFileRelativeRid() const; // returns a file-relative rid
401     inline void getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
402                             uint16_t* blockNum, uint16_t* rowNum);
403 
404     template<int len> void setUintField(uint64_t val, uint32_t colIndex);
405 
406     /* Note: these 2 fcns avoid 1 array lookup per call.  Using them only
407     in projection on the PM resulted in a 2.8% performance gain on
408     the queries listed in bug 2223.
409     TODO: apply them everywhere else possible, and write equivalents
410     for the other types as well as the getters.
411     */
412     template<int len> void setUintField_offset(uint64_t val, uint32_t offset);
413     inline void nextRow(uint32_t size);
414     inline void prevRow(uint32_t size, uint64_t number);
415 
416     inline void setUintField(uint64_t val, uint32_t colIndex);
417     template<int len> void setIntField(int64_t, uint32_t colIndex);
418     inline void setIntField(int64_t, uint32_t colIndex);
419 
420     inline void setDoubleField(double val, uint32_t colIndex);
421     inline void setFloatField(float val, uint32_t colIndex);
setDecimalField(double val,uint32_t colIndex)422     inline void setDecimalField(double val, uint32_t colIndex) { };  // TODO: Do something here
423     inline void setLongDoubleField(long double val, uint32_t colIndex);
424 
425     inline void setRid(uint64_t rid);
426 
427     // is string efficient for this?
428     inline std::string getStringField(uint32_t colIndex) const;
429     inline const uint8_t* getStringPointer(uint32_t colIndex) const;
430     inline uint32_t getStringLength(uint32_t colIndex) const;
431     inline utils::ConstString getConstString(uint32_t colIndex) const;
432     inline utils::ConstString getShortConstString(uint32_t colIndex) const;
433     void setStringField(const std::string& val, uint32_t colIndex);
434     inline void setStringField(const uint8_t*, uint32_t len, uint32_t colIndex);
435 
436     // support VARBINARY
437     // Add 2-byte length at the CHARSET_INFO*beginning of the field.  NULL and zero length field are
438     // treated the same, could use one of the length bit to distinguish these two cases.
439     inline std::string getVarBinaryStringField(uint32_t colIndex) const;
440     inline void setVarBinaryField(const std::string& val, uint32_t colIndex);
441     // No string construction is necessary for better performance.
442     inline uint32_t getVarBinaryLength(uint32_t colIndex) const;
443     inline const uint8_t* getVarBinaryField(uint32_t colIndex) const;
444     inline const uint8_t* getVarBinaryField(uint32_t& len, uint32_t colIndex) const;
445     inline void setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex);
446 
447     inline boost::shared_ptr<mcsv1sdk::UserData> getUserData(uint32_t colIndex) const;
448     inline void setUserData(mcsv1sdk::mcsv1Context& context,
449                             boost::shared_ptr<mcsv1sdk::UserData> userData,
450                             uint32_t len, uint32_t colIndex);
451 
452     uint64_t getNullValue(uint32_t colIndex) const;
453     bool isNullValue(uint32_t colIndex) const;
454 
455     // when NULLs are pulled out via getIntField(), they come out with these values.
456     // Ex: the 1-byte int null value is 0x80.  When it gets cast to an int64_t
457     // it becomes 0xffffffffffffff80, which won't match anything returned by getNullValue().
458     int64_t getSignedNullValue(uint32_t colIndex) const;
459 
460     // copy data in srcIndex field to destIndex, all data type
461     inline void copyField(uint32_t destIndex, uint32_t srcIndex) const;
462 
463     // copy data in srcIndex field to destAddr, all data type
464     //inline void copyField(uint8_t* destAddr, uint32_t srcIndex) const;
465 
466     // an adapter for code that uses the copyField call above;
467     // that's not string-table safe, this one is
468     inline void copyField(Row& dest, uint32_t destIndex, uint32_t srcIndex) const;
469 
470     std::string toString(uint32_t rownum = 0) const;
471     std::string toCSV() const;
472 
473     /* These fcns are used only in joins.  The RID doesn't matter on the side that
474     gets hashed.  We steal that field here to "mark" a row. */
475     inline void markRow();
476     inline void zeroRid();
477     inline bool isMarked();
478     void initToNull();
479 
usesStringTable(bool b)480     inline void usesStringTable(bool b)
481     {
482         useStringTable = b;
483     }
usesStringTable()484     inline bool usesStringTable() const
485     {
486         return useStringTable;
487     }
hasLongString()488     inline bool hasLongString() const
489     {
490         return hasLongStringField;
491     }
492 
493     // these are for cases when you already know the type definitions are the same.
494     // a fcn to check the type defs seperately doesn't exist yet.  No normalization.
495     inline uint64_t hash(uint32_t lastCol) const;  // generates a hash for cols [0-lastCol]
496     inline uint64_t hash() const;  // generates a hash for all cols
497     inline void colUpdateMariaDBHasher(datatypes::MariaDBHasher &hM,
498                                        const utils::Hasher_r& h,
499                                        const uint32_t col,
500                                        uint32_t& intermediateHash) const;
501 
502     bool equals(const Row&, uint32_t lastCol) const;
503     inline bool equals(const Row&) const;
504 
setUserDataStore(UserDataStore * u)505     inline void setUserDataStore(UserDataStore* u)
506     {
507         userDataStore = u;
508     }
509 
510     const CHARSET_INFO* getCharset(uint32_t col) const;
511 
512 private:
513     uint32_t columnCount;
514     uint64_t baseRid;
515 
516     // Note, the mem behind these pointer fields is owned by RowGroup not Row
517     uint32_t* oldOffsets;
518     uint32_t* stOffsets;
519     uint32_t* offsets;
520     uint32_t* colWidths;
521     execplan::CalpontSystemCatalog::ColDataType* types;
522     uint32_t* charsetNumbers;
523     CHARSET_INFO** charsets;
524     uint8_t* data;
525     uint32_t* scale;
526     uint32_t* precision;
527 
528     StringStore* strings;
529     bool useStringTable;
530     bool hasCollation;
531     bool hasLongStringField;
532     uint32_t sTableThreshold;
533     boost::shared_array<bool> forceInline;
534     inline bool inStringTable(uint32_t col) const;
535 
536     UserDataStore* userDataStore; // For UDAF
537 
538     friend class RowGroup;
539 };
540 
getPointer()541 inline Row::Pointer Row::getPointer() const
542 {
543     return Pointer(data, strings, userDataStore);
544 }
getData()545 inline uint8_t* Row::getData() const
546 {
547     return data;
548 }
549 
setPointer(const Pointer & p)550 inline void Row::setPointer(const Pointer& p)
551 {
552     data = p.data;
553     strings = p.strings;
554     bool hasStrings = (strings != 0);
555 
556     if (useStringTable != hasStrings)
557     {
558         useStringTable = hasStrings;
559         offsets = (useStringTable ? stOffsets : oldOffsets);
560     }
561 
562     userDataStore = p.userDataStore;
563 }
564 
setData(const Pointer & p)565 inline void Row::setData(const Pointer& p)
566 {
567     setPointer(p);
568 }
569 
nextRow()570 inline void Row::nextRow()
571 {
572     data += offsets[columnCount];
573 }
574 
getColumnCount()575 inline uint32_t Row::getColumnCount() const
576 {
577     return columnCount;
578 }
579 
getColumnWidth(uint32_t col)580 inline uint32_t Row::getColumnWidth(uint32_t col) const
581 {
582     return colWidths[col];
583 }
584 
getSize()585 inline uint32_t Row::getSize() const
586 {
587     return offsets[columnCount];
588 }
589 
getRealSize()590 inline uint32_t Row::getRealSize() const
591 {
592     if (!useStringTable)
593         return getSize();
594 
595     uint32_t ret = 2;
596 
597     for (uint32_t i = 0; i < columnCount; i++)
598     {
599         if (!inStringTable(i))
600             ret += getColumnWidth(i);
601         else
602             ret += getStringLength(i);
603     }
604 
605     return ret;
606 }
607 
getScale(uint32_t col)608 inline uint32_t Row::getScale(uint32_t col) const
609 {
610     return scale[col];
611 }
612 
getPrecision(uint32_t col)613 inline uint32_t Row::getPrecision(uint32_t col) const
614 {
615     return precision[col];
616 }
617 
getColType(uint32_t colIndex)618 inline execplan::CalpontSystemCatalog::ColDataType Row::getColType(uint32_t colIndex) const
619 {
620     return types[colIndex];
621 }
622 
getColTypes()623 inline execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes()
624 {
625     return types;
626 }
627 
getColTypes()628 inline const execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes() const
629 {
630     return types;
631 }
632 
getCharsetNumber(uint32_t col)633 inline uint32_t Row::getCharsetNumber(uint32_t col) const
634 {
635     return charsetNumbers[col];
636 }
637 
isCharType(uint32_t colIndex)638 inline bool Row::isCharType(uint32_t colIndex) const
639 {
640     return execplan::isCharType(types[colIndex]);
641 }
642 
isUnsigned(uint32_t colIndex)643 inline bool Row::isUnsigned(uint32_t colIndex) const
644 {
645     return execplan::isUnsigned(types[colIndex]);
646 }
647 
isShortString(uint32_t colIndex)648 inline bool Row::isShortString(uint32_t colIndex) const
649 {
650     return (getColumnWidth(colIndex) <= 8 && isCharType(colIndex));
651 }
652 
isLongString(uint32_t colIndex)653 inline bool Row::isLongString(uint32_t colIndex) const
654 {
655     return (getColumnWidth(colIndex) > 8 && isCharType(colIndex));
656 }
657 
inStringTable(uint32_t col)658 inline bool Row::inStringTable(uint32_t col) const
659 {
660     return strings && getColumnWidth(col) >= sTableThreshold && !forceInline[col];
661 }
662 
663 template<int len>
equals(uint64_t val,uint32_t colIndex)664 inline bool Row::equals(uint64_t val, uint32_t colIndex) const
665 {
666     /* I think the compiler will optimize away the switch stmt */
667     switch (len)
668     {
669         case 1:
670             return data[offsets[colIndex]] == val;
671 
672         case 2:
673             return *((uint16_t*) &data[offsets[colIndex]]) == val;
674 
675         case 4:
676             return *((uint32_t*) &data[offsets[colIndex]]) == val;
677 
678         case 8:
679             return *((uint64_t*) &data[offsets[colIndex]]) == val;
680 
681         default:
682             idbassert(0);
683             throw std::logic_error("Row::equals(): bad length.");
684     }
685 }
686 
equals(long double val,uint32_t colIndex)687 inline bool Row::equals(long double val, uint32_t colIndex) const
688 {
689     return *((long double*) &data[offsets[colIndex]]) == val;
690 }
691 template<int len>
getUintField(uint32_t colIndex)692 inline uint64_t Row::getUintField(uint32_t colIndex) const
693 {
694     /* I think the compiler will optimize away the switch stmt */
695     switch (len)
696     {
697         case 1:
698             return data[offsets[colIndex]];
699 
700         case 2:
701             return *((uint16_t*) &data[offsets[colIndex]]);
702 
703         case 4:
704             return *((uint32_t*) &data[offsets[colIndex]]);
705 
706         case 8:
707             return *((uint64_t*) &data[offsets[colIndex]]);
708 
709         default:
710             idbassert(0);
711             throw std::logic_error("Row::getUintField(): bad length.");
712     }
713 }
714 
getUintField(uint32_t colIndex)715 inline uint64_t Row::getUintField(uint32_t colIndex) const
716 {
717     switch (getColumnWidth(colIndex))
718     {
719         case 1:
720             return data[offsets[colIndex]];
721 
722         case 2:
723             return *((uint16_t*) &data[offsets[colIndex]]);
724 
725         case 4:
726             return *((uint32_t*) &data[offsets[colIndex]]);
727 
728         case 8:
729             return *((uint64_t*) &data[offsets[colIndex]]);
730 
731         default:
732             idbassert(0);
733             throw std::logic_error("Row::getUintField(): bad length.");
734     }
735 }
736 
737 template<int len>
getIntField(uint32_t colIndex)738 inline int64_t Row::getIntField(uint32_t colIndex) const
739 {
740     /* I think the compiler will optimize away the switch stmt */
741     switch (len)
742     {
743         case 1:
744             return (int8_t) data[offsets[colIndex]];
745 
746         case 2:
747             return *((int16_t*) &data[offsets[colIndex]]);
748 
749         case 4:
750             return *((int32_t*) &data[offsets[colIndex]]);
751 
752         case 8:
753             return *((int64_t*) &data[offsets[colIndex]]);
754 
755         default:
756             idbassert(0);
757             throw std::logic_error("Row::getIntField(): bad length.");
758     }
759 }
760 
getIntField(uint32_t colIndex)761 inline int64_t Row::getIntField(uint32_t colIndex) const
762 {
763     /* I think the compiler will optimize away the switch stmt */
764     switch (getColumnWidth(colIndex))
765     {
766         case 1:
767             return (int8_t) data[offsets[colIndex]];
768 
769         case 2:
770             return *((int16_t*) &data[offsets[colIndex]]);
771 
772         case 4:
773             return *((int32_t*) &data[offsets[colIndex]]);
774 
775         case 8:
776             return *((int64_t*) &data[offsets[colIndex]]);
777 
778         default:
779             idbassert(0);
780             throw std::logic_error("Row::getIntField(): bad length.");
781     }
782 }
783 
getStringPointer(uint32_t colIndex)784 inline const uint8_t* Row::getStringPointer(uint32_t colIndex) const
785 {
786     if (inStringTable(colIndex))
787         return strings->getPointer(*((uint64_t*) &data[offsets[colIndex]]));
788 
789     return &data[offsets[colIndex]];
790 }
791 
getStringLength(uint32_t colIndex)792 inline uint32_t Row::getStringLength(uint32_t colIndex) const
793 {
794     if (inStringTable(colIndex))
795         return strings->getStringLength(*((uint64_t*) &data[offsets[colIndex]]));
796 
797     return strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex));
798 }
799 
800 
getShortConstString(uint32_t colIndex)801 inline utils::ConstString Row::getShortConstString(uint32_t colIndex) const
802 {
803     const char *src= (const char *) &data[offsets[colIndex]];
804     return utils::ConstString(src, strnlen(src, getColumnWidth(colIndex)));
805 }
806 
807 
getConstString(uint32_t colIndex)808 inline utils::ConstString Row::getConstString(uint32_t colIndex) const
809 {
810     return inStringTable(colIndex) ?
811            strings->getConstString(*((uint64_t*) &data[offsets[colIndex]])) :
812            getShortConstString(colIndex);
813 }
814 
815 
colUpdateMariaDBHasher(datatypes::MariaDBHasher & hM,const utils::Hasher_r & h,const uint32_t col,uint32_t & intermediateHash)816 inline void Row::colUpdateMariaDBHasher(datatypes::MariaDBHasher &hM,
817                                         const utils::Hasher_r& h,
818                                         const uint32_t col,
819                                         uint32_t& intermediateHash) const
820 {
821     switch (getColType(col))
822     {
823         case execplan::CalpontSystemCatalog::CHAR:
824         case execplan::CalpontSystemCatalog::VARCHAR:
825         case execplan::CalpontSystemCatalog::BLOB:
826         case execplan::CalpontSystemCatalog::TEXT:
827         {
828             CHARSET_INFO *cs = getCharset(col);
829             hM.add(cs, getConstString(col));
830             break;
831         }
832         default:
833         {
834             intermediateHash = h((const char*) &data[offsets[col]], colWidths[col], intermediateHash);
835             break;
836         }
837     }
838 }
839 
840 
setStringField(const uint8_t * strdata,uint32_t length,uint32_t colIndex)841 inline void Row::setStringField(const uint8_t* strdata, uint32_t length, uint32_t colIndex)
842 {
843     uint64_t offset;
844 
845     if (length > getColumnWidth(colIndex))
846         length = getColumnWidth(colIndex);
847 
848     if (inStringTable(colIndex))
849     {
850         offset = strings->storeString(strdata, length);
851         *((uint64_t*) &data[offsets[colIndex]]) = offset;
852 //		cout << " -- stored offset " << *((uint32_t *) &data[offsets[colIndex]])
853 //				<< " length " << *((uint32_t *) &data[offsets[colIndex] + 4])
854 //				<< endl;
855     }
856     else
857     {
858         memcpy(&data[offsets[colIndex]], strdata, length);
859         memset(&data[offsets[colIndex] + length], 0,
860                offsets[colIndex + 1] - (offsets[colIndex] + length));
861     }
862 }
863 
getStringField(uint32_t colIndex)864 inline std::string Row::getStringField(uint32_t colIndex) const
865 {
866     if (inStringTable(colIndex))
867         return strings->getString(*((uint64_t*) &data[offsets[colIndex]]));
868 
869     // Not all CHAR/VARCHAR are NUL terminated so use length
870     return std::string((char*) &data[offsets[colIndex]],
871                        strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex)));
872 }
873 
getVarBinaryStringField(uint32_t colIndex)874 inline std::string Row::getVarBinaryStringField(uint32_t colIndex) const
875 {
876     if (inStringTable(colIndex))
877         return getStringField(colIndex);
878 
879     return std::string((char*) &data[offsets[colIndex] + 2], *((uint16_t*) &data[offsets[colIndex]]));
880 }
881 
getVarBinaryLength(uint32_t colIndex)882 inline uint32_t Row::getVarBinaryLength(uint32_t colIndex) const
883 {
884     if (inStringTable(colIndex))
885         return strings->getStringLength(*((uint64_t*) &data[offsets[colIndex]]));;
886 
887     return *((uint16_t*) &data[offsets[colIndex]]);
888 }
889 
getVarBinaryField(uint32_t colIndex)890 inline const uint8_t* Row::getVarBinaryField(uint32_t colIndex) const
891 {
892     if (inStringTable(colIndex))
893         return strings->getPointer(*((uint64_t*) &data[offsets[colIndex]]));
894 
895     return &data[offsets[colIndex] + 2];
896 }
897 
getVarBinaryField(uint32_t & len,uint32_t colIndex)898 inline const uint8_t* Row::getVarBinaryField(uint32_t& len, uint32_t colIndex) const
899 {
900     if (inStringTable(colIndex))
901     {
902         len = strings->getStringLength(*((uint64_t*) &data[offsets[colIndex]]));
903         return getVarBinaryField(colIndex);
904     }
905     else
906     {
907         len = *((uint16_t*) &data[offsets[colIndex]]);
908         return &data[offsets[colIndex] + 2];
909     }
910 }
911 
getUserData(uint32_t colIndex)912 inline boost::shared_ptr<mcsv1sdk::UserData> Row::getUserData(uint32_t colIndex) const
913 {
914     if (!userDataStore)
915     {
916         return boost::shared_ptr<mcsv1sdk::UserData>();
917     }
918 
919     return userDataStore->getUserData(*((uint32_t*) &data[offsets[colIndex]]));
920 }
921 
getDoubleField(uint32_t colIndex)922 inline double Row::getDoubleField(uint32_t colIndex) const
923 {
924     return *((double*) &data[offsets[colIndex]]);
925 }
926 
getFloatField(uint32_t colIndex)927 inline float Row::getFloatField(uint32_t colIndex) const
928 {
929     return *((float*) &data[offsets[colIndex]]);
930 }
931 
getLongDoubleField(uint32_t colIndex)932 inline long double Row::getLongDoubleField(uint32_t colIndex) const
933 {
934     return *((long double*) &data[offsets[colIndex]]);
935 }
936 
getRid()937 inline uint64_t Row::getRid() const
938 {
939     return baseRid + *((uint16_t*) data);
940 }
941 
getRelRid()942 inline uint16_t Row::getRelRid() const
943 {
944     return *((uint16_t*) data);
945 }
946 
getBaseRid()947 inline uint64_t Row::getBaseRid() const
948 {
949     return baseRid;
950 }
951 
markRow()952 inline void Row::markRow()
953 {
954     *((uint16_t*) data) = 0xffff;
955 }
956 
zeroRid()957 inline void Row::zeroRid()
958 {
959     *((uint16_t*) data) = 0;
960 }
961 
isMarked()962 inline bool Row::isMarked()
963 {
964     return *((uint16_t*) data) == 0xffff;
965 }
966 
967 /* Begin speculative code! */
getOffset(uint32_t colIndex)968 inline uint32_t Row::getOffset(uint32_t colIndex) const
969 {
970     return offsets[colIndex];
971 }
972 
973 template<int len>
setUintField_offset(uint64_t val,uint32_t offset)974 inline void Row::setUintField_offset(uint64_t val, uint32_t offset)
975 {
976     switch (len)
977     {
978         case 1:
979             data[offset] = val;
980             break;
981 
982         case 2:
983             *((uint16_t*) &data[offset]) = val;
984             break;
985 
986         case 4:
987             *((uint32_t*) &data[offset]) = val;
988             break;
989 
990         case 8:
991             *((uint64_t*) &data[offset]) = val;
992             break;
993 
994         default:
995             idbassert(0);
996             throw std::logic_error("Row::setUintField called on a non-uint32_t field");
997     }
998 }
999 
nextRow(uint32_t size)1000 inline void Row::nextRow(uint32_t size)
1001 {
1002     data += size;
1003 }
1004 
1005 
1006 inline void Row::prevRow(uint32_t size, uint64_t number = 1)
1007 {
1008     data -= size * number;
1009 }
1010 
1011 template<int len>
setUintField(uint64_t val,uint32_t colIndex)1012 inline void Row::setUintField(uint64_t val, uint32_t colIndex)
1013 {
1014     switch (len)
1015     {
1016         case 1:
1017             data[offsets[colIndex]] = val;
1018             break;
1019 
1020         case 2:
1021             *((uint16_t*) &data[offsets[colIndex]]) = val;
1022             break;
1023 
1024         case 4:
1025             *((uint32_t*) &data[offsets[colIndex]]) = val;
1026             break;
1027 
1028         case 8:
1029             *((uint64_t*) &data[offsets[colIndex]]) = val;
1030             break;
1031 
1032         default:
1033             idbassert(0);
1034             throw std::logic_error("Row::setUintField called on a non-uint32_t field");
1035     }
1036 }
1037 
setUintField(uint64_t val,uint32_t colIndex)1038 inline void Row::setUintField(uint64_t val, uint32_t colIndex)
1039 {
1040     switch (getColumnWidth(colIndex))
1041     {
1042         case 1:
1043             data[offsets[colIndex]] = val;
1044             break;
1045 
1046         case 2:
1047             *((uint16_t*) &data[offsets[colIndex]]) = val;
1048             break;
1049 
1050         case 4:
1051             *((uint32_t*) &data[offsets[colIndex]]) = val;
1052             break;
1053 
1054         case 8:
1055             *((uint64_t*) &data[offsets[colIndex]]) = val;
1056             break;
1057 
1058         default:
1059             idbassert(0);
1060             throw std::logic_error("Row::setUintField: bad length");
1061     }
1062 }
1063 
1064 template<int len>
setIntField(int64_t val,uint32_t colIndex)1065 inline void Row::setIntField(int64_t val, uint32_t colIndex)
1066 {
1067     switch (len)
1068     {
1069         case 1:
1070             *((int8_t*) &data[offsets[colIndex]]) = val;
1071             break;
1072 
1073         case 2:
1074             *((int16_t*) &data[offsets[colIndex]]) = val;
1075             break;
1076 
1077         case 4:
1078             *((int32_t*) &data[offsets[colIndex]]) = val;
1079             break;
1080 
1081         case 8:
1082             *((int64_t*) &data[offsets[colIndex]]) = val;
1083             break;
1084 
1085         default:
1086             idbassert(0);
1087             throw std::logic_error("Row::setIntField: bad length");
1088     }
1089 }
1090 
setIntField(int64_t val,uint32_t colIndex)1091 inline void Row::setIntField(int64_t val, uint32_t colIndex)
1092 {
1093     switch (getColumnWidth(colIndex))
1094     {
1095         case 1:
1096             *((int8_t*) &data[offsets[colIndex]]) = val;
1097             break;
1098 
1099         case 2:
1100             *((int16_t*) &data[offsets[colIndex]]) = val;
1101             break;
1102 
1103         case 4:
1104             *((int32_t*) &data[offsets[colIndex]]) = val;
1105             break;
1106 
1107         case 8:
1108             *((int64_t*) &data[offsets[colIndex]]) = val;
1109             break;
1110 
1111         default:
1112             idbassert(0);
1113             throw std::logic_error("Row::setIntField: bad length");
1114     }
1115 }
1116 
setDoubleField(double val,uint32_t colIndex)1117 inline void Row::setDoubleField(double val, uint32_t colIndex)
1118 {
1119     *((double*) &data[offsets[colIndex]]) = val;
1120 }
1121 
setFloatField(float val,uint32_t colIndex)1122 inline void Row::setFloatField(float val, uint32_t colIndex)
1123 {
1124     //N.B. There is a bug in boost::any or in gcc where, if you store a nan, you will get back a nan,
1125     //  but not necessarily the same bits that you put in. This only seems to be for float (double seems
1126     //  to work).
1127     if (std::isnan(val))
1128         setUintField<4>(joblist::FLOATNULL, colIndex);
1129     else
1130         *((float*) &data[offsets[colIndex]]) = val;
1131 }
1132 
setLongDoubleField(long double val,uint32_t colIndex)1133 inline void Row::setLongDoubleField(long double val, uint32_t colIndex)
1134 {
1135     uint8_t* p = &data[offsets[colIndex]];
1136     *((long double*)p) = val;
1137     if (sizeof(long double) == 16)
1138     {
1139         // zero out the unused portion as there may be garbage there.
1140         *((uint64_t*)p+1) &= 0x000000000000FFFFULL;
1141     }
1142 }
1143 
setVarBinaryField(const std::string & val,uint32_t colIndex)1144 inline void Row::setVarBinaryField(const std::string& val, uint32_t colIndex)
1145 {
1146     if (inStringTable(colIndex))
1147         setStringField(val, colIndex);
1148     else
1149     {
1150         *((uint16_t*) &data[offsets[colIndex]]) = static_cast<uint16_t>(val.length());
1151         memcpy(&data[offsets[colIndex] + 2], val.data(), val.length());
1152     }
1153 }
1154 
setVarBinaryField(const uint8_t * val,uint32_t len,uint32_t colIndex)1155 inline void Row::setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex)
1156 {
1157     if (len > getColumnWidth(colIndex))
1158         len = getColumnWidth(colIndex);
1159 
1160     if (inStringTable(colIndex))
1161     {
1162         uint64_t offset = strings->storeString(val, len);
1163         *((uint64_t*) &data[offsets[colIndex]]) = offset;
1164     }
1165     else
1166     {
1167         *((uint16_t*) &data[offsets[colIndex]]) = len;
1168         memcpy(&data[offsets[colIndex] + 2], val, len);
1169     }
1170 }
1171 
setUserData(mcsv1sdk::mcsv1Context & context,boost::shared_ptr<mcsv1sdk::UserData> userData,uint32_t len,uint32_t colIndex)1172 inline void Row::setUserData(mcsv1sdk::mcsv1Context& context,
1173                              boost::shared_ptr<mcsv1sdk::UserData> userData,
1174                              uint32_t len, uint32_t colIndex)
1175 {
1176     if (!userDataStore)
1177     {
1178         return;
1179     }
1180 
1181     uint32_t offset = userDataStore->storeUserData(context, userData, len);
1182     *((uint32_t*) &data[offsets[colIndex]]) = offset;
1183     *((uint32_t*) &data[offsets[colIndex] + 4]) = len;
1184 }
1185 
copyField(uint32_t destIndex,uint32_t srcIndex)1186 inline void Row::copyField(uint32_t destIndex, uint32_t srcIndex) const
1187 {
1188     uint32_t n = offsets[destIndex + 1] - offsets[destIndex];
1189     memmove(&data[offsets[destIndex]], &data[offsets[srcIndex]], n);
1190 }
1191 
copyField(Row & out,uint32_t destIndex,uint32_t srcIndex)1192 inline void Row::copyField(Row& out, uint32_t destIndex, uint32_t srcIndex) const
1193 {
1194     if (UNLIKELY(types[srcIndex] == execplan::CalpontSystemCatalog::VARBINARY ||
1195                  types[srcIndex] == execplan::CalpontSystemCatalog::BLOB ||
1196                  types[srcIndex] == execplan::CalpontSystemCatalog::TEXT))
1197         out.setVarBinaryField(getVarBinaryStringField(srcIndex), destIndex);
1198     else if (UNLIKELY(isLongString(srcIndex)))
1199         out.setStringField(getStringPointer(srcIndex), getStringLength(srcIndex), destIndex);
1200     //out.setStringField(getStringField(srcIndex), destIndex);
1201     else if (UNLIKELY(isShortString(srcIndex)))
1202         out.setUintField(getUintField(srcIndex), destIndex);
1203     else if (UNLIKELY(types[srcIndex] == execplan::CalpontSystemCatalog::LONGDOUBLE))
1204         out.setLongDoubleField(getLongDoubleField(srcIndex), destIndex);
1205     else
1206         out.setIntField(getIntField(srcIndex), destIndex);
1207 }
1208 
setRid(uint64_t rid)1209 inline void Row::setRid(uint64_t rid)
1210 {
1211     *((uint16_t*) data) = rid & 0xffff;
1212 }
1213 
hash()1214 inline uint64_t Row::hash() const
1215 {
1216     return hash(columnCount - 1);
1217 }
1218 
1219 
hash(uint32_t lastCol)1220 inline uint64_t Row::hash(uint32_t lastCol) const
1221 {
1222     // Use two hash classes. MariaDBHasher for text-based
1223     // collation-aware data types and Hasher_r for all other data types.
1224     // We deliver a hash that is a combination of both hashers' results.
1225     utils::Hasher_r h;
1226     datatypes::MariaDBHasher hM;
1227     uint32_t intermediateHash = 0;
1228     // Sometimes we ask this to hash 0 bytes, and it comes through looking like
1229     // lastCol = -1.  Return 0.
1230     if (lastCol >= columnCount)
1231         return 0;
1232 
1233     for (uint32_t i = 0; i <= lastCol; i++)
1234         colUpdateMariaDBHasher(hM, h, i, intermediateHash);
1235 
1236     return utils::HashFamily(h, intermediateHash, lastCol << 2, hM).finalize();
1237 }
1238 
equals(const Row & r2)1239 inline bool Row::equals(const Row& r2) const
1240 {
1241     return equals(r2, columnCount - 1);
1242 }
1243 
1244 
1245 /** @brief RowGroup is a lightweight interface for processing packed row data
1246 
1247 	A RowGroup is an interface for parsing and/or modifying row data as described at the top
1248 	of this file.  Its lifecycle can be tied to a producer or consumer's lifecycle.
1249 	Only one instance is required to process any number of blocks with a
1250 	given column configuration.  The column configuration is specified in the
1251 	constructor, and the block data to process is specified through the
1252 	setData() function.	 It will not copy or take ownership of the data it processes;
1253 	the caller should do that.
1254 
1255 	Row and RowGroup share some bits.  RowGroup owns the memory they share.
1256 */
1257 class RowGroup : public messageqcpp::Serializeable
1258 {
1259 public:
1260     /** @brief The default ctor.  It does nothing.  Need to init by assignment or deserialization */
1261     RowGroup();
1262 
1263     /** @brief The RowGroup ctor, which specifies the column config to process
1264 
1265     @param colCount The number of columns
1266     @param positions An array specifying the offsets within the packed data
1267     		of a row where each column begins.  It should have colCount + 1
1268     		entries.  The first offset is 2, because a row begins with a 2-byte
1269     		RID.  The last entry should be the offset of the last column +
1270     		its length, which is also the size of the entire row including the rid.
1271     @param coids An array of oids for each column.
1272     @param tkeys An array of unique id for each column.
1273     @param colTypes An array of COLTYPEs for each column.
1274     @param charsetNumbers an Array of the lookup numbers for the charset/collation object.
1275     @param scale An array specifying the scale of DECIMAL types (0 for non-decimal)
1276     @param precision An array specifying the precision of DECIMAL types (0 for non-decimal)
1277     */
1278 
1279     RowGroup(uint32_t colCount,
1280              const std::vector<uint32_t>& positions,
1281              const std::vector<uint32_t>& cOids,
1282              const std::vector<uint32_t>& tkeys,
1283              const std::vector<execplan::CalpontSystemCatalog::ColDataType>& colTypes,
1284              const std::vector<uint32_t>& charsetNumbers,
1285              const std::vector<uint32_t>& scale,
1286              const std::vector<uint32_t>& precision,
1287              uint32_t stringTableThreshold,
1288              bool useStringTable = true,
1289              const std::vector<bool>& forceInlineData = std::vector<bool>()
1290             );
1291 
1292     /** @brief The copiers.  It copies metadata, not the row data */
1293     RowGroup(const RowGroup&);
1294 
1295     /** @brief Assignment operator.  It copies metadata, not the row data */
1296     RowGroup& operator=(const RowGroup&);
1297 
1298     ~RowGroup();
1299 
1300     inline void initRow(Row*, bool forceInlineData = false) const;
1301     inline uint32_t getRowCount() const;
1302     inline void incRowCount();
1303     inline void setRowCount(uint32_t num);
1304     inline void getRow(uint32_t rowNum, Row*) const;
1305     inline uint32_t getRowSize() const;
1306     inline uint32_t getRowSizeWithStrings() const;
1307     inline uint64_t getBaseRid() const;
1308     void setData(RGData* rgd);
1309     inline void setData(uint8_t* d);
1310     inline uint8_t* getData() const;
1311     inline RGData* getRGData() const;
1312 
1313     uint32_t getStatus() const;
1314     void setStatus(uint16_t);
1315 
1316     uint32_t getDBRoot() const;
1317     void setDBRoot(uint32_t);
1318 
1319     uint32_t getDataSize() const;
1320     uint32_t getDataSize(uint64_t n) const;
1321     uint32_t getMaxDataSize() const;
1322     uint32_t getMaxDataSizeWithStrings() const;
1323     uint32_t getEmptySize() const;
1324 
1325     // this returns the size of the row data with the string table
1326     inline uint64_t getSizeWithStrings() const;
1327     inline uint64_t getSizeWithStrings(uint64_t n) const;
1328 
1329     // sets the row count to 0 and the baseRid to something
1330     // effectively initializing whatever chunk of memory
1331     // data points to
1332     void resetRowGroup(uint64_t baseRid);
1333 
1334     /* The Serializeable interface */
1335     void serialize(messageqcpp::ByteStream&) const;
1336     void deserialize(messageqcpp::ByteStream&);
1337 
1338     uint32_t getColumnWidth(uint32_t col) const;
1339     uint32_t getColumnCount() const;
1340     inline const std::vector<uint32_t>& getOffsets() const;
1341     inline const std::vector<uint32_t>& getOIDs() const;
1342     inline const std::vector<uint32_t>& getKeys() const;
1343     inline const std::vector<uint32_t>& getColWidths() const;
1344     inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
1345     inline const std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes() const;
1346     inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes();
1347     inline const std::vector<uint32_t>& getCharsetNumbers() const;
1348     inline uint32_t getCharsetNumber(uint32_t colIndex) const;
1349     inline boost::shared_array<bool>& getForceInline();
getHeaderSize()1350     static inline uint32_t getHeaderSize()
1351     {
1352         return headerSize;
1353     }
1354 
1355     // this returns true if the type is CHAR or VARCHAR
1356     inline bool isCharType(uint32_t colIndex) const;
1357     inline bool isUnsigned(uint32_t colIndex) const;
1358     inline bool isShortString(uint32_t colIndex) const;
1359     inline bool isLongString(uint32_t colIndex) const;
1360 
colHasCollation(uint32_t colIndex)1361     bool colHasCollation(uint32_t colIndex) const
1362     {
1363         return execplan::typeHasCollation(getColType(colIndex));
1364     }
1365 
1366     inline const std::vector<uint32_t>& getScale() const;
1367     inline const std::vector<uint32_t>& getPrecision() const;
1368 
1369     inline bool usesStringTable() const;
1370     inline void setUseStringTable(bool);
1371 
1372 //	RGData *convertToInlineData(uint64_t *size = NULL) const;  // caller manages the memory returned by this
1373 //	void convertToInlineDataInPlace();
1374 //	RGData *convertToStringTable(uint64_t *size = NULL) const;
1375 //	void convertToStringTableInPlace();
1376     void serializeRGData(messageqcpp::ByteStream&) const;
1377     inline uint32_t getStringTableThreshold() const;
1378 
1379     void append(RGData&);
1380     void append(RowGroup&);
1381     void append(RGData&, uint pos);    // insert starting at position 'pos'
1382     void append(RowGroup&, uint pos);
1383 
1384     RGData duplicate();   // returns a copy of the attached RGData
1385 
1386     std::string toString(const std::vector<uint64_t>& used = {}) const;
1387 
1388     /** operator+=
1389     *
1390     * append the metadata of another RowGroup to this RowGroup
1391     */
1392     RowGroup& operator+=(const RowGroup& rhs);
1393 
1394     // returns a RowGroup with only the first cols columns.  Useful for generating a
1395     // RowGroup where the first cols make up a key of some kind, and the rest is irrelevant.
1396     RowGroup truncate(uint32_t cols);
1397 
1398     /** operator<
1399      *
1400      * Orders RG's based on baseRid
1401      */
1402     inline bool operator<(const RowGroup& rhs) const;
1403 
1404     void addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList& sysDataList);
1405 
1406     /* Base RIDs are now a combination of partition#, segment#, extent#, and block#. */
1407     inline void setBaseRid(const uint32_t& partNum, const uint16_t& segNum,
1408                            const uint8_t& extentNum, const uint16_t& blockNum);
1409     inline void getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
1410                             uint16_t* blockNum);
1411 
1412     inline void setStringStore(boost::shared_ptr<StringStore>);
1413 
1414     const CHARSET_INFO* getCharset(uint32_t col);
1415 
1416 private:
1417     uint32_t columnCount;
1418     uint8_t* data;
1419 
1420     std::vector<uint32_t> oldOffsets; //inline data offsets
1421     std::vector<uint32_t> stOffsets;  //string table offsets
1422     uint32_t* offsets;   //offsets either points to oldOffsets or stOffsets
1423     std::vector<uint32_t> colWidths;
1424     // oids: the real oid of the column, may have duplicates with alias.
1425     // This oid is necessary for front-end to decide the real column width.
1426     std::vector<uint32_t> oids;
1427     // keys: the unique id for pair(oid, alias). bug 1632.
1428     // Used to map the projected column and rowgroup index
1429     std::vector<uint32_t> keys;
1430     std::vector<execplan::CalpontSystemCatalog::ColDataType> types;
1431     // For string collation
1432     std::vector<uint32_t> charsetNumbers;
1433     std::vector<CHARSET_INFO*> charsets;
1434 
1435     // DECIMAL support.  For non-decimal fields, the values are 0.
1436     std::vector<uint32_t> scale;
1437     std::vector<uint32_t> precision;
1438 
1439     // string table impl
1440     RGData* rgData;
1441     StringStore* strings;   // note, strings and data belong to rgData
1442     bool useStringTable;
1443     bool hasCollation;
1444     bool hasLongStringField;
1445     uint32_t sTableThreshold;
1446     boost::shared_array<bool> forceInline;
1447 
1448     static const uint32_t headerSize = 18;
1449     static const uint32_t rowCountOffset = 0;
1450     static const uint32_t baseRidOffset = 4;
1451     static const uint32_t statusOffset = 12;
1452     static const uint32_t dbRootOffset = 14;
1453 };
1454 
1455 inline uint64_t convertToRid(const uint32_t& partNum, const uint16_t& segNum,
1456                              const uint8_t& extentNum, const uint16_t& blockNum);
1457 inline void getLocationFromRid(uint64_t rid, uint32_t* partNum,
1458                                uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum);
1459 
1460 // returns the first rid of the logical block specified by baseRid
1461 inline uint64_t getExtentRelativeRid(uint64_t baseRid);
1462 
1463 // returns the first rid of the logical block specified by baseRid
1464 inline uint64_t getFileRelativeRid(uint64_t baseRid);
1465 
1466 /** operator+
1467 *
1468 * add the metadata of 2 RowGroups together and return a new RowGroup
1469 */
1470 RowGroup operator+(const RowGroup& lhs, const RowGroup& rhs);
1471 
1472 boost::shared_array<int> makeMapping(const RowGroup& r1, const RowGroup& r2);
1473 void applyMapping(const boost::shared_array<int>& mapping, const Row& in, Row* out);
1474 void applyMapping(const std::vector<int>& mapping, const Row& in, Row* out);
1475 void applyMapping(const int* mapping, const Row& in, Row* out);
1476 
1477 /* PL 8/10/09: commented the asserts for now b/c for the fcns that are called
1478 every row, they're a measurable performance penalty */
getRowCount()1479 inline uint32_t RowGroup::getRowCount() const
1480 {
1481 // 	idbassert(data);
1482 // 	if (!data) throw std::logic_error("RowGroup::getRowCount(): data is NULL!");
1483     return *((uint32_t*) &data[rowCountOffset]);
1484 }
1485 
incRowCount()1486 inline void RowGroup::incRowCount()
1487 {
1488 // 	idbassert(data);
1489     ++(*((uint32_t*) &data[rowCountOffset]));
1490 }
1491 
setRowCount(uint32_t num)1492 inline void RowGroup::setRowCount(uint32_t num)
1493 {
1494 // 	idbassert(data);
1495     *((uint32_t*) &data[rowCountOffset]) = num;
1496 }
1497 
getRow(uint32_t rowNum,Row * r)1498 inline void RowGroup::getRow(uint32_t rowNum, Row* r) const
1499 {
1500 // 	idbassert(data);
1501     if (useStringTable != r->usesStringTable())
1502         initRow(r);
1503 
1504     r->baseRid = getBaseRid();
1505     r->data = &(data[headerSize + (rowNum * offsets[columnCount])]);
1506     r->strings = strings;
1507     r->userDataStore = rgData->userDataStore.get();
1508 }
1509 
setData(uint8_t * d)1510 inline void RowGroup::setData(uint8_t* d)
1511 {
1512     data = d;
1513     strings = NULL;
1514     rgData = NULL;
1515     setUseStringTable(false);
1516 }
1517 
setData(RGData * rgd)1518 inline void RowGroup::setData(RGData* rgd)
1519 {
1520     data = rgd->rowData.get();
1521     strings = rgd->strings.get();
1522     rgData = rgd;
1523 }
1524 
getData()1525 inline uint8_t* RowGroup::getData() const
1526 {
1527     //assert(!useStringTable);
1528     return data;
1529 }
1530 
getRGData()1531 inline RGData* RowGroup::getRGData() const
1532 {
1533     return rgData;
1534 }
1535 
setUseStringTable(bool b)1536 inline void RowGroup::setUseStringTable(bool b)
1537 {
1538     useStringTable = (b && hasLongStringField);
1539     //offsets = (useStringTable ? &stOffsets[0] : &oldOffsets[0]);
1540     offsets = 0;
1541 
1542     if (useStringTable && !stOffsets.empty())
1543         offsets = &stOffsets[0];
1544     else if (!useStringTable && !oldOffsets.empty())
1545         offsets = &oldOffsets[0];
1546 
1547     if (!useStringTable)
1548         strings = NULL;
1549 }
1550 
getBaseRid()1551 inline uint64_t RowGroup::getBaseRid() const
1552 {
1553     return *((uint64_t*) &data[baseRidOffset]);
1554 }
1555 
1556 inline bool RowGroup::operator<(const RowGroup& rhs) const
1557 {
1558     return (getBaseRid() < rhs.getBaseRid());
1559 }
1560 
initRow(Row * r,bool forceInlineData)1561 void RowGroup::initRow(Row* r, bool forceInlineData) const
1562 {
1563     r->columnCount = columnCount;
1564 
1565     if (LIKELY(!types.empty()))
1566     {
1567         r->colWidths = (uint32_t*) &colWidths[0];
1568         r->types = (execplan::CalpontSystemCatalog::ColDataType*) & (types[0]);
1569         r->charsetNumbers = (uint32_t*) & (charsetNumbers[0]);
1570         r->charsets = (CHARSET_INFO**) & (charsets[0]);
1571         r->scale = (uint32_t*) & (scale[0]);
1572         r->precision = (uint32_t*) & (precision[0]);
1573     }
1574 
1575     if (forceInlineData)
1576     {
1577         r->useStringTable = false;
1578         r->oldOffsets = (uint32_t*) & (oldOffsets[0]);
1579         r->stOffsets = (uint32_t*) & (stOffsets[0]);
1580         r->offsets = (uint32_t*) & (oldOffsets[0]);
1581     }
1582     else
1583     {
1584         r->useStringTable = useStringTable;
1585         r->oldOffsets = (uint32_t*) & (oldOffsets[0]);
1586         r->stOffsets = (uint32_t*) & (stOffsets[0]);
1587         r->offsets = offsets;
1588     }
1589 
1590     r->hasLongStringField = hasLongStringField;
1591     r->sTableThreshold = sTableThreshold;
1592     r->forceInline = forceInline;
1593     r->hasCollation = hasCollation;
1594 }
1595 
getRowSize()1596 inline uint32_t RowGroup::getRowSize() const
1597 {
1598     return offsets[columnCount];
1599 }
1600 
getRowSizeWithStrings()1601 inline uint32_t RowGroup::getRowSizeWithStrings() const
1602 {
1603     return oldOffsets[columnCount];
1604 }
1605 
getSizeWithStrings(uint64_t n)1606 inline uint64_t RowGroup::getSizeWithStrings(uint64_t n) const
1607 {
1608     if (strings == NULL)
1609         return getDataSize(n);
1610     else
1611         return getDataSize(n) + strings->getSize();
1612 }
1613 
getSizeWithStrings()1614 inline uint64_t RowGroup::getSizeWithStrings() const
1615 {
1616     return getSizeWithStrings(getRowCount());
1617 }
1618 
isCharType(uint32_t colIndex)1619 inline bool RowGroup::isCharType(uint32_t colIndex) const
1620 {
1621     return execplan::isCharType(types[colIndex]);
1622 }
1623 
isUnsigned(uint32_t colIndex)1624 inline bool RowGroup::isUnsigned(uint32_t colIndex) const
1625 {
1626     return execplan::isUnsigned(types[colIndex]);
1627 }
1628 
isShortString(uint32_t colIndex)1629 inline bool RowGroup::isShortString(uint32_t colIndex) const
1630 {
1631     return ((getColumnWidth(colIndex) <= 7 && types[colIndex] == execplan::CalpontSystemCatalog::VARCHAR) ||
1632             (getColumnWidth(colIndex) <= 8 && types[colIndex] == execplan::CalpontSystemCatalog::CHAR));
1633 }
1634 
isLongString(uint32_t colIndex)1635 inline bool RowGroup::isLongString(uint32_t colIndex) const
1636 {
1637     return ((getColumnWidth(colIndex) > 7 && types[colIndex] == execplan::CalpontSystemCatalog::VARCHAR) ||
1638             (getColumnWidth(colIndex) > 8 && types[colIndex] == execplan::CalpontSystemCatalog::CHAR) ||
1639             types[colIndex] == execplan::CalpontSystemCatalog::VARBINARY ||
1640             types[colIndex] == execplan::CalpontSystemCatalog::BLOB ||
1641             types[colIndex] == execplan::CalpontSystemCatalog::TEXT);
1642 }
1643 
usesStringTable()1644 inline bool RowGroup::usesStringTable() const
1645 {
1646     return useStringTable;
1647 }
1648 
getOffsets()1649 inline const std::vector<uint32_t>& RowGroup::getOffsets() const
1650 {
1651     return oldOffsets;
1652 }
1653 
getOIDs()1654 inline const std::vector<uint32_t>& RowGroup::getOIDs() const
1655 {
1656     return oids;
1657 }
1658 
getKeys()1659 inline const std::vector<uint32_t>& RowGroup::getKeys() const
1660 {
1661     return keys;
1662 }
1663 
getColType(uint32_t colIndex)1664 inline execplan::CalpontSystemCatalog::ColDataType RowGroup::getColType(uint32_t colIndex) const
1665 {
1666     return types[colIndex];
1667 }
1668 
getColTypes()1669 inline const std::vector<execplan::CalpontSystemCatalog::ColDataType>& RowGroup::getColTypes() const
1670 {
1671     return types;
1672 }
1673 
getColTypes()1674 inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& RowGroup::getColTypes()
1675 {
1676     return types;
1677 }
1678 
getCharsetNumbers()1679 inline const std::vector<uint32_t>& RowGroup::getCharsetNumbers() const
1680 {
1681     return charsetNumbers;
1682 }
1683 
getCharsetNumber(uint32_t colIndex)1684 inline uint32_t RowGroup::getCharsetNumber(uint32_t colIndex) const
1685 {
1686     return charsetNumbers[colIndex];
1687 }
1688 
getScale()1689 inline const std::vector<uint32_t>& RowGroup::getScale() const
1690 {
1691     return scale;
1692 }
1693 
getPrecision()1694 inline const std::vector<uint32_t>& RowGroup::getPrecision() const
1695 {
1696     return precision;
1697 }
1698 
getColWidths()1699 inline const std::vector<uint32_t>& RowGroup::getColWidths() const
1700 {
1701     return colWidths;
1702 }
1703 
getForceInline()1704 inline boost::shared_array<bool>& RowGroup::getForceInline()
1705 {
1706     return forceInline;
1707 }
1708 
/** @brief Pack partition, segment, extent and block numbers into a 64-bit RID.
 *
 * Layout, high to low: partition in bits 32 and up, segment in bits 16-31,
 * extent in bits 10-15, block in bits 0-9. The extent number is truncated
 * to 6 bits and the block number to 10 bits.
 */
inline uint64_t convertToRid(const uint32_t& partitionNum,
                             const uint16_t& segmentNum, const uint8_t& exNum, const uint16_t& blNum)
{
    const uint64_t partBits   = static_cast<uint64_t>(partitionNum) << 32;
    const uint64_t segBits    = static_cast<uint64_t>(segmentNum) << 16;
    const uint64_t extentBits = static_cast<uint64_t>(exNum & 0x3f) << 10;
    const uint64_t blockBits  = static_cast<uint64_t>(blNum & 0x3ff);

    return partBits | segBits | extentBits | blockBits;
}
1721 
setBaseRid(const uint32_t & partNum,const uint16_t & segNum,const uint8_t & extentNum,const uint16_t & blockNum)1722 inline void RowGroup::setBaseRid(const uint32_t& partNum, const uint16_t& segNum,
1723                                  const uint8_t& extentNum, const uint16_t& blockNum)
1724 {
1725     *((uint64_t*) &data[baseRidOffset]) = convertToRid(partNum, segNum,
1726                                           extentNum, blockNum);
1727 }
1728 
getStringTableThreshold()1729 inline uint32_t RowGroup::getStringTableThreshold() const
1730 {
1731     return sTableThreshold;
1732 }
1733 
setStringStore(boost::shared_ptr<StringStore> ss)1734 inline void RowGroup::setStringStore(boost::shared_ptr<StringStore> ss)
1735 {
1736     if (useStringTable)
1737     {
1738         rgData->setStringStore(ss);
1739         strings = rgData->strings.get();
1740     }
1741 }
1742 
// Splits a packed base RID into its components.
//
// Each output pointer is optional; a null pointer means the caller is not
// interested in that component and it is skipped.
//   partNum   <- bits 32 and up
//   segNum    <- bits 16-31
//   extentNum <- bits 10-15
//   blockNum  <- bits 0-9
inline void getLocationFromRid(uint64_t rid, uint32_t* partNum,
                               uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum)
{
    if (partNum)
        *partNum = static_cast<uint32_t>(rid >> 32);

    if (segNum)
        *segNum = static_cast<uint16_t>(rid >> 16);    // keeps only the low 16 bits

    if (extentNum)
        *extentNum = static_cast<uint8_t>((rid >> 10) & 0x3f);

    if (blockNum)
        *blockNum = static_cast<uint16_t>(rid & 0x3ff);
}
1754 
getLocation(uint32_t * partNum,uint16_t * segNum,uint8_t * extentNum,uint16_t * blockNum)1755 inline void RowGroup::getLocation(uint32_t* partNum, uint16_t* segNum,
1756                                   uint8_t* extentNum, uint16_t* blockNum)
1757 {
1758     getLocationFromRid(getBaseRid(), partNum, segNum, extentNum, blockNum);
1759 }
1760 
// Returns the extent-relative RID of the first row of the logical block
// identified by baseRid: the 10-bit block number taken from baseRid, shifted
// up by 13 bits so that the low 13 bits (the row-within-block part) are zero.
inline uint64_t getExtentRelativeRid(uint64_t baseRid)
{
    const uint64_t blockInExtent = baseRid & 0x3ff;
    return blockInExtent << 13;
}
1767 
getExtentRelativeRid()1768 inline uint64_t Row::getExtentRelativeRid() const
1769 {
1770     return rowgroup::getExtentRelativeRid(baseRid) | (getRelRid() & 0x1fff);
1771 }
1772 
// Returns the file-relative RID of the first row of the logical block
// identified by baseRid.  The 6-bit extent number (bits 10-15 of baseRid) is
// placed at bit 23 and the 10-bit block number (bits 0-9) at bit 13; the low
// 13 bits (the row-within-block part) are zero.
inline uint64_t getFileRelativeRid(uint64_t baseRid)
{
    const uint64_t extentInFile = (baseRid >> 10) & 0x3f;
    const uint64_t blockInExtent = baseRid & 0x3ff;

    const uint64_t extentBits = extentInFile << 23;
    const uint64_t blockBits = blockInExtent << 13;
    return extentBits | blockBits;
}
1780 
getFileRelativeRid()1781 inline uint64_t Row::getFileRelativeRid() const
1782 {
1783     return rowgroup::getFileRelativeRid(baseRid) | (getRelRid() & 0x1fff);
1784 }
1785 
getLocation(uint32_t * partNum,uint16_t * segNum,uint8_t * extentNum,uint16_t * blockNum,uint16_t * rowNum)1786 inline void Row::getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
1787                              uint16_t* blockNum, uint16_t* rowNum)
1788 {
1789     getLocationFromRid(baseRid, partNum, segNum, extentNum, blockNum);
1790 
1791     if (rowNum) *rowNum = getRelRid();
1792 }
1793 
copyRow(const Row & in,Row * out,uint32_t colCount)1794 inline void copyRow(const Row& in, Row* out, uint32_t colCount)
1795 {
1796     if (&in == out)
1797         return;
1798 
1799     out->setRid(in.getRelRid());
1800 
1801     if (!in.usesStringTable() && !out->usesStringTable())
1802     {
1803         memcpy(out->getData(), in.getData(), std::min(in.getOffset(colCount), out->getOffset(colCount)));
1804         return;
1805     }
1806 
1807     for (uint32_t i = 0; i < colCount; i++)
1808     {
1809         if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::VARBINARY ||
1810                      in.getColTypes()[i] == execplan::CalpontSystemCatalog::BLOB ||
1811                      in.getColTypes()[i] == execplan::CalpontSystemCatalog::TEXT ||
1812                      in.getColTypes()[i] == execplan::CalpontSystemCatalog::CLOB))
1813             out->setVarBinaryField(in.getVarBinaryStringField(i), i);
1814         else if (UNLIKELY(in.isLongString(i)))
1815             //out->setStringField(in.getStringField(i), i);
1816             out->setStringField(in.getStringPointer(i), in.getStringLength(i), i);
1817         else if (UNLIKELY(in.isShortString(i)))
1818             out->setUintField(in.getUintField(i), i);
1819         else if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::LONGDOUBLE))
1820             out->setLongDoubleField(in.getLongDoubleField(i), i);
1821         else
1822             out->setIntField(in.getIntField(i), i);
1823     }
1824 }
1825 
copyRow(const Row & in,Row * out)1826 inline void copyRow(const Row& in, Row* out)
1827 {
1828     copyRow(in, out, std::min(in.getColumnCount(), out->getColumnCount()));
1829 }
1830 
getString(uint64_t off)1831 inline std::string StringStore::getString(uint64_t off) const
1832 {
1833     uint32_t length;
1834 
1835     if (off == std::numeric_limits<uint64_t>::max())
1836         return joblist::CPNULLSTRMARK;
1837 
1838     MemChunk* mc;
1839 
1840     if (off & 0x8000000000000000)
1841     {
1842         //off = off - 0x8000000000000000;
1843         off &= ~0x8000000000000000;
1844 
1845         if (longStrings.size() <= off)
1846             return joblist::CPNULLSTRMARK;
1847 
1848         mc = (MemChunk*) longStrings[off].get();
1849         memcpy(&length, mc->data, 4);
1850         return std::string((char*) mc->data + 4, length);
1851     }
1852 
1853     uint64_t chunk = off / CHUNK_SIZE;
1854     uint64_t offset = off % CHUNK_SIZE;
1855 
1856     // this has to handle uninitialized data as well.  If it's uninitialized it doesn't matter
1857     // what gets returned, it just can't go out of bounds.
1858     if (mem.size() <= chunk)
1859         return joblist::CPNULLSTRMARK;
1860 
1861     mc = (MemChunk*) mem[chunk].get();
1862 
1863     memcpy(&length, &mc->data[offset], 4);
1864 
1865     if ((offset + length) > mc->currentSize)
1866         return joblist::CPNULLSTRMARK;
1867 
1868     return std::string((char*) & (mc->data[offset]) + 4, length);
1869 }
1870 
getPointer(uint64_t off)1871 inline const uint8_t* StringStore::getPointer(uint64_t off) const
1872 {
1873     if (off == std::numeric_limits<uint64_t>::max())
1874         return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
1875 
1876     uint64_t chunk = off / CHUNK_SIZE;
1877     uint64_t offset = off % CHUNK_SIZE;
1878     MemChunk* mc;
1879 
1880     if (off & 0x8000000000000000)
1881     {
1882         //off = off - 0x8000000000000000;
1883         off &= ~0x8000000000000000;
1884 
1885         if (longStrings.size() <= off)
1886             return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
1887 
1888         mc = (MemChunk*) longStrings[off].get();
1889         return mc->data + 4;
1890     }
1891 
1892     // this has to handle uninitialized data as well.  If it's uninitialized it doesn't matter
1893     // what gets returned, it just can't go out of bounds.
1894     if (UNLIKELY(mem.size() <= chunk))
1895         return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
1896 
1897     mc = (MemChunk*) mem[chunk].get();
1898 
1899     if (offset > mc->currentSize)
1900         return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
1901 
1902     return &(mc->data[offset]) + 4;
1903 }
1904 
isNullValue(uint64_t off)1905 inline bool StringStore::isNullValue(uint64_t off) const
1906 {
1907     uint32_t length;
1908 
1909     if (off == std::numeric_limits<uint64_t>::max())
1910         return true;
1911 
1912     // Long strings won't be NULL
1913     if (off & 0x8000000000000000)
1914         return false;
1915 
1916     uint32_t chunk = off / CHUNK_SIZE;
1917     uint32_t offset = off % CHUNK_SIZE;
1918     MemChunk* mc;
1919 
1920     if (mem.size() <= chunk)
1921         return true;
1922 
1923     mc = (MemChunk*) mem[chunk].get();
1924     memcpy(&length, &mc->data[offset], 4);
1925 
1926     if (length == 0)
1927         return true;
1928 
1929     if (length < 8)
1930         return false;
1931 
1932     if ((offset + length) > mc->currentSize)
1933         return true;
1934 
1935     if (mc->data[offset + 4] == 0)  // "" = NULL string for some reason...
1936         return true;
1937     return (memcmp(&mc->data[offset+4], joblist::CPNULLSTRMARK.c_str(), 8) == 0);
1938 }
1939 
getStringLength(uint64_t off)1940 inline uint32_t StringStore::getStringLength(uint64_t off) const
1941 {
1942     uint32_t length;
1943     MemChunk* mc;
1944 
1945     if (off == std::numeric_limits<uint64_t>::max())
1946         return 0;
1947 
1948     if (off & 0x8000000000000000)
1949     {
1950         //off = off - 0x8000000000000000;
1951         off &= ~0x8000000000000000;
1952 
1953         if (longStrings.size() <= off)
1954             return 0;
1955 
1956         mc = (MemChunk*) longStrings[off].get();
1957         memcpy(&length, mc->data, 4);
1958     }
1959     else
1960     {
1961         uint64_t chunk = off / CHUNK_SIZE;
1962         uint64_t offset = off % CHUNK_SIZE;
1963 
1964         if (mem.size() <= chunk)
1965             return 0;
1966 
1967         mc = (MemChunk*) mem[chunk].get();
1968         memcpy(&length, &mc->data[offset], 4);
1969     }
1970 
1971     return length;
1972 }
1973 
isEmpty()1974 inline bool StringStore::isEmpty() const
1975 {
1976     return empty;
1977 }
1978 
getSize()1979 inline uint64_t StringStore::getSize() const
1980 {
1981     uint32_t i;
1982     uint64_t ret = 0;
1983     MemChunk* mc;
1984 
1985     ret += sizeof(MemChunk) * mem.size();
1986     for (i = 0; i < mem.size(); i++)
1987     {
1988         mc = (MemChunk*) mem[i].get();
1989         ret += mc->capacity;
1990     }
1991 
1992     ret += sizeof(MemChunk) * longStrings.size();
1993     for (i = 0; i < longStrings.size(); i++)
1994     {
1995         mc = (MemChunk*) longStrings[i].get();
1996         ret += mc->capacity;
1997     }
1998 
1999     return ret;
2000 }
2001 
2002 inline RGData& RGData::operator=(const RGData& r)
2003 {
2004     rowData = r.rowData;
2005     strings = r.strings;
2006     userDataStore = r.userDataStore;
2007     return *this;
2008 }
2009 
getRow(uint32_t num,Row * row)2010 inline void RGData::getRow(uint32_t num, Row* row)
2011 {
2012     uint32_t size = row->getSize();
2013     row->setData(Row::Pointer(&rowData[RowGroup::getHeaderSize() + (num * size)], strings.get(), userDataStore.get()));
2014 }
2015 
2016 }
2017 
2018 #endif
2019 // vim:ts=4 sw=4:
2020