1 /*
2 Copyright (C) 2014 InfiniDB, Inc.
3 Copyright (c) 2019 MariaDB Corporation
4
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; version 2 of
8 the License.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18 MA 02110-1301, USA.
19 */
20
21 //
22 // C++ Interface: rowgroup
23 //
24 // Description:
25 //
26 // Author: Patrick LeBlanc <pleblanc@calpont.com>, (C) 2008
27
28 #ifndef ROWGROUP_H_
29 #define ROWGROUP_H_
30
31 #include <vector>
32 #include <string>
33 #include <stdexcept>
34 //#define NDEBUG
35 #include <cassert>
36 #include <boost/shared_ptr.hpp>
37 #include <boost/shared_array.hpp>
38 #include <boost/thread/mutex.hpp>
39 #include <cmath>
40 #include <cfloat>
41 #ifdef __linux__
42 #include <execinfo.h>
43 #endif
44
45 #if defined(_MSC_VER) && !defined(isnan)
46 #define isnan _isnan
47 #endif
48
49 #include "hasher.h"
50
51 #include "joblisttypes.h"
52 #include "bytestream.h"
53 #include "calpontsystemcatalog.h"
54 #include "exceptclasses.h"
55 #include "mcsv1_udaf.h"
56
57 #include "branchpred.h"
58
59 #include "../winport/winport.h"
60
61 #include "collation.h"
62 #include "common/hashfamily.h"
63
64
65 // Workaround for my_global.h #define of isnan(X) causing a std::std namespace
66
67 namespace rowgroup
68 {
69
// Common size constant for the RowGroup classes.
// NOTE(review): presumably the number of rows in one 'logical block' -- confirm against callers.
const int16_t rgCommonSize = 8192;
71
72 /*
73 The RowGroup family of classes encapsulate the data moved through the
74 system.
75
76 - RowGroup specifies the format of the data primarily (+ some other metadata),
77 - RGData (aka RowGroup Data) encapsulates the data,
78 - Row is used to extract fields from the data and iterate.
79
80 JobListFactory instantiates the RowGroups to be used by each stage of processing.
81 RGDatas are passed between stages, and their RowGroup instances are used
82 to interpret them.
83
84 Historically, row data was just a chunk of contiguous memory, a uint8_t *.
85 Every field had a fixed width, which allowed for quick offset
86 calculation when assigning or retrieving individual fields. That worked
87 well for a few years, but at some point it became common to declare
88 all strings as max-length, and to manipulate them in queries.
89
90 Having fixed-width fields, even for strings, required an unreasonable
91 amount of memory. RGData & StringStore were introduced to handle strings
92 more efficiently, at least with respect to memory. The row data would
93 still be a uint8_t *, and columns would be fixed-width, but string fields
94 above a certain width would contain a 'Pointer' that referenced a string in
95 StringStore. Strings are stored efficiently in StringStore, so there is
96 no longer wasted space.
97
98 StringStore comes with a different inefficiency however. When a value
99 is overwritten, the original string cannot be freed independently of the
100 others, so it continues to use space. If values are only set once, as is
101 the typical case, then StringStore is efficient. When it is necessary
102 to overwrite string fields, it is possible to configure these classes
103 to use the original data format so that old string fields do not accumulate
104 in memory. Of course, be careful, because blobs and text fields in CS are
105 declared as 2GB strings!
106
107 A single RGData contains up to one 'logical block' worth of data,
108 which is 8192 rows. One RGData is usually treated as one unit of work by
109 PrimProc and the JobSteps, but the rows an RGData contains and how many are
110 treated as a work unit depend on the operation being done.
111
112 For example, PrimProc works in units of 8192 contiguous rows
113 that come from disk. If half of the rows were filtered out, then the
114 RGData it passes to the next stage would only contain 4096 rows.
115
116 Others build results incrementally before passing them along, such as
117 group-by. If one group contains 11111 values, then group-by will
118 return 2 RGDatas for that group, one with 8192 rows, and one with 2919.
119
120 Note: There is no synchronization in any of these classes for obvious
121 performance reasons. Likewise, although it's technically safe for many
122 readers to access an RGData simultaneously, that would not be an
123 efficient thing to do. Try to stick to designs where a single RGData
124 is used by a single thread at a time.
125 */
126
// VS'08 carps that struct MemChunk is not default copyable because of the zero-length array.
// This may be so, and we'll get link errors if someone tries, but so far no one has.
129 #ifdef _MSC_VER
130 #pragma warning (push)
131 #pragma warning (disable : 4200)
132 #endif
133
134 class StringStore
135 {
136 public:
137 StringStore();
138 virtual ~StringStore();
139
140 inline std::string getString(uint64_t offset) const;
141 uint64_t storeString(const uint8_t* data, uint32_t length); //returns the offset
142 inline const uint8_t* getPointer(uint64_t offset) const;
143 inline uint32_t getStringLength(uint64_t offset) const;
getConstString(uint64_t offset)144 inline utils::ConstString getConstString(uint64_t offset) const
145 {
146 return utils::ConstString((const char *) getPointer(offset),
147 getStringLength(offset));
148 }
149 inline bool isEmpty() const;
150 inline uint64_t getSize() const;
151 inline bool isNullValue(uint64_t offset) const;
152 bool equals(const std::string& str, uint64_t offset, CHARSET_INFO* cs) const;
153
154 void clear();
155
156 void serialize(messageqcpp::ByteStream&) const;
157 void deserialize(messageqcpp::ByteStream&);
158
159 //@bug6065, make StringStore::storeString() thread safe
useStoreStringMutex(bool b)160 void useStoreStringMutex(bool b)
161 {
162 fUseStoreStringMutex = b;
163 }
useStoreStringMutex()164 bool useStoreStringMutex() const
165 {
166 return fUseStoreStringMutex;
167 }
168
169 private:
170 std::string empty_str;
171
172 StringStore(const StringStore&);
173 StringStore& operator=(const StringStore&);
174 static const uint32_t CHUNK_SIZE = 64 * 1024; // allocators like powers of 2
175
176 // This is an overlay b/c the underlying data needs to be any size,
177 // and alloc'd in one chunk. data can't be a separate dynamic chunk.
178 struct MemChunk
179 {
180 uint32_t currentSize;
181 uint32_t capacity;
182 uint8_t data[];
183 };
184
185 std::vector<boost::shared_array<uint8_t> > mem;
186
187 // To store strings > 64KB (BLOB/TEXT)
188 std::vector<boost::shared_array<uint8_t> > longStrings;
189 bool empty;
190 bool fUseStoreStringMutex; //@bug6065, make StringStore::storeString() thread safe
191 boost::mutex fMutex;
192 };
193
194 // Where we store user data for UDA(n)F
195 class UserDataStore
196 {
197 // length represents the fixed portion length of userData.
198 // There may be variable length data in containers or other
199 // user created structures.
200 struct StoreData
201 {
202 int32_t length;
203 std::string functionName;
204 boost::shared_ptr<mcsv1sdk::UserData> userData;
StoreDataStoreData205 StoreData() : length(0) { }
StoreDataStoreData206 StoreData(const StoreData& rhs)
207 {
208 length = rhs.length;
209 functionName = rhs.functionName;
210 userData = rhs.userData;
211 }
212 };
213
214 public:
215 UserDataStore();
216 virtual ~UserDataStore();
217
218 void serialize(messageqcpp::ByteStream&) const;
219 void deserialize(messageqcpp::ByteStream&);
220
221 //Set to make UserDataStore thread safe
useUserDataMutex(bool b)222 void useUserDataMutex(bool b)
223 {
224 fUseUserDataMutex = b;
225 }
useUserDataMutex()226 bool useUserDataMutex() const
227 {
228 return fUseUserDataMutex;
229 }
230
231 // Returns the offset
232 uint32_t storeUserData(mcsv1sdk::mcsv1Context& context,
233 boost::shared_ptr<mcsv1sdk::UserData> data,
234 uint32_t length);
235
236 boost::shared_ptr<mcsv1sdk::UserData> getUserData(uint32_t offset) const;
237
238 private:
239 UserDataStore(const UserDataStore&);
240 UserDataStore& operator=(const UserDataStore&);
241
242 std::vector<StoreData> vStoreData;
243
244 bool fUseUserDataMutex;
245 boost::mutex fMutex;
246 };
247
248 #ifdef _MSC_VER
249 #pragma warning (pop)
250 #endif
251
252 class RowGroup;
253 class Row;
254
255 /* TODO: OO the rowgroup data to the extent there's no measurable performance hit. */
256 class RGData
257 {
258 public:
259 RGData(); // useless unless followed by an = or a deserialize operation
260 RGData(const RowGroup& rg, uint32_t rowCount); // allocates memory for rowData
261 explicit RGData(const RowGroup& rg);
262 RGData(const RGData&);
263 virtual ~RGData();
264
265 inline RGData& operator=(const RGData&);
266
267 // amount should be the # returned by RowGroup::getDataSize()
268 void serialize(messageqcpp::ByteStream&, uint32_t amount) const;
269
270 // the 'hasLengthField' is there b/c PM aggregation (and possibly others) currently sends
271 // inline data with a length field. Once that's converted to string table format, that
272 // option can go away.
273 void deserialize(messageqcpp::ByteStream&, uint32_t amount = 0); // returns the # of bytes read
274
275 inline uint64_t getStringTableMemUsage();
276 void clear();
277 void reinit(const RowGroup& rg);
278 void reinit(const RowGroup& rg, uint32_t rowCount);
setStringStore(boost::shared_ptr<StringStore> & ss)279 inline void setStringStore(boost::shared_ptr<StringStore>& ss)
280 {
281 strings = ss;
282 }
283
284 // this will use the pre-configured Row to figure out where row # num is, then set the Row
285 // to point to it. It's a shortcut around using a RowGroup to do the same thing for cases
286 // where it's inconvenient to instantiate one.
287 inline void getRow(uint32_t num, Row* row);
288
289 //@bug6065, make StringStore::storeString() thread safe
useStoreStringMutex(bool b)290 void useStoreStringMutex(bool b)
291 {
292 if (strings) strings->useStoreStringMutex(b);
293 }
useStoreStringMutex()294 bool useStoreStringMutex() const
295 {
296 return (strings ? (strings->useStoreStringMutex()) : false);
297 }
298
299 UserDataStore* getUserDataStore();
300 // make UserDataStore::storeData() thread safe
useUserDataMutex(bool b)301 void useUserDataMutex(bool b)
302 {
303 if (userDataStore) userDataStore->useUserDataMutex(b);
304 }
useUserDataMutex()305 bool useUserDataMutex() const
306 {
307 return (userDataStore ? (userDataStore->useUserDataMutex()) : false);
308 }
309
310 boost::shared_array<uint8_t> rowData;
311 boost::shared_ptr<StringStore> strings;
312 boost::shared_ptr<UserDataStore> userDataStore;
313 private:
314 //boost::shared_array<uint8_t> rowData;
315 //boost::shared_ptr<StringStore> strings;
316
317 // Need sig to support backward compat. RGData can deserialize both forms.
318 static const uint32_t RGDATA_SIG = 0xffffffff; //won't happen for 'old' Rowgroup data
319
320 friend class RowGroup;
321 };
322
323
324 class Row
325 {
326 public:
327 struct Pointer
328 {
PointerPointer329 inline Pointer() : data(NULL), strings(NULL), userDataStore(NULL) { }
330
331 // Pointer(uint8_t*) implicitly makes old code compatible with the string table impl;
PointerPointer332 inline Pointer(uint8_t* d) : data(d), strings(NULL), userDataStore(NULL) { }
PointerPointer333 inline Pointer(uint8_t* d, StringStore* s) : data(d), strings(s), userDataStore(NULL) { }
PointerPointer334 inline Pointer(uint8_t* d, StringStore* s, UserDataStore* u) :
335 data(d), strings(s), userDataStore(u) { }
336 uint8_t* data;
337 StringStore* strings;
338 UserDataStore* userDataStore;
339 };
340
341 Row();
342 Row(const Row&);
343 ~Row();
344
345 Row& operator=(const Row&);
346 bool operator==(const Row&) const;
347
348 //void setData(uint8_t *rowData, StringStore *ss);
349 inline void setData(const Pointer&); // convenience fcn, can go away
350 inline uint8_t* getData() const;
351
352 inline void setPointer(const Pointer&);
353 inline Pointer getPointer() const;
354
355 inline void nextRow();
356 inline uint32_t getColumnWidth(uint32_t colIndex) const;
357 inline uint32_t getColumnCount() const;
358 inline uint32_t getSize() const; // this is only accurate if there is no string table
359 // if a string table is being used, getRealSize() takes into account variable-length strings
360 inline uint32_t getRealSize() const;
361 inline uint32_t getOffset(uint32_t colIndex) const;
362 inline uint32_t getScale(uint32_t colIndex) const;
363 inline uint32_t getPrecision(uint32_t colIndex) const;
364 inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
365 inline execplan::CalpontSystemCatalog::ColDataType* getColTypes();
366 inline const execplan::CalpontSystemCatalog::ColDataType* getColTypes() const;
367 inline uint32_t getCharsetNumber(uint32_t colIndex) const;
368
369 // this returns true if the type is not CHAR or VARCHAR
370 inline bool isCharType(uint32_t colIndex) const;
371 inline bool isUnsigned(uint32_t colIndex) const;
372 inline bool isShortString(uint32_t colIndex) const;
373 inline bool isLongString(uint32_t colIndex) const;
374
colHasCollation(uint32_t colIndex)375 bool colHasCollation(uint32_t colIndex) const
376 {
377 return execplan::typeHasCollation(getColType(colIndex));
378 }
379
380 template<int len> inline uint64_t getUintField(uint32_t colIndex) const;
381 inline uint64_t getUintField(uint32_t colIndex) const;
382 template<int len> inline int64_t getIntField(uint32_t colIndex) const;
383 inline int64_t getIntField(uint32_t colIndex) const;
384 template<int len> inline bool equals(uint64_t val, uint32_t colIndex) const;
385 inline bool equals(long double val, uint32_t colIndex) const;
386 bool equals(const std::string& val, uint32_t colIndex) const;
387
388 inline double getDoubleField(uint32_t colIndex) const;
389 inline float getFloatField(uint32_t colIndex) const;
getDecimalField(uint32_t colIndex)390 inline double getDecimalField(uint32_t colIndex) const
391 {
392 return 0.0; // TODO: Do something here
393 }
394 inline long double getLongDoubleField(uint32_t colIndex) const;
395
396 inline uint64_t getBaseRid() const;
397 inline uint64_t getRid() const;
398 inline uint16_t getRelRid() const; // returns a rid relative to this logical block
399 inline uint64_t getExtentRelativeRid() const; // returns a rid relative to the extent it's in
400 inline uint64_t getFileRelativeRid() const; // returns a file-relative rid
401 inline void getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
402 uint16_t* blockNum, uint16_t* rowNum);
403
404 template<int len> void setUintField(uint64_t val, uint32_t colIndex);
405
406 /* Note: these 2 fcns avoid 1 array lookup per call. Using them only
407 in projection on the PM resulted in a 2.8% performance gain on
408 the queries listed in bug 2223.
409 TODO: apply them everywhere else possible, and write equivalents
410 for the other types as well as the getters.
411 */
412 template<int len> void setUintField_offset(uint64_t val, uint32_t offset);
413 inline void nextRow(uint32_t size);
414 inline void prevRow(uint32_t size, uint64_t number);
415
416 inline void setUintField(uint64_t val, uint32_t colIndex);
417 template<int len> void setIntField(int64_t, uint32_t colIndex);
418 inline void setIntField(int64_t, uint32_t colIndex);
419
420 inline void setDoubleField(double val, uint32_t colIndex);
421 inline void setFloatField(float val, uint32_t colIndex);
setDecimalField(double val,uint32_t colIndex)422 inline void setDecimalField(double val, uint32_t colIndex) { }; // TODO: Do something here
423 inline void setLongDoubleField(long double val, uint32_t colIndex);
424
425 inline void setRid(uint64_t rid);
426
427 // is string efficient for this?
428 inline std::string getStringField(uint32_t colIndex) const;
429 inline const uint8_t* getStringPointer(uint32_t colIndex) const;
430 inline uint32_t getStringLength(uint32_t colIndex) const;
431 inline utils::ConstString getConstString(uint32_t colIndex) const;
432 inline utils::ConstString getShortConstString(uint32_t colIndex) const;
433 void setStringField(const std::string& val, uint32_t colIndex);
434 inline void setStringField(const uint8_t*, uint32_t len, uint32_t colIndex);
435
436 // support VARBINARY
437 // Add 2-byte length at the CHARSET_INFO*beginning of the field. NULL and zero length field are
438 // treated the same, could use one of the length bit to distinguish these two cases.
439 inline std::string getVarBinaryStringField(uint32_t colIndex) const;
440 inline void setVarBinaryField(const std::string& val, uint32_t colIndex);
441 // No string construction is necessary for better performance.
442 inline uint32_t getVarBinaryLength(uint32_t colIndex) const;
443 inline const uint8_t* getVarBinaryField(uint32_t colIndex) const;
444 inline const uint8_t* getVarBinaryField(uint32_t& len, uint32_t colIndex) const;
445 inline void setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex);
446
447 inline boost::shared_ptr<mcsv1sdk::UserData> getUserData(uint32_t colIndex) const;
448 inline void setUserData(mcsv1sdk::mcsv1Context& context,
449 boost::shared_ptr<mcsv1sdk::UserData> userData,
450 uint32_t len, uint32_t colIndex);
451
452 uint64_t getNullValue(uint32_t colIndex) const;
453 bool isNullValue(uint32_t colIndex) const;
454
455 // when NULLs are pulled out via getIntField(), they come out with these values.
456 // Ex: the 1-byte int null value is 0x80. When it gets cast to an int64_t
457 // it becomes 0xffffffffffffff80, which won't match anything returned by getNullValue().
458 int64_t getSignedNullValue(uint32_t colIndex) const;
459
460 // copy data in srcIndex field to destIndex, all data type
461 inline void copyField(uint32_t destIndex, uint32_t srcIndex) const;
462
463 // copy data in srcIndex field to destAddr, all data type
464 //inline void copyField(uint8_t* destAddr, uint32_t srcIndex) const;
465
466 // an adapter for code that uses the copyField call above;
467 // that's not string-table safe, this one is
468 inline void copyField(Row& dest, uint32_t destIndex, uint32_t srcIndex) const;
469
470 std::string toString(uint32_t rownum = 0) const;
471 std::string toCSV() const;
472
473 /* These fcns are used only in joins. The RID doesn't matter on the side that
474 gets hashed. We steal that field here to "mark" a row. */
475 inline void markRow();
476 inline void zeroRid();
477 inline bool isMarked();
478 void initToNull();
479
usesStringTable(bool b)480 inline void usesStringTable(bool b)
481 {
482 useStringTable = b;
483 }
usesStringTable()484 inline bool usesStringTable() const
485 {
486 return useStringTable;
487 }
hasLongString()488 inline bool hasLongString() const
489 {
490 return hasLongStringField;
491 }
492
493 // these are for cases when you already know the type definitions are the same.
494 // a fcn to check the type defs seperately doesn't exist yet. No normalization.
495 inline uint64_t hash(uint32_t lastCol) const; // generates a hash for cols [0-lastCol]
496 inline uint64_t hash() const; // generates a hash for all cols
497 inline void colUpdateMariaDBHasher(datatypes::MariaDBHasher &hM,
498 const utils::Hasher_r& h,
499 const uint32_t col,
500 uint32_t& intermediateHash) const;
501
502 bool equals(const Row&, uint32_t lastCol) const;
503 inline bool equals(const Row&) const;
504
setUserDataStore(UserDataStore * u)505 inline void setUserDataStore(UserDataStore* u)
506 {
507 userDataStore = u;
508 }
509
510 const CHARSET_INFO* getCharset(uint32_t col) const;
511
512 private:
513 uint32_t columnCount;
514 uint64_t baseRid;
515
516 // Note, the mem behind these pointer fields is owned by RowGroup not Row
517 uint32_t* oldOffsets;
518 uint32_t* stOffsets;
519 uint32_t* offsets;
520 uint32_t* colWidths;
521 execplan::CalpontSystemCatalog::ColDataType* types;
522 uint32_t* charsetNumbers;
523 CHARSET_INFO** charsets;
524 uint8_t* data;
525 uint32_t* scale;
526 uint32_t* precision;
527
528 StringStore* strings;
529 bool useStringTable;
530 bool hasCollation;
531 bool hasLongStringField;
532 uint32_t sTableThreshold;
533 boost::shared_array<bool> forceInline;
534 inline bool inStringTable(uint32_t col) const;
535
536 UserDataStore* userDataStore; // For UDAF
537
538 friend class RowGroup;
539 };
540
getPointer()541 inline Row::Pointer Row::getPointer() const
542 {
543 return Pointer(data, strings, userDataStore);
544 }
getData()545 inline uint8_t* Row::getData() const
546 {
547 return data;
548 }
549
setPointer(const Pointer & p)550 inline void Row::setPointer(const Pointer& p)
551 {
552 data = p.data;
553 strings = p.strings;
554 bool hasStrings = (strings != 0);
555
556 if (useStringTable != hasStrings)
557 {
558 useStringTable = hasStrings;
559 offsets = (useStringTable ? stOffsets : oldOffsets);
560 }
561
562 userDataStore = p.userDataStore;
563 }
564
setData(const Pointer & p)565 inline void Row::setData(const Pointer& p)
566 {
567 setPointer(p);
568 }
569
nextRow()570 inline void Row::nextRow()
571 {
572 data += offsets[columnCount];
573 }
574
getColumnCount()575 inline uint32_t Row::getColumnCount() const
576 {
577 return columnCount;
578 }
579
getColumnWidth(uint32_t col)580 inline uint32_t Row::getColumnWidth(uint32_t col) const
581 {
582 return colWidths[col];
583 }
584
getSize()585 inline uint32_t Row::getSize() const
586 {
587 return offsets[columnCount];
588 }
589
getRealSize()590 inline uint32_t Row::getRealSize() const
591 {
592 if (!useStringTable)
593 return getSize();
594
595 uint32_t ret = 2;
596
597 for (uint32_t i = 0; i < columnCount; i++)
598 {
599 if (!inStringTable(i))
600 ret += getColumnWidth(i);
601 else
602 ret += getStringLength(i);
603 }
604
605 return ret;
606 }
607
getScale(uint32_t col)608 inline uint32_t Row::getScale(uint32_t col) const
609 {
610 return scale[col];
611 }
612
getPrecision(uint32_t col)613 inline uint32_t Row::getPrecision(uint32_t col) const
614 {
615 return precision[col];
616 }
617
getColType(uint32_t colIndex)618 inline execplan::CalpontSystemCatalog::ColDataType Row::getColType(uint32_t colIndex) const
619 {
620 return types[colIndex];
621 }
622
getColTypes()623 inline execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes()
624 {
625 return types;
626 }
627
getColTypes()628 inline const execplan::CalpontSystemCatalog::ColDataType* Row::getColTypes() const
629 {
630 return types;
631 }
632
getCharsetNumber(uint32_t col)633 inline uint32_t Row::getCharsetNumber(uint32_t col) const
634 {
635 return charsetNumbers[col];
636 }
637
isCharType(uint32_t colIndex)638 inline bool Row::isCharType(uint32_t colIndex) const
639 {
640 return execplan::isCharType(types[colIndex]);
641 }
642
isUnsigned(uint32_t colIndex)643 inline bool Row::isUnsigned(uint32_t colIndex) const
644 {
645 return execplan::isUnsigned(types[colIndex]);
646 }
647
isShortString(uint32_t colIndex)648 inline bool Row::isShortString(uint32_t colIndex) const
649 {
650 return (getColumnWidth(colIndex) <= 8 && isCharType(colIndex));
651 }
652
isLongString(uint32_t colIndex)653 inline bool Row::isLongString(uint32_t colIndex) const
654 {
655 return (getColumnWidth(colIndex) > 8 && isCharType(colIndex));
656 }
657
inStringTable(uint32_t col)658 inline bool Row::inStringTable(uint32_t col) const
659 {
660 return strings && getColumnWidth(col) >= sTableThreshold && !forceInline[col];
661 }
662
663 template<int len>
equals(uint64_t val,uint32_t colIndex)664 inline bool Row::equals(uint64_t val, uint32_t colIndex) const
665 {
666 /* I think the compiler will optimize away the switch stmt */
667 switch (len)
668 {
669 case 1:
670 return data[offsets[colIndex]] == val;
671
672 case 2:
673 return *((uint16_t*) &data[offsets[colIndex]]) == val;
674
675 case 4:
676 return *((uint32_t*) &data[offsets[colIndex]]) == val;
677
678 case 8:
679 return *((uint64_t*) &data[offsets[colIndex]]) == val;
680
681 default:
682 idbassert(0);
683 throw std::logic_error("Row::equals(): bad length.");
684 }
685 }
686
equals(long double val,uint32_t colIndex)687 inline bool Row::equals(long double val, uint32_t colIndex) const
688 {
689 return *((long double*) &data[offsets[colIndex]]) == val;
690 }
691 template<int len>
getUintField(uint32_t colIndex)692 inline uint64_t Row::getUintField(uint32_t colIndex) const
693 {
694 /* I think the compiler will optimize away the switch stmt */
695 switch (len)
696 {
697 case 1:
698 return data[offsets[colIndex]];
699
700 case 2:
701 return *((uint16_t*) &data[offsets[colIndex]]);
702
703 case 4:
704 return *((uint32_t*) &data[offsets[colIndex]]);
705
706 case 8:
707 return *((uint64_t*) &data[offsets[colIndex]]);
708
709 default:
710 idbassert(0);
711 throw std::logic_error("Row::getUintField(): bad length.");
712 }
713 }
714
getUintField(uint32_t colIndex)715 inline uint64_t Row::getUintField(uint32_t colIndex) const
716 {
717 switch (getColumnWidth(colIndex))
718 {
719 case 1:
720 return data[offsets[colIndex]];
721
722 case 2:
723 return *((uint16_t*) &data[offsets[colIndex]]);
724
725 case 4:
726 return *((uint32_t*) &data[offsets[colIndex]]);
727
728 case 8:
729 return *((uint64_t*) &data[offsets[colIndex]]);
730
731 default:
732 idbassert(0);
733 throw std::logic_error("Row::getUintField(): bad length.");
734 }
735 }
736
737 template<int len>
getIntField(uint32_t colIndex)738 inline int64_t Row::getIntField(uint32_t colIndex) const
739 {
740 /* I think the compiler will optimize away the switch stmt */
741 switch (len)
742 {
743 case 1:
744 return (int8_t) data[offsets[colIndex]];
745
746 case 2:
747 return *((int16_t*) &data[offsets[colIndex]]);
748
749 case 4:
750 return *((int32_t*) &data[offsets[colIndex]]);
751
752 case 8:
753 return *((int64_t*) &data[offsets[colIndex]]);
754
755 default:
756 idbassert(0);
757 throw std::logic_error("Row::getIntField(): bad length.");
758 }
759 }
760
getIntField(uint32_t colIndex)761 inline int64_t Row::getIntField(uint32_t colIndex) const
762 {
763 /* I think the compiler will optimize away the switch stmt */
764 switch (getColumnWidth(colIndex))
765 {
766 case 1:
767 return (int8_t) data[offsets[colIndex]];
768
769 case 2:
770 return *((int16_t*) &data[offsets[colIndex]]);
771
772 case 4:
773 return *((int32_t*) &data[offsets[colIndex]]);
774
775 case 8:
776 return *((int64_t*) &data[offsets[colIndex]]);
777
778 default:
779 idbassert(0);
780 throw std::logic_error("Row::getIntField(): bad length.");
781 }
782 }
783
getStringPointer(uint32_t colIndex)784 inline const uint8_t* Row::getStringPointer(uint32_t colIndex) const
785 {
786 if (inStringTable(colIndex))
787 return strings->getPointer(*((uint64_t*) &data[offsets[colIndex]]));
788
789 return &data[offsets[colIndex]];
790 }
791
getStringLength(uint32_t colIndex)792 inline uint32_t Row::getStringLength(uint32_t colIndex) const
793 {
794 if (inStringTable(colIndex))
795 return strings->getStringLength(*((uint64_t*) &data[offsets[colIndex]]));
796
797 return strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex));
798 }
799
800
getShortConstString(uint32_t colIndex)801 inline utils::ConstString Row::getShortConstString(uint32_t colIndex) const
802 {
803 const char *src= (const char *) &data[offsets[colIndex]];
804 return utils::ConstString(src, strnlen(src, getColumnWidth(colIndex)));
805 }
806
807
getConstString(uint32_t colIndex)808 inline utils::ConstString Row::getConstString(uint32_t colIndex) const
809 {
810 return inStringTable(colIndex) ?
811 strings->getConstString(*((uint64_t*) &data[offsets[colIndex]])) :
812 getShortConstString(colIndex);
813 }
814
815
colUpdateMariaDBHasher(datatypes::MariaDBHasher & hM,const utils::Hasher_r & h,const uint32_t col,uint32_t & intermediateHash)816 inline void Row::colUpdateMariaDBHasher(datatypes::MariaDBHasher &hM,
817 const utils::Hasher_r& h,
818 const uint32_t col,
819 uint32_t& intermediateHash) const
820 {
821 switch (getColType(col))
822 {
823 case execplan::CalpontSystemCatalog::CHAR:
824 case execplan::CalpontSystemCatalog::VARCHAR:
825 case execplan::CalpontSystemCatalog::BLOB:
826 case execplan::CalpontSystemCatalog::TEXT:
827 {
828 CHARSET_INFO *cs = getCharset(col);
829 hM.add(cs, getConstString(col));
830 break;
831 }
832 default:
833 {
834 intermediateHash = h((const char*) &data[offsets[col]], colWidths[col], intermediateHash);
835 break;
836 }
837 }
838 }
839
840
setStringField(const uint8_t * strdata,uint32_t length,uint32_t colIndex)841 inline void Row::setStringField(const uint8_t* strdata, uint32_t length, uint32_t colIndex)
842 {
843 uint64_t offset;
844
845 if (length > getColumnWidth(colIndex))
846 length = getColumnWidth(colIndex);
847
848 if (inStringTable(colIndex))
849 {
850 offset = strings->storeString(strdata, length);
851 *((uint64_t*) &data[offsets[colIndex]]) = offset;
852 // cout << " -- stored offset " << *((uint32_t *) &data[offsets[colIndex]])
853 // << " length " << *((uint32_t *) &data[offsets[colIndex] + 4])
854 // << endl;
855 }
856 else
857 {
858 memcpy(&data[offsets[colIndex]], strdata, length);
859 memset(&data[offsets[colIndex] + length], 0,
860 offsets[colIndex + 1] - (offsets[colIndex] + length));
861 }
862 }
863
getStringField(uint32_t colIndex)864 inline std::string Row::getStringField(uint32_t colIndex) const
865 {
866 if (inStringTable(colIndex))
867 return strings->getString(*((uint64_t*) &data[offsets[colIndex]]));
868
869 // Not all CHAR/VARCHAR are NUL terminated so use length
870 return std::string((char*) &data[offsets[colIndex]],
871 strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex)));
872 }
873
getVarBinaryStringField(uint32_t colIndex)874 inline std::string Row::getVarBinaryStringField(uint32_t colIndex) const
875 {
876 if (inStringTable(colIndex))
877 return getStringField(colIndex);
878
879 return std::string((char*) &data[offsets[colIndex] + 2], *((uint16_t*) &data[offsets[colIndex]]));
880 }
881
getVarBinaryLength(uint32_t colIndex)882 inline uint32_t Row::getVarBinaryLength(uint32_t colIndex) const
883 {
884 if (inStringTable(colIndex))
885 return strings->getStringLength(*((uint64_t*) &data[offsets[colIndex]]));;
886
887 return *((uint16_t*) &data[offsets[colIndex]]);
888 }
889
getVarBinaryField(uint32_t colIndex)890 inline const uint8_t* Row::getVarBinaryField(uint32_t colIndex) const
891 {
892 if (inStringTable(colIndex))
893 return strings->getPointer(*((uint64_t*) &data[offsets[colIndex]]));
894
895 return &data[offsets[colIndex] + 2];
896 }
897
getVarBinaryField(uint32_t & len,uint32_t colIndex)898 inline const uint8_t* Row::getVarBinaryField(uint32_t& len, uint32_t colIndex) const
899 {
900 if (inStringTable(colIndex))
901 {
902 len = strings->getStringLength(*((uint64_t*) &data[offsets[colIndex]]));
903 return getVarBinaryField(colIndex);
904 }
905 else
906 {
907 len = *((uint16_t*) &data[offsets[colIndex]]);
908 return &data[offsets[colIndex] + 2];
909 }
910 }
911
getUserData(uint32_t colIndex)912 inline boost::shared_ptr<mcsv1sdk::UserData> Row::getUserData(uint32_t colIndex) const
913 {
914 if (!userDataStore)
915 {
916 return boost::shared_ptr<mcsv1sdk::UserData>();
917 }
918
919 return userDataStore->getUserData(*((uint32_t*) &data[offsets[colIndex]]));
920 }
921
getDoubleField(uint32_t colIndex)922 inline double Row::getDoubleField(uint32_t colIndex) const
923 {
924 return *((double*) &data[offsets[colIndex]]);
925 }
926
getFloatField(uint32_t colIndex)927 inline float Row::getFloatField(uint32_t colIndex) const
928 {
929 return *((float*) &data[offsets[colIndex]]);
930 }
931
getLongDoubleField(uint32_t colIndex)932 inline long double Row::getLongDoubleField(uint32_t colIndex) const
933 {
934 return *((long double*) &data[offsets[colIndex]]);
935 }
936
getRid()937 inline uint64_t Row::getRid() const
938 {
939 return baseRid + *((uint16_t*) data);
940 }
941
getRelRid()942 inline uint16_t Row::getRelRid() const
943 {
944 return *((uint16_t*) data);
945 }
946
getBaseRid()947 inline uint64_t Row::getBaseRid() const
948 {
949 return baseRid;
950 }
951
markRow()952 inline void Row::markRow()
953 {
954 *((uint16_t*) data) = 0xffff;
955 }
956
zeroRid()957 inline void Row::zeroRid()
958 {
959 *((uint16_t*) data) = 0;
960 }
961
isMarked()962 inline bool Row::isMarked()
963 {
964 return *((uint16_t*) data) == 0xffff;
965 }
966
967 /* Begin speculative code! */
getOffset(uint32_t colIndex)968 inline uint32_t Row::getOffset(uint32_t colIndex) const
969 {
970 return offsets[colIndex];
971 }
972
973 template<int len>
setUintField_offset(uint64_t val,uint32_t offset)974 inline void Row::setUintField_offset(uint64_t val, uint32_t offset)
975 {
976 switch (len)
977 {
978 case 1:
979 data[offset] = val;
980 break;
981
982 case 2:
983 *((uint16_t*) &data[offset]) = val;
984 break;
985
986 case 4:
987 *((uint32_t*) &data[offset]) = val;
988 break;
989
990 case 8:
991 *((uint64_t*) &data[offset]) = val;
992 break;
993
994 default:
995 idbassert(0);
996 throw std::logic_error("Row::setUintField called on a non-uint32_t field");
997 }
998 }
999
nextRow(uint32_t size)1000 inline void Row::nextRow(uint32_t size)
1001 {
1002 data += size;
1003 }
1004
1005
1006 inline void Row::prevRow(uint32_t size, uint64_t number = 1)
1007 {
1008 data -= size * number;
1009 }
1010
1011 template<int len>
setUintField(uint64_t val,uint32_t colIndex)1012 inline void Row::setUintField(uint64_t val, uint32_t colIndex)
1013 {
1014 switch (len)
1015 {
1016 case 1:
1017 data[offsets[colIndex]] = val;
1018 break;
1019
1020 case 2:
1021 *((uint16_t*) &data[offsets[colIndex]]) = val;
1022 break;
1023
1024 case 4:
1025 *((uint32_t*) &data[offsets[colIndex]]) = val;
1026 break;
1027
1028 case 8:
1029 *((uint64_t*) &data[offsets[colIndex]]) = val;
1030 break;
1031
1032 default:
1033 idbassert(0);
1034 throw std::logic_error("Row::setUintField called on a non-uint32_t field");
1035 }
1036 }
1037
setUintField(uint64_t val,uint32_t colIndex)1038 inline void Row::setUintField(uint64_t val, uint32_t colIndex)
1039 {
1040 switch (getColumnWidth(colIndex))
1041 {
1042 case 1:
1043 data[offsets[colIndex]] = val;
1044 break;
1045
1046 case 2:
1047 *((uint16_t*) &data[offsets[colIndex]]) = val;
1048 break;
1049
1050 case 4:
1051 *((uint32_t*) &data[offsets[colIndex]]) = val;
1052 break;
1053
1054 case 8:
1055 *((uint64_t*) &data[offsets[colIndex]]) = val;
1056 break;
1057
1058 default:
1059 idbassert(0);
1060 throw std::logic_error("Row::setUintField: bad length");
1061 }
1062 }
1063
1064 template<int len>
setIntField(int64_t val,uint32_t colIndex)1065 inline void Row::setIntField(int64_t val, uint32_t colIndex)
1066 {
1067 switch (len)
1068 {
1069 case 1:
1070 *((int8_t*) &data[offsets[colIndex]]) = val;
1071 break;
1072
1073 case 2:
1074 *((int16_t*) &data[offsets[colIndex]]) = val;
1075 break;
1076
1077 case 4:
1078 *((int32_t*) &data[offsets[colIndex]]) = val;
1079 break;
1080
1081 case 8:
1082 *((int64_t*) &data[offsets[colIndex]]) = val;
1083 break;
1084
1085 default:
1086 idbassert(0);
1087 throw std::logic_error("Row::setIntField: bad length");
1088 }
1089 }
1090
setIntField(int64_t val,uint32_t colIndex)1091 inline void Row::setIntField(int64_t val, uint32_t colIndex)
1092 {
1093 switch (getColumnWidth(colIndex))
1094 {
1095 case 1:
1096 *((int8_t*) &data[offsets[colIndex]]) = val;
1097 break;
1098
1099 case 2:
1100 *((int16_t*) &data[offsets[colIndex]]) = val;
1101 break;
1102
1103 case 4:
1104 *((int32_t*) &data[offsets[colIndex]]) = val;
1105 break;
1106
1107 case 8:
1108 *((int64_t*) &data[offsets[colIndex]]) = val;
1109 break;
1110
1111 default:
1112 idbassert(0);
1113 throw std::logic_error("Row::setIntField: bad length");
1114 }
1115 }
1116
setDoubleField(double val,uint32_t colIndex)1117 inline void Row::setDoubleField(double val, uint32_t colIndex)
1118 {
1119 *((double*) &data[offsets[colIndex]]) = val;
1120 }
1121
setFloatField(float val,uint32_t colIndex)1122 inline void Row::setFloatField(float val, uint32_t colIndex)
1123 {
1124 //N.B. There is a bug in boost::any or in gcc where, if you store a nan, you will get back a nan,
1125 // but not necessarily the same bits that you put in. This only seems to be for float (double seems
1126 // to work).
1127 if (std::isnan(val))
1128 setUintField<4>(joblist::FLOATNULL, colIndex);
1129 else
1130 *((float*) &data[offsets[colIndex]]) = val;
1131 }
1132
setLongDoubleField(long double val,uint32_t colIndex)1133 inline void Row::setLongDoubleField(long double val, uint32_t colIndex)
1134 {
1135 uint8_t* p = &data[offsets[colIndex]];
1136 *((long double*)p) = val;
1137 if (sizeof(long double) == 16)
1138 {
1139 // zero out the unused portion as there may be garbage there.
1140 *((uint64_t*)p+1) &= 0x000000000000FFFFULL;
1141 }
1142 }
1143
setVarBinaryField(const std::string & val,uint32_t colIndex)1144 inline void Row::setVarBinaryField(const std::string& val, uint32_t colIndex)
1145 {
1146 if (inStringTable(colIndex))
1147 setStringField(val, colIndex);
1148 else
1149 {
1150 *((uint16_t*) &data[offsets[colIndex]]) = static_cast<uint16_t>(val.length());
1151 memcpy(&data[offsets[colIndex] + 2], val.data(), val.length());
1152 }
1153 }
1154
setVarBinaryField(const uint8_t * val,uint32_t len,uint32_t colIndex)1155 inline void Row::setVarBinaryField(const uint8_t* val, uint32_t len, uint32_t colIndex)
1156 {
1157 if (len > getColumnWidth(colIndex))
1158 len = getColumnWidth(colIndex);
1159
1160 if (inStringTable(colIndex))
1161 {
1162 uint64_t offset = strings->storeString(val, len);
1163 *((uint64_t*) &data[offsets[colIndex]]) = offset;
1164 }
1165 else
1166 {
1167 *((uint16_t*) &data[offsets[colIndex]]) = len;
1168 memcpy(&data[offsets[colIndex] + 2], val, len);
1169 }
1170 }
1171
setUserData(mcsv1sdk::mcsv1Context & context,boost::shared_ptr<mcsv1sdk::UserData> userData,uint32_t len,uint32_t colIndex)1172 inline void Row::setUserData(mcsv1sdk::mcsv1Context& context,
1173 boost::shared_ptr<mcsv1sdk::UserData> userData,
1174 uint32_t len, uint32_t colIndex)
1175 {
1176 if (!userDataStore)
1177 {
1178 return;
1179 }
1180
1181 uint32_t offset = userDataStore->storeUserData(context, userData, len);
1182 *((uint32_t*) &data[offsets[colIndex]]) = offset;
1183 *((uint32_t*) &data[offsets[colIndex] + 4]) = len;
1184 }
1185
copyField(uint32_t destIndex,uint32_t srcIndex)1186 inline void Row::copyField(uint32_t destIndex, uint32_t srcIndex) const
1187 {
1188 uint32_t n = offsets[destIndex + 1] - offsets[destIndex];
1189 memmove(&data[offsets[destIndex]], &data[offsets[srcIndex]], n);
1190 }
1191
copyField(Row & out,uint32_t destIndex,uint32_t srcIndex)1192 inline void Row::copyField(Row& out, uint32_t destIndex, uint32_t srcIndex) const
1193 {
1194 if (UNLIKELY(types[srcIndex] == execplan::CalpontSystemCatalog::VARBINARY ||
1195 types[srcIndex] == execplan::CalpontSystemCatalog::BLOB ||
1196 types[srcIndex] == execplan::CalpontSystemCatalog::TEXT))
1197 out.setVarBinaryField(getVarBinaryStringField(srcIndex), destIndex);
1198 else if (UNLIKELY(isLongString(srcIndex)))
1199 out.setStringField(getStringPointer(srcIndex), getStringLength(srcIndex), destIndex);
1200 //out.setStringField(getStringField(srcIndex), destIndex);
1201 else if (UNLIKELY(isShortString(srcIndex)))
1202 out.setUintField(getUintField(srcIndex), destIndex);
1203 else if (UNLIKELY(types[srcIndex] == execplan::CalpontSystemCatalog::LONGDOUBLE))
1204 out.setLongDoubleField(getLongDoubleField(srcIndex), destIndex);
1205 else
1206 out.setIntField(getIntField(srcIndex), destIndex);
1207 }
1208
setRid(uint64_t rid)1209 inline void Row::setRid(uint64_t rid)
1210 {
1211 *((uint16_t*) data) = rid & 0xffff;
1212 }
1213
hash()1214 inline uint64_t Row::hash() const
1215 {
1216 return hash(columnCount - 1);
1217 }
1218
1219
hash(uint32_t lastCol)1220 inline uint64_t Row::hash(uint32_t lastCol) const
1221 {
1222 // Use two hash classes. MariaDBHasher for text-based
1223 // collation-aware data types and Hasher_r for all other data types.
1224 // We deliver a hash that is a combination of both hashers' results.
1225 utils::Hasher_r h;
1226 datatypes::MariaDBHasher hM;
1227 uint32_t intermediateHash = 0;
1228 // Sometimes we ask this to hash 0 bytes, and it comes through looking like
1229 // lastCol = -1. Return 0.
1230 if (lastCol >= columnCount)
1231 return 0;
1232
1233 for (uint32_t i = 0; i <= lastCol; i++)
1234 colUpdateMariaDBHasher(hM, h, i, intermediateHash);
1235
1236 return utils::HashFamily(h, intermediateHash, lastCol << 2, hM).finalize();
1237 }
1238
equals(const Row & r2)1239 inline bool Row::equals(const Row& r2) const
1240 {
1241 return equals(r2, columnCount - 1);
1242 }
1243
1244
1245 /** @brief RowGroup is a lightweight interface for processing packed row data
1246
1247 A RowGroup is an interface for parsing and/or modifying row data as described at the top
1248 of this file. Its lifecycle can be tied to a producer or consumer's lifecycle.
1249 Only one instance is required to process any number of blocks with a
1250 given column configuration. The column configuration is specified in the
1251 constructor, and the block data to process is specified through the
1252 setData() function. It will not copy or take ownership of the data it processes;
1253 the caller should do that.
1254
1255 Row and RowGroup share some bits. RowGroup owns the memory they share.
1256 */
class RowGroup : public messageqcpp::Serializeable
{
public:
    /** @brief The default ctor.  It does nothing.  Need to init by assignment or deserialization */
    RowGroup();

    /** @brief The RowGroup ctor, which specifies the column config to process

    @param colCount The number of columns
    @param positions An array specifying the offsets within the packed data
            of a row where each column begins.  It should have colCount + 1
            entries.  The first offset is 2, because a row begins with a 2-byte
            RID.  The last entry should be the offset of the last column +
            its length, which is also the size of the entire row including the rid.
    @param cOids An array of oids for each column.
    @param tkeys An array of unique id for each column.
    @param colTypes An array of COLTYPEs for each column.
    @param charsetNumbers an Array of the lookup numbers for the charset/collation object.
    @param scale An array specifying the scale of DECIMAL types (0 for non-decimal)
    @param precision An array specifying the precision of DECIMAL types (0 for non-decimal)
    @param stringTableThreshold NOTE(review): presumably the value later reported by
            getStringTableThreshold(); confirm against the ctor implementation.
    @param useStringTable NOTE(review): presumably the initial string-table mode; confirm.
    @param forceInlineData NOTE(review): presumably per-column flags backing
            getForceInline(); confirm.
    */

    RowGroup(uint32_t colCount,
             const std::vector<uint32_t>& positions,
             const std::vector<uint32_t>& cOids,
             const std::vector<uint32_t>& tkeys,
             const std::vector<execplan::CalpontSystemCatalog::ColDataType>& colTypes,
             const std::vector<uint32_t>& charsetNumbers,
             const std::vector<uint32_t>& scale,
             const std::vector<uint32_t>& precision,
             uint32_t stringTableThreshold,
             bool useStringTable = true,
             const std::vector<bool>& forceInlineData = std::vector<bool>()
            );

    /** @brief The copiers.  It copies metadata, not the row data */
    RowGroup(const RowGroup&);

    /** @brief Assignment operator.  It copies metadata, not the row data */
    RowGroup& operator=(const RowGroup&);

    ~RowGroup();

    // Points a Row at this group's column metadata.  It does not position the
    // Row on any data; getRow() does that.
    inline void initRow(Row*, bool forceInlineData = false) const;
    // Row count accessors; the count lives in the header of the attached data block.
    inline uint32_t getRowCount() const;
    inline void incRowCount();
    inline void setRowCount(uint32_t num);
    // Positions the Row on row number rowNum of the attached data block.
    inline void getRow(uint32_t rowNum, Row*) const;
    // Size of one row using the currently active offsets.
    inline uint32_t getRowSize() const;
    // Size of one row using the inline-data offsets (oldOffsets).
    inline uint32_t getRowSizeWithStrings() const;
    inline uint64_t getBaseRid() const;
    // Attach a data block.  The RGData overload also attaches its StringStore;
    // the raw-pointer overload clears the StringStore/RGData pointers and
    // switches the group to inline (non string-table) mode.
    void setData(RGData* rgd);
    inline void setData(uint8_t* d);
    inline uint8_t* getData() const;
    inline RGData* getRGData() const;

    uint32_t getStatus() const;
    void setStatus(uint16_t);

    uint32_t getDBRoot() const;
    void setDBRoot(uint32_t);

    uint32_t getDataSize() const;
    uint32_t getDataSize(uint64_t n) const;
    uint32_t getMaxDataSize() const;
    uint32_t getMaxDataSizeWithStrings() const;
    uint32_t getEmptySize() const;

    // this returns the size of the row data with the string table
    inline uint64_t getSizeWithStrings() const;
    inline uint64_t getSizeWithStrings(uint64_t n) const;

    // sets the row count to 0 and the baseRid to something
    // effectively initializing whatever chunk of memory
    // data points to
    void resetRowGroup(uint64_t baseRid);

    /* The Serializeable interface */
    void serialize(messageqcpp::ByteStream&) const;
    void deserialize(messageqcpp::ByteStream&);

    uint32_t getColumnWidth(uint32_t col) const;
    uint32_t getColumnCount() const;
    // Note: getOffsets() returns the inline-data offsets (oldOffsets), not
    // necessarily the currently active ones.
    inline const std::vector<uint32_t>& getOffsets() const;
    inline const std::vector<uint32_t>& getOIDs() const;
    inline const std::vector<uint32_t>& getKeys() const;
    inline const std::vector<uint32_t>& getColWidths() const;
    inline execplan::CalpontSystemCatalog::ColDataType getColType(uint32_t colIndex) const;
    inline const std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes() const;
    inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& getColTypes();
    inline const std::vector<uint32_t>& getCharsetNumbers() const;
    inline uint32_t getCharsetNumber(uint32_t colIndex) const;
    inline boost::shared_array<bool>& getForceInline();
    // Size in bytes of the header that precedes the first row in a data block.
    static inline uint32_t getHeaderSize()
    {
        return headerSize;
    }

    // this returns true if the type is CHAR or VARCHAR
    inline bool isCharType(uint32_t colIndex) const;
    inline bool isUnsigned(uint32_t colIndex) const;
    // VARCHAR of width <= 7 or CHAR of width <= 8
    inline bool isShortString(uint32_t colIndex) const;
    // VARCHAR of width > 7, CHAR of width > 8, or any VARBINARY/BLOB/TEXT
    inline bool isLongString(uint32_t colIndex) const;

    // True when the column's data type carries a collation.
    bool colHasCollation(uint32_t colIndex) const
    {
        return execplan::typeHasCollation(getColType(colIndex));
    }

    inline const std::vector<uint32_t>& getScale() const;
    inline const std::vector<uint32_t>& getPrecision() const;

    inline bool usesStringTable() const;
    // Requests string-table mode; it only takes effect when the group has at
    // least one long string field.
    inline void setUseStringTable(bool);

//	RGData *convertToInlineData(uint64_t *size = NULL) const;  // caller manages the memory returned by this
//	void convertToInlineDataInPlace();
//	RGData *convertToStringTable(uint64_t *size = NULL) const;
//	void convertToStringTableInPlace();
    void serializeRGData(messageqcpp::ByteStream&) const;
    inline uint32_t getStringTableThreshold() const;

    void append(RGData&);
    void append(RowGroup&);
    void append(RGData&, uint pos);  // insert starting at position 'pos'
    void append(RowGroup&, uint pos);

    RGData duplicate();   // returns a copy of the attached RGData

    std::string toString(const std::vector<uint64_t>& used = {}) const;

    /** operator+=
    *
    * append the metadata of another RowGroup to this RowGroup
    */
    RowGroup& operator+=(const RowGroup& rhs);

    // returns a RowGroup with only the first cols columns.  Useful for generating a
    // RowGroup where the first cols make up a key of some kind, and the rest is irrelevant.
    RowGroup truncate(uint32_t cols);

    /** operator<
     *
     * Orders RG's based on baseRid
     */
    inline bool operator<(const RowGroup& rhs) const;

    void addToSysDataList(execplan::CalpontSystemCatalog::NJLSysDataList& sysDataList);

    /* Base RIDs are now a combination of partition#, segment#, extent#, and block#. */
    inline void setBaseRid(const uint32_t& partNum, const uint16_t& segNum,
                           const uint8_t& extentNum, const uint16_t& blockNum);
    inline void getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
                            uint16_t* blockNum);

    // Only has an effect while the group is in string-table mode.
    inline void setStringStore(boost::shared_ptr<StringStore>);

    const CHARSET_INFO* getCharset(uint32_t col);

private:
    uint32_t columnCount;
    uint8_t* data;   // the attached data block: header followed by packed rows

    std::vector<uint32_t> oldOffsets; //inline data offsets
    std::vector<uint32_t> stOffsets;  //string table offsets
    uint32_t* offsets;   //offsets either points to oldOffsets or stOffsets
    std::vector<uint32_t> colWidths;
    // oids: the real oid of the column, may have duplicates with alias.
    // This oid is necessary for front-end to decide the real column width.
    std::vector<uint32_t> oids;
    // keys: the unique id for pair(oid, alias). bug 1632.
    // Used to map the projected column and rowgroup index
    std::vector<uint32_t> keys;
    std::vector<execplan::CalpontSystemCatalog::ColDataType> types;
    // For string collation
    std::vector<uint32_t> charsetNumbers;
    std::vector<CHARSET_INFO*> charsets;

    // DECIMAL support.  For non-decimal fields, the values are 0.
    std::vector<uint32_t> scale;
    std::vector<uint32_t> precision;

    // string table impl
    RGData* rgData;          // NULL after setData(uint8_t*)
    StringStore* strings;    // note, strings and data belong to rgData
    bool useStringTable;
    bool hasCollation;
    bool hasLongStringField;
    uint32_t sTableThreshold;
    boost::shared_array<bool> forceInline;

    // Layout of the header at the start of a data block.  The row count is
    // accessed as a uint32_t at rowCountOffset and the base rid as a uint64_t
    // at baseRidOffset.
    // NOTE(review): status/dbRoot field widths are inferred from the offsets
    // (2 and 4 bytes respectively); confirm in the .cpp.
    static const uint32_t headerSize = 18;
    static const uint32_t rowCountOffset = 0;
    static const uint32_t baseRidOffset = 4;
    static const uint32_t statusOffset = 12;
    static const uint32_t dbRootOffset = 14;
};
1454
1455 inline uint64_t convertToRid(const uint32_t& partNum, const uint16_t& segNum,
1456 const uint8_t& extentNum, const uint16_t& blockNum);
1457 inline void getLocationFromRid(uint64_t rid, uint32_t* partNum,
1458 uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum);
1459
// returns the extent-relative rid of the first row in the logical block specified by baseRid
1461 inline uint64_t getExtentRelativeRid(uint64_t baseRid);
1462
// returns the file-relative rid of the first row in the logical block specified by baseRid
1464 inline uint64_t getFileRelativeRid(uint64_t baseRid);
1465
1466 /** operator+
1467 *
1468 * add the metadata of 2 RowGroups together and return a new RowGroup
1469 */
1470 RowGroup operator+(const RowGroup& lhs, const RowGroup& rhs);
1471
1472 boost::shared_array<int> makeMapping(const RowGroup& r1, const RowGroup& r2);
1473 void applyMapping(const boost::shared_array<int>& mapping, const Row& in, Row* out);
1474 void applyMapping(const std::vector<int>& mapping, const Row& in, Row* out);
1475 void applyMapping(const int* mapping, const Row& in, Row* out);
1476
1477 /* PL 8/10/09: commented the asserts for now b/c for the fcns that are called
1478 every row, they're a measurable performance penalty */
getRowCount()1479 inline uint32_t RowGroup::getRowCount() const
1480 {
1481 // idbassert(data);
1482 // if (!data) throw std::logic_error("RowGroup::getRowCount(): data is NULL!");
1483 return *((uint32_t*) &data[rowCountOffset]);
1484 }
1485
incRowCount()1486 inline void RowGroup::incRowCount()
1487 {
1488 // idbassert(data);
1489 ++(*((uint32_t*) &data[rowCountOffset]));
1490 }
1491
setRowCount(uint32_t num)1492 inline void RowGroup::setRowCount(uint32_t num)
1493 {
1494 // idbassert(data);
1495 *((uint32_t*) &data[rowCountOffset]) = num;
1496 }
1497
getRow(uint32_t rowNum,Row * r)1498 inline void RowGroup::getRow(uint32_t rowNum, Row* r) const
1499 {
1500 // idbassert(data);
1501 if (useStringTable != r->usesStringTable())
1502 initRow(r);
1503
1504 r->baseRid = getBaseRid();
1505 r->data = &(data[headerSize + (rowNum * offsets[columnCount])]);
1506 r->strings = strings;
1507 r->userDataStore = rgData->userDataStore.get();
1508 }
1509
setData(uint8_t * d)1510 inline void RowGroup::setData(uint8_t* d)
1511 {
1512 data = d;
1513 strings = NULL;
1514 rgData = NULL;
1515 setUseStringTable(false);
1516 }
1517
setData(RGData * rgd)1518 inline void RowGroup::setData(RGData* rgd)
1519 {
1520 data = rgd->rowData.get();
1521 strings = rgd->strings.get();
1522 rgData = rgd;
1523 }
1524
getData()1525 inline uint8_t* RowGroup::getData() const
1526 {
1527 //assert(!useStringTable);
1528 return data;
1529 }
1530
getRGData()1531 inline RGData* RowGroup::getRGData() const
1532 {
1533 return rgData;
1534 }
1535
setUseStringTable(bool b)1536 inline void RowGroup::setUseStringTable(bool b)
1537 {
1538 useStringTable = (b && hasLongStringField);
1539 //offsets = (useStringTable ? &stOffsets[0] : &oldOffsets[0]);
1540 offsets = 0;
1541
1542 if (useStringTable && !stOffsets.empty())
1543 offsets = &stOffsets[0];
1544 else if (!useStringTable && !oldOffsets.empty())
1545 offsets = &oldOffsets[0];
1546
1547 if (!useStringTable)
1548 strings = NULL;
1549 }
1550
getBaseRid()1551 inline uint64_t RowGroup::getBaseRid() const
1552 {
1553 return *((uint64_t*) &data[baseRidOffset]);
1554 }
1555
1556 inline bool RowGroup::operator<(const RowGroup& rhs) const
1557 {
1558 return (getBaseRid() < rhs.getBaseRid());
1559 }
1560
initRow(Row * r,bool forceInlineData)1561 void RowGroup::initRow(Row* r, bool forceInlineData) const
1562 {
1563 r->columnCount = columnCount;
1564
1565 if (LIKELY(!types.empty()))
1566 {
1567 r->colWidths = (uint32_t*) &colWidths[0];
1568 r->types = (execplan::CalpontSystemCatalog::ColDataType*) & (types[0]);
1569 r->charsetNumbers = (uint32_t*) & (charsetNumbers[0]);
1570 r->charsets = (CHARSET_INFO**) & (charsets[0]);
1571 r->scale = (uint32_t*) & (scale[0]);
1572 r->precision = (uint32_t*) & (precision[0]);
1573 }
1574
1575 if (forceInlineData)
1576 {
1577 r->useStringTable = false;
1578 r->oldOffsets = (uint32_t*) & (oldOffsets[0]);
1579 r->stOffsets = (uint32_t*) & (stOffsets[0]);
1580 r->offsets = (uint32_t*) & (oldOffsets[0]);
1581 }
1582 else
1583 {
1584 r->useStringTable = useStringTable;
1585 r->oldOffsets = (uint32_t*) & (oldOffsets[0]);
1586 r->stOffsets = (uint32_t*) & (stOffsets[0]);
1587 r->offsets = offsets;
1588 }
1589
1590 r->hasLongStringField = hasLongStringField;
1591 r->sTableThreshold = sTableThreshold;
1592 r->forceInline = forceInline;
1593 r->hasCollation = hasCollation;
1594 }
1595
getRowSize()1596 inline uint32_t RowGroup::getRowSize() const
1597 {
1598 return offsets[columnCount];
1599 }
1600
getRowSizeWithStrings()1601 inline uint32_t RowGroup::getRowSizeWithStrings() const
1602 {
1603 return oldOffsets[columnCount];
1604 }
1605
getSizeWithStrings(uint64_t n)1606 inline uint64_t RowGroup::getSizeWithStrings(uint64_t n) const
1607 {
1608 if (strings == NULL)
1609 return getDataSize(n);
1610 else
1611 return getDataSize(n) + strings->getSize();
1612 }
1613
getSizeWithStrings()1614 inline uint64_t RowGroup::getSizeWithStrings() const
1615 {
1616 return getSizeWithStrings(getRowCount());
1617 }
1618
isCharType(uint32_t colIndex)1619 inline bool RowGroup::isCharType(uint32_t colIndex) const
1620 {
1621 return execplan::isCharType(types[colIndex]);
1622 }
1623
isUnsigned(uint32_t colIndex)1624 inline bool RowGroup::isUnsigned(uint32_t colIndex) const
1625 {
1626 return execplan::isUnsigned(types[colIndex]);
1627 }
1628
isShortString(uint32_t colIndex)1629 inline bool RowGroup::isShortString(uint32_t colIndex) const
1630 {
1631 return ((getColumnWidth(colIndex) <= 7 && types[colIndex] == execplan::CalpontSystemCatalog::VARCHAR) ||
1632 (getColumnWidth(colIndex) <= 8 && types[colIndex] == execplan::CalpontSystemCatalog::CHAR));
1633 }
1634
isLongString(uint32_t colIndex)1635 inline bool RowGroup::isLongString(uint32_t colIndex) const
1636 {
1637 return ((getColumnWidth(colIndex) > 7 && types[colIndex] == execplan::CalpontSystemCatalog::VARCHAR) ||
1638 (getColumnWidth(colIndex) > 8 && types[colIndex] == execplan::CalpontSystemCatalog::CHAR) ||
1639 types[colIndex] == execplan::CalpontSystemCatalog::VARBINARY ||
1640 types[colIndex] == execplan::CalpontSystemCatalog::BLOB ||
1641 types[colIndex] == execplan::CalpontSystemCatalog::TEXT);
1642 }
1643
usesStringTable()1644 inline bool RowGroup::usesStringTable() const
1645 {
1646 return useStringTable;
1647 }
1648
getOffsets()1649 inline const std::vector<uint32_t>& RowGroup::getOffsets() const
1650 {
1651 return oldOffsets;
1652 }
1653
getOIDs()1654 inline const std::vector<uint32_t>& RowGroup::getOIDs() const
1655 {
1656 return oids;
1657 }
1658
getKeys()1659 inline const std::vector<uint32_t>& RowGroup::getKeys() const
1660 {
1661 return keys;
1662 }
1663
getColType(uint32_t colIndex)1664 inline execplan::CalpontSystemCatalog::ColDataType RowGroup::getColType(uint32_t colIndex) const
1665 {
1666 return types[colIndex];
1667 }
1668
getColTypes()1669 inline const std::vector<execplan::CalpontSystemCatalog::ColDataType>& RowGroup::getColTypes() const
1670 {
1671 return types;
1672 }
1673
getColTypes()1674 inline std::vector<execplan::CalpontSystemCatalog::ColDataType>& RowGroup::getColTypes()
1675 {
1676 return types;
1677 }
1678
getCharsetNumbers()1679 inline const std::vector<uint32_t>& RowGroup::getCharsetNumbers() const
1680 {
1681 return charsetNumbers;
1682 }
1683
getCharsetNumber(uint32_t colIndex)1684 inline uint32_t RowGroup::getCharsetNumber(uint32_t colIndex) const
1685 {
1686 return charsetNumbers[colIndex];
1687 }
1688
getScale()1689 inline const std::vector<uint32_t>& RowGroup::getScale() const
1690 {
1691 return scale;
1692 }
1693
getPrecision()1694 inline const std::vector<uint32_t>& RowGroup::getPrecision() const
1695 {
1696 return precision;
1697 }
1698
getColWidths()1699 inline const std::vector<uint32_t>& RowGroup::getColWidths() const
1700 {
1701 return colWidths;
1702 }
1703
getForceInline()1704 inline boost::shared_array<bool>& RowGroup::getForceInline()
1705 {
1706 return forceInline;
1707 }
1708
// Packs a (partition, segment, extent, block) location into a 64-bit base rid:
//
//   bits 63..32  partition number
//   bits 31..16  segment number
//   bits 15..10  extent number (truncated to 6 bits)
//   bits  9..0   block number  (truncated to 10 bits)
inline uint64_t convertToRid(const uint32_t& partitionNum,
                             const uint16_t& segmentNum, const uint8_t& exNum, const uint16_t& blNum)
{
    const uint64_t partBits   = static_cast<uint64_t>(partitionNum) << 32;
    const uint64_t segBits    = static_cast<uint64_t>(segmentNum) << 16;
    const uint64_t extentBits = (static_cast<uint64_t>(exNum) & 0x3f) << 10;
    const uint64_t blockBits  = static_cast<uint64_t>(blNum) & 0x3ff;

    return partBits | segBits | extentBits | blockBits;
}
1721
setBaseRid(const uint32_t & partNum,const uint16_t & segNum,const uint8_t & extentNum,const uint16_t & blockNum)1722 inline void RowGroup::setBaseRid(const uint32_t& partNum, const uint16_t& segNum,
1723 const uint8_t& extentNum, const uint16_t& blockNum)
1724 {
1725 *((uint64_t*) &data[baseRidOffset]) = convertToRid(partNum, segNum,
1726 extentNum, blockNum);
1727 }
1728
getStringTableThreshold()1729 inline uint32_t RowGroup::getStringTableThreshold() const
1730 {
1731 return sTableThreshold;
1732 }
1733
setStringStore(boost::shared_ptr<StringStore> ss)1734 inline void RowGroup::setStringStore(boost::shared_ptr<StringStore> ss)
1735 {
1736 if (useStringTable)
1737 {
1738 rgData->setStringStore(ss);
1739 strings = rgData->strings.get();
1740 }
1741 }
1742
// Unpacks a base rid (see convertToRid()) into its components.  Every output
// pointer is optional; a null pointer simply skips that component.
//
//   partNum   <- bits 63..32
//   segNum    <- bits 31..16
//   extentNum <- bits 15..10
//   blockNum  <- bits  9..0
inline void getLocationFromRid(uint64_t rid, uint32_t* partNum,
                               uint16_t* segNum, uint8_t* extentNum, uint16_t* blockNum)
{
    if (partNum)
    {
        *partNum = static_cast<uint32_t>(rid >> 32);
    }

    if (segNum)
    {
        *segNum = static_cast<uint16_t>(rid >> 16);
    }

    if (extentNum)
    {
        *extentNum = static_cast<uint8_t>((rid >> 10) & 0x3f);
    }

    if (blockNum)
    {
        *blockNum = static_cast<uint16_t>(rid & 0x3ff);
    }
}
1754
getLocation(uint32_t * partNum,uint16_t * segNum,uint8_t * extentNum,uint16_t * blockNum)1755 inline void RowGroup::getLocation(uint32_t* partNum, uint16_t* segNum,
1756 uint8_t* extentNum, uint16_t* blockNum)
1757 {
1758 getLocationFromRid(getBaseRid(), partNum, segNum, extentNum, blockNum);
1759 }
1760
// Returns the extent-relative rid of the first row in the logical block
// identified by baseRid: the 10-bit block number shifted left by 13.
inline uint64_t getExtentRelativeRid(uint64_t baseRid)
{
    return (baseRid & 0x3ff) << 13;
}
1767
getExtentRelativeRid()1768 inline uint64_t Row::getExtentRelativeRid() const
1769 {
1770 return rowgroup::getExtentRelativeRid(baseRid) | (getRelRid() & 0x1fff);
1771 }
1772
// Returns the file-relative rid of the first row in the logical block
// identified by baseRid: the 6-bit extent number shifted left by 23, combined
// with the 10-bit block number shifted left by 13.
inline uint64_t getFileRelativeRid(uint64_t baseRid)
{
    const uint64_t extentBits = ((baseRid >> 10) & 0x3f) << 23;
    const uint64_t blockBits = (baseRid & 0x3ff) << 13;
    return extentBits | blockBits;
}
1780
getFileRelativeRid()1781 inline uint64_t Row::getFileRelativeRid() const
1782 {
1783 return rowgroup::getFileRelativeRid(baseRid) | (getRelRid() & 0x1fff);
1784 }
1785
getLocation(uint32_t * partNum,uint16_t * segNum,uint8_t * extentNum,uint16_t * blockNum,uint16_t * rowNum)1786 inline void Row::getLocation(uint32_t* partNum, uint16_t* segNum, uint8_t* extentNum,
1787 uint16_t* blockNum, uint16_t* rowNum)
1788 {
1789 getLocationFromRid(baseRid, partNum, segNum, extentNum, blockNum);
1790
1791 if (rowNum) *rowNum = getRelRid();
1792 }
1793
copyRow(const Row & in,Row * out,uint32_t colCount)1794 inline void copyRow(const Row& in, Row* out, uint32_t colCount)
1795 {
1796 if (&in == out)
1797 return;
1798
1799 out->setRid(in.getRelRid());
1800
1801 if (!in.usesStringTable() && !out->usesStringTable())
1802 {
1803 memcpy(out->getData(), in.getData(), std::min(in.getOffset(colCount), out->getOffset(colCount)));
1804 return;
1805 }
1806
1807 for (uint32_t i = 0; i < colCount; i++)
1808 {
1809 if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::VARBINARY ||
1810 in.getColTypes()[i] == execplan::CalpontSystemCatalog::BLOB ||
1811 in.getColTypes()[i] == execplan::CalpontSystemCatalog::TEXT ||
1812 in.getColTypes()[i] == execplan::CalpontSystemCatalog::CLOB))
1813 out->setVarBinaryField(in.getVarBinaryStringField(i), i);
1814 else if (UNLIKELY(in.isLongString(i)))
1815 //out->setStringField(in.getStringField(i), i);
1816 out->setStringField(in.getStringPointer(i), in.getStringLength(i), i);
1817 else if (UNLIKELY(in.isShortString(i)))
1818 out->setUintField(in.getUintField(i), i);
1819 else if (UNLIKELY(in.getColTypes()[i] == execplan::CalpontSystemCatalog::LONGDOUBLE))
1820 out->setLongDoubleField(in.getLongDoubleField(i), i);
1821 else
1822 out->setIntField(in.getIntField(i), i);
1823 }
1824 }
1825
copyRow(const Row & in,Row * out)1826 inline void copyRow(const Row& in, Row* out)
1827 {
1828 copyRow(in, out, std::min(in.getColumnCount(), out->getColumnCount()));
1829 }
1830
getString(uint64_t off)1831 inline std::string StringStore::getString(uint64_t off) const
1832 {
1833 uint32_t length;
1834
1835 if (off == std::numeric_limits<uint64_t>::max())
1836 return joblist::CPNULLSTRMARK;
1837
1838 MemChunk* mc;
1839
1840 if (off & 0x8000000000000000)
1841 {
1842 //off = off - 0x8000000000000000;
1843 off &= ~0x8000000000000000;
1844
1845 if (longStrings.size() <= off)
1846 return joblist::CPNULLSTRMARK;
1847
1848 mc = (MemChunk*) longStrings[off].get();
1849 memcpy(&length, mc->data, 4);
1850 return std::string((char*) mc->data + 4, length);
1851 }
1852
1853 uint64_t chunk = off / CHUNK_SIZE;
1854 uint64_t offset = off % CHUNK_SIZE;
1855
1856 // this has to handle uninitialized data as well. If it's uninitialized it doesn't matter
1857 // what gets returned, it just can't go out of bounds.
1858 if (mem.size() <= chunk)
1859 return joblist::CPNULLSTRMARK;
1860
1861 mc = (MemChunk*) mem[chunk].get();
1862
1863 memcpy(&length, &mc->data[offset], 4);
1864
1865 if ((offset + length) > mc->currentSize)
1866 return joblist::CPNULLSTRMARK;
1867
1868 return std::string((char*) & (mc->data[offset]) + 4, length);
1869 }
1870
getPointer(uint64_t off)1871 inline const uint8_t* StringStore::getPointer(uint64_t off) const
1872 {
1873 if (off == std::numeric_limits<uint64_t>::max())
1874 return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
1875
1876 uint64_t chunk = off / CHUNK_SIZE;
1877 uint64_t offset = off % CHUNK_SIZE;
1878 MemChunk* mc;
1879
1880 if (off & 0x8000000000000000)
1881 {
1882 //off = off - 0x8000000000000000;
1883 off &= ~0x8000000000000000;
1884
1885 if (longStrings.size() <= off)
1886 return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
1887
1888 mc = (MemChunk*) longStrings[off].get();
1889 return mc->data + 4;
1890 }
1891
1892 // this has to handle uninitialized data as well. If it's uninitialized it doesn't matter
1893 // what gets returned, it just can't go out of bounds.
1894 if (UNLIKELY(mem.size() <= chunk))
1895 return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
1896
1897 mc = (MemChunk*) mem[chunk].get();
1898
1899 if (offset > mc->currentSize)
1900 return (const uint8_t*) joblist::CPNULLSTRMARK.c_str();
1901
1902 return &(mc->data[offset]) + 4;
1903 }
1904
isNullValue(uint64_t off)1905 inline bool StringStore::isNullValue(uint64_t off) const
1906 {
1907 uint32_t length;
1908
1909 if (off == std::numeric_limits<uint64_t>::max())
1910 return true;
1911
1912 // Long strings won't be NULL
1913 if (off & 0x8000000000000000)
1914 return false;
1915
1916 uint32_t chunk = off / CHUNK_SIZE;
1917 uint32_t offset = off % CHUNK_SIZE;
1918 MemChunk* mc;
1919
1920 if (mem.size() <= chunk)
1921 return true;
1922
1923 mc = (MemChunk*) mem[chunk].get();
1924 memcpy(&length, &mc->data[offset], 4);
1925
1926 if (length == 0)
1927 return true;
1928
1929 if (length < 8)
1930 return false;
1931
1932 if ((offset + length) > mc->currentSize)
1933 return true;
1934
1935 if (mc->data[offset + 4] == 0) // "" = NULL string for some reason...
1936 return true;
1937 return (memcmp(&mc->data[offset+4], joblist::CPNULLSTRMARK.c_str(), 8) == 0);
1938 }
1939
getStringLength(uint64_t off)1940 inline uint32_t StringStore::getStringLength(uint64_t off) const
1941 {
1942 uint32_t length;
1943 MemChunk* mc;
1944
1945 if (off == std::numeric_limits<uint64_t>::max())
1946 return 0;
1947
1948 if (off & 0x8000000000000000)
1949 {
1950 //off = off - 0x8000000000000000;
1951 off &= ~0x8000000000000000;
1952
1953 if (longStrings.size() <= off)
1954 return 0;
1955
1956 mc = (MemChunk*) longStrings[off].get();
1957 memcpy(&length, mc->data, 4);
1958 }
1959 else
1960 {
1961 uint64_t chunk = off / CHUNK_SIZE;
1962 uint64_t offset = off % CHUNK_SIZE;
1963
1964 if (mem.size() <= chunk)
1965 return 0;
1966
1967 mc = (MemChunk*) mem[chunk].get();
1968 memcpy(&length, &mc->data[offset], 4);
1969 }
1970
1971 return length;
1972 }
1973
isEmpty()1974 inline bool StringStore::isEmpty() const
1975 {
1976 return empty;
1977 }
1978
getSize()1979 inline uint64_t StringStore::getSize() const
1980 {
1981 uint32_t i;
1982 uint64_t ret = 0;
1983 MemChunk* mc;
1984
1985 ret += sizeof(MemChunk) * mem.size();
1986 for (i = 0; i < mem.size(); i++)
1987 {
1988 mc = (MemChunk*) mem[i].get();
1989 ret += mc->capacity;
1990 }
1991
1992 ret += sizeof(MemChunk) * longStrings.size();
1993 for (i = 0; i < longStrings.size(); i++)
1994 {
1995 mc = (MemChunk*) longStrings[i].get();
1996 ret += mc->capacity;
1997 }
1998
1999 return ret;
2000 }
2001
2002 inline RGData& RGData::operator=(const RGData& r)
2003 {
2004 rowData = r.rowData;
2005 strings = r.strings;
2006 userDataStore = r.userDataStore;
2007 return *this;
2008 }
2009
getRow(uint32_t num,Row * row)2010 inline void RGData::getRow(uint32_t num, Row* row)
2011 {
2012 uint32_t size = row->getSize();
2013 row->setData(Row::Pointer(&rowData[RowGroup::getHeaderSize() + (num * size)], strings.get(), userDataStore.get()));
2014 }
2015
2016 }
2017
2018 #endif
2019 // vim:ts=4 sw=4:
2020