1 /* Copyright (C) 2014 InfiniDB, Inc.
2 
3    This program is free software; you can redistribute it and/or
4    modify it under the terms of the GNU General Public License
5    as published by the Free Software Foundation; version 2 of
6    the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
16    MA 02110-1301, USA. */
17 
18 //  $Id: we_type.h 4737 2013-08-14 20:45:46Z bwilkinson $
19 
20 /** @file */
21 
22 
23 #ifndef _WE_TYPE_H_
24 #define _WE_TYPE_H_
25 
26 #undef EXPORT
27 #undef DELETE
28 #undef NO_ERROR
29 
30 #include <sys/types.h>
31 #include <string>
32 #include <vector>
33 #include <boost/any.hpp>
34 #include <cstring>
35 #include <stdexcept>
36 
37 #include "we_define.h"
38 #include "we_typeext.h"
39 #include "calpontsystemcatalog.h"
40 #include "IDBDataFile.h"
41 #include "IDBPolicy.h"
42 
43 #undef EXPORT
44 #undef DELETE
45 #undef NO_ERROR
46 
47 /** Namespace WriteEngine */
48 namespace WriteEngine
49 {
50 typedef idbdatafile::IDBDataFile IDBDataFile;
51 
52 /************************************************************************
53  * Type definitions
54  ************************************************************************/
/** @brief Object ID */
typedef uint32_t OID;
/** @brief File ID */
typedef uint32_t FID;
/** @brief Row ID */
typedef uint64_t RID;
/** @brief Transaction ID (New) */
typedef uint32_t TxnID;
/** @brief High water mark */
typedef uint32_t HWM;
60 
61 /************************************************************************
62  * Type enumerations
63  ************************************************************************/
/** @brief Debug level type */
enum DebugLevel
{
    /** @brief No debug info */
    DEBUG_0 = 0,
    /** @brief Summary level debug */
    DEBUG_1 = 1,
    /** @brief Moderate debug */
    DEBUG_2 = 2,
    /** @brief Detail debug */
    DEBUG_3 = 3
};
71 
/**
 * @brief Message level
 *
 * INFO2 only goes to the log file unless the '-i' cmd line arg is specified,
 * in which case the msg will also get logged to the console.
 * All other messages always get logged to the log file and the console.
 */
enum MsgLevel
{
    /** @brief Basic Information level */
    MSGLVL_INFO1    = 0,
    /** @brief More Information level */
    MSGLVL_INFO2    = 1,
    /** @brief Warning level */
    MSGLVL_WARNING  = 2,
    /** @brief Error level */
    MSGLVL_ERROR    = 3,
    /** @brief Critical level */
    MSGLVL_CRITICAL = 4
};
83 
/** @brief Operation type */
enum OpType
{
    /** @brief No oper */
    NOOP   = 0,
    /** @brief Insert */
    INSERT = 1,
    /** @brief Update */
    UPDATE = 2,
    /** @brief Delete */
    DELETE = 4,
    /** @brief Query */
    QUERY  = 8
};
92 
/**
 * @brief Column type enumeration
 *
 * Values 1 (WR_BIT) and 5 (WR_LONG) are intentionally left unassigned; the
 * corresponding enumerators are commented out below.
 */
enum ColType
{
    // WR_BIT     = 1,   /* Bit (disabled) */
    /** @brief Byte */
    WR_BYTE      = 2,
    /** @brief Short */
    WR_SHORT     = 3,
    /** @brief Int */
    WR_INT       = 4,
    // WR_LONG    = 5,   /* Long (disabled) */
    /** @brief Long long */
    WR_LONGLONG  = 6,
    /** @brief Float */
    WR_FLOAT     = 7,
    /** @brief Double */
    WR_DOUBLE    = 8,
    /** @brief Char */
    WR_CHAR      = 9,
    /** @brief Token */
    WR_TOKEN     = 10,
    /** @brief BLOB */
    WR_BLOB      = 11,
    /** @brief VARBINARY */
    WR_VARBINARY = 12,
    /** @brief Unsigned Byte */
    WR_UBYTE     = 13,
    /** @brief Unsigned Short */
    WR_USHORT    = 14,
    /** @brief Unsigned Int */
    WR_UINT      = 15,
    /** @brief Unsigned Long long */
    WR_ULONGLONG = 16,
    /** @brief TEXT */
    WR_TEXT      = 17,
    /** @brief Medium Int */
    WR_MEDINT    = 18,
    /** @brief Unsigned Medium Int */
    WR_UMEDINT   = 19
};
115 
/** @brief Describes relation of field to column for a bulk load */
enum BulkFldColRel
{
    BULK_FLDCOL_COLUMN_FIELD,    // map input field to db col
    BULK_FLDCOL_COLUMN_DEFAULT,  // import def val to db col
    BULK_FLDCOL_IGNORE_FIELD     // ignore fld in import file
};
121 
/** @brief Bulk Load Mode (ex: local vs remote, single src vs multiple src files) */
enum BulkModeType
{
    BULK_MODE_REMOTE_SINGLE_SRC   = 1,
    BULK_MODE_REMOTE_MULTIPLE_SRC = 2,
    BULK_MODE_LOCAL               = 3
};
127 
/**
 * @brief Import Mode
 *
 *   0 - text Import (default)
 *   1 - Binary Import with NULL values
 *   2 - Binary Import with saturated NULL values
 */
enum ImportDataMode
{
    IMPORT_DATA_TEXT            = 0,
    IMPORT_DATA_BIN_ACCEPT_NULL = 1,
    IMPORT_DATA_BIN_SAT_NULL    = 2
};
135 
136 /**
137  * the set of Calpont column data type names; MUST match ColDataType in
138  * calpontsystemcatalog.h.
139  */
140 const char  ColDataTypeStr[execplan::CalpontSystemCatalog::NUM_OF_COL_DATA_TYPE][20] =
141 {
142     "bit",
143     "tinyint",
144     "char",
145     "smallint",
146     "decimal",
147     "medint",
148     "integer",
149     "float",
150     "date",
151     "bigint",
152     "double",
153     "datetime",
154     "varchar",
155     "varbinary",
156     "clob",
157     "blob",
158     "unsigned-tinyint",
159     "unsigned-smallint",
160     "unsigned-decimal",
161     "unsigned-med int",
162     "unsigned-int",
163     "unsigned-float",
164     "unsigned-bigint",
165     "unsigned-double",
166     "text",
167     "time",
168     "timestamp"
169 };
170 
/** @brief Function type */
enum FuncType
{
    FUNC_WRITE_ENGINE,
    FUNC_INDEX,
    FUNC_DICTIONARY
};
172 
/** @brief List type */
enum CacheListType
{
    FREE_LIST,
    LRU_LIST,
    WRITE_LIST
};
174 
175 /************************************************************************
176  * struct data block structure
177  ************************************************************************/
178 struct DataBlock                        /** @brief Data block structure */
179 {
180     long     no;                         /** @brief block number */
181     uint64_t lbid;                       /** @brief lbid */
182     bool     dirty;                      /** @brief block dirty flag */
183     int      state;                      /** @brief initialized 0, read 1 , modified 2 */
184     unsigned char  data[BYTE_PER_BLOCK];/** @brief data buffer */
DataBlockDataBlock185     DataBlock()
186     {
187         dirty = false;     /** @brief constructor */
188         memset( data, 0, BYTE_PER_BLOCK );
189     }
190 };
191 
192 struct DataSubBlock                     /** @brief Data subblock structure*/
193 {
194     long           no;                  /** @brief sub block number */
195     bool           dirty;               /** @brief block dirty flag */
196     unsigned char  data[BYTE_PER_SUBBLOCK]; /** @brief data buffer */
DataSubBlockDataSubBlock197     DataSubBlock()
198     {
199         dirty = false;    /** @brief constructor */
200         memset( data, 0, BYTE_PER_SUBBLOCK );
201     }
202 };
203 
204 /************************************************************************
205  * @brief file structure. Default copy constructor, assignment oper, etc
206  * are in play here, as they are not overridden.  Beware that if copies
207  * of a File object are made, only one user should be closing the pFile.
208  * oid and fid replicate one another.  oid mostly used by index, cache,
209  * and dictionary.  fid mostly used by colop and bulk.
210  ************************************************************************/
211 struct File                             /** @brief File structure */
212 {
213     OID            oid;                 /** @brief Oid */
214     FID            fid;                 /** @brief File id */
215     HWM            hwm;                 /** @brief High water mark */
216     IDBDataFile*   pFile;               /** @brief File handle */
217     uint32_t      fPartition;          /** @brief Partition for pFile*/
218     uint16_t      fSegment;            /** @brief Segment for pFile */
219     uint16_t      fDbRoot;             /** @brief DbRoot for pFile */
220     std::string    fSegFileName;        /** @brief Current seg file path */
FileFile221     File()
222     {
223         clear();    /** @brief constructor */
224     }
clearFile225     void clear()
226     {
227         pFile = NULL;
228         oid = fid = hwm = 0;
229         fPartition = fSegment = fDbRoot = 0;
230         fSegFileName.clear();
231     }
232 };
233 
234 /************************************************************************
235  * @brief Internal communication block structure
236  ************************************************************************/
237 struct CommBlock                        /** @brief Communication Block */
238 {
239     File           file;                /** @brief File structure */
clearCommBlock240     void clear()
241     {
242         file.clear();
243     }
244 };
245 
246 /************************************************************************
247  * @brief column structure used to pass data in/out of we_colop functions
248  ************************************************************************/
249 struct Column                           /** @brief Column structure */
250 {
251     int            colNo;               /** @brief column number */
252     int            colWidth;            /** @brief column width */
253     ColType        colType;             /** @brief column type (internal use)*/
254     execplan::CalpontSystemCatalog::ColDataType colDataType; /** @brief column data type (from interface)*/
255     File           dataFile;            /** @brief column data file */
256     int            compressionType;     /** @brief column compression type*/
ColumnColumn257     Column() : colNo(0), colWidth(0), colType(WR_INT),
258         colDataType(execplan::CalpontSystemCatalog::INT),
259         compressionType(idbdatafile::IDBPolicy::useHdfs() ? 2 : 0) { }
260 };
261 
262 /************************************************************************
263  * @brief dictionary related structures (Token struct is defined in
264  * we_typeext.h to facilitate its use in dbcon and utils/dataconvert).
265  ************************************************************************/
/** @brief Offset structure */
typedef struct offset_
{
    int      hdrLoc;  /**< offset position in hdr */
    uint16_t offset;  /**< offset in block */
} Offset;
271 
272 /************************************************************************
273  * @brief interfaces with DDL/DML
274  ************************************************************************/
275 typedef struct colTuple_struct          /** @brief Column Tuple definition*/
276 {
277     boost::any     data;                /** @brief column value */
278 } ColTuple;
279 
280 typedef std::vector<ColTuple>  ColTupleList; /** @brief column value list */
281 
282 struct ColStruct                        /** @brief Column Interface Struct*/
283 {
284     OID            dataOid;             /** @brief column data file object id */
285     int            colWidth;            /** @brief column width */
286     bool           tokenFlag;           /** @brief column token flag, must be set to true if it is a token column */
287     execplan::CalpontSystemCatalog::ColDataType    colDataType;         /** @brief column data type (for interface)*/
288     ColType        colType;             /** @brief column type (internal use for write engine)*/
289     uint32_t      fColPartition;       /** @brief Partition for column file */
290     uint16_t      fColSegment;         /** @brief Segment for column file*/
291     uint16_t      fColDbRoot;          /** @brief DBRoot for column file */
292     int            fCompressionType;    /** @brief Compression tpye for column file */
ColStructColStruct293     ColStruct() : dataOid(0), colWidth(0),  /** @brief constructor */
294         tokenFlag(false), colDataType(execplan::CalpontSystemCatalog::INT), colType(WR_INT),
295         fColPartition(0), fColSegment(0), fColDbRoot(0),
296         fCompressionType(idbdatafile::IDBPolicy::useHdfs() ? 2 : 0) { }
297 };
298 
299 typedef std::vector<ColStruct>      ColStructList; /** @brief column struct list */
300 typedef std::vector<ColTupleList>   ColValueList;  /** @brief column value list */
301 typedef std::vector<RID>            RIDList;       /** @brief RID list */
302 
303 typedef std::vector<std::string> dictStr;
304 typedef std::vector<dictStr> DictStrList;
305 
306 // dictionary
307 struct DctnryStruct                     /** @brief Dctnry Interface Struct*/
308 {
309     OID            dctnryOid;           /** @brief dictionary signature file */
310     OID            columnOid;           /** @brief corresponding column file */
311     int            colWidth;            /** @brief string width for the dictionary column*/
312     uint32_t      fColPartition;       /** @brief Partition for column file */
313     uint16_t      fColSegment;         /** @brief Segment for column file */
314     uint16_t      fColDbRoot;          /** @brief DBRoot for column file */
315     int            fCompressionType;    /** @brief Compression tpye for column file */
DctnryStructDctnryStruct316     DctnryStruct() : dctnryOid(0), columnOid(0),   /** @brief constructor */
317         colWidth(0),
318         fColPartition(0), fColSegment(0),
319         fColDbRoot(0), fCompressionType(idbdatafile::IDBPolicy::useHdfs() ? 2 : 0) { }
320 };
321 
322 struct DctnryTuple                      /** @brief Dictionary Tuple struct*/
323 {
324     unsigned char*   sigValue;          /** @brief dictionary signature value*/
325     int             sigSize;            /** @brief dictionary signature size */
326     Token           token;              /** @brief dictionary token */
327     bool            isNull;
DctnryTupleDctnryTuple328     DctnryTuple()   { }
~DctnryTupleDctnryTuple329     ~DctnryTuple()  { }
330 };
331 
332 typedef std::vector<DctnryTuple> DctColTupleList;
333 typedef std::vector<DctnryStruct>  DctnryStructList; /** @brief column struct list */
334 typedef std::vector<DctColTupleList>   DctnryValueList;  /** @brief column value list */
335 
336 /************************************************************************
337  * @brief Used by Bulk Load to describe a column
338  ************************************************************************/
339 struct JobColumn                        /** @brief Job Column Structure */
340 {
341     std::string    colName;             /** @brief column name */
342     OID            mapOid;              /** @brief column OID */
343     execplan::CalpontSystemCatalog::ColDataType    dataType;            /** @brief column data type */
344     ColType        weType;              /** @brief write engine data type */
345     std::string    typeName;            /** @brief data type name */
346     uint64_t       emptyVal;            /** @brief default empty value */
347     int            width;               /** @brief column width; for a dictionary column, this is "eventually" the token width */
348     int            definedWidth;        /** @brief column width as defined in the table, used for non-dictionary strings */
349     int            dctnryWidth;         /** @brief dictionary width */
350     int            precision;           /** @brief precision of decimal */
351     int            scale;               /** @brief scale of decimal */
352     bool           fNotNull;            /** @brief not null flag */
353     BulkFldColRel  fFldColRelation;     /** @brief type of field/col relation*/
354     char           colType;             /** @brief column type, blank is regular, D is dictionary */
355     int            compressionType;     /** @brief compression type */
356     bool           autoIncFlag;         /** @brief auto increment flag */
357     DctnryStruct   dctnry;              /** @brief dictionary structure */
358     int64_t        fMinIntSat;          /** @brief For integer type, the min saturation value */
359     uint64_t       fMaxIntSat;          /** @brief For integer type, the max saturation value */
360     double         fMinDblSat;          /** @brief for float/double, the min saturation value */
361     double         fMaxDblSat;          /** @brief for float/double, the max saturation value */
362     bool           fWithDefault;        /** @brief With default */
363     long long      fDefaultInt;         /** @brief Integer column default */
364     unsigned long long fDefaultUInt;    /** @brief UnsignedInt col default*/
365     double         fDefaultDbl;         /** @brief Dbl/Flt column default */
366     std::string    fDefaultChr;         /** @brief Char column default */
JobColumnJobColumn367     JobColumn() : mapOid(0), dataType(execplan::CalpontSystemCatalog::INT), weType(WR_INT),
368         typeName("integer"), emptyVal(0),
369         width(0), definedWidth(0), dctnryWidth(0),
370         precision(0), scale(0), fNotNull(false),
371         fFldColRelation(BULK_FLDCOL_COLUMN_FIELD), colType(' '),
372         compressionType(0), autoIncFlag(false),
373         fMinIntSat(0), fMaxIntSat(0),
374         fMinDblSat(0), fMaxDblSat(0), fWithDefault(false),
375         fDefaultInt(0), fDefaultUInt(0), fDefaultDbl(0.0)
376     { }
377 };
378 
379 typedef std::vector<JobColumn>  JobColList; /** @brief column value list */
380 
381 struct JobFieldRef                   // references field/column in JobTable
382 {
383     BulkFldColRel  fFldColType;      // type of field or column
384     unsigned       fArrayIndex;      // index into colList or fIgnoredFields
385     //   in JobTable based on fFldColType.
JobFieldRefJobFieldRef386     JobFieldRef( ) : fFldColType(BULK_FLDCOL_COLUMN_FIELD), fArrayIndex(0) { }
JobFieldRefJobFieldRef387     JobFieldRef( BulkFldColRel fldColType, unsigned idx ) :
388         fFldColType( fldColType ), fArrayIndex( idx ) { }
389 };
390 typedef std::vector<JobFieldRef>  JobFieldRefList;
391 
392 struct JobTable                         /** @brief Job Table Structure */
393 {
394     std::string    tblName;             /** @brief table name */
395     OID            mapOid;              /** @brief table OID */
396     std::string    loadFileName;        /** @brief table load file name */
397     uint64_t       maxErrNum;           /** @brief max number of error rows before abort */
398     JobColList     colList;             /** @brief list of columns to be loaded; followed by default columns to be loaded */
399     JobColList     fIgnoredFields;      /** @brief list of fields in input file to be ignored */
400     JobFieldRefList fFldRefs;           /** @brief Combined list of refs to entries in colList and fIgnoredFields */
JobTableJobTable401     JobTable() : mapOid(0), maxErrNum(0) { }
402 };
403 
404 typedef std::vector<JobTable>  JobTableList;/** @brief table list */
405 
406 struct Job                              /** @brief Job Structure */
407 {
408     int            id;                  /** @brief job id */
409     std::string    schema;              /** @brief database name */
410     std::string    name;                /** @brief job name */
411     std::string    desc;                /** @brief job description */
412     std::string    userName;            /** @brief user name */
413     JobTableList   jobTableList;        /** @brief job table list */
414 
415     std::string    createDate;          /** @brief job create date */
416     std::string    createTime;          /** @brief job create time */
417 
418     char           fDelimiter;
419     char           fEnclosedByChar;
420     char           fEscapeChar;
421     int            numberOfReadBuffers;
422     unsigned       readBufferSize;
423     unsigned       writeBufferSize;
JobJob424     Job() : id(0), fDelimiter('|'),
425         fEnclosedByChar('\0'), fEscapeChar('\0'),
426         numberOfReadBuffers(0), readBufferSize(0), writeBufferSize(0) { }
427 };
428 
429 /************************************************************************
430  * @brief Cache memory
431  ************************************************************************/
432 struct CacheBlock                       /** @brief Cache block structure */
433 {
434     uint64_t       fbo;                 /** @brief file fbo */
435     uint64_t       lbid;                /** @brief lbid */
436     bool           dirty;               /** @brief dirty flag */
437     int            hitCount;            /** @brief hit count */
438     unsigned char* data;                /** @brief block buffer */
CacheBlockCacheBlock439     CacheBlock()
440     {
441         data = NULL;    /** @brief constructor */
442         clear();
443     }
clearCacheBlock444     void clear()
445     {
446         fbo = lbid = hitCount = 0;
447         dirty = false;
448 
449         if ( data ) memset( data, 0, BYTE_PER_BLOCK);
450     }   /** @brief clear, NOTE: buf must be free by caller first */
initCacheBlock451     void init()
452     {
453         data = (unsigned char*)malloc(BYTE_PER_BLOCK);
454     }
freeMemCacheBlock455     void freeMem()
456     {
457         if ( data ) free( data );
458     }
459 };
460 
461 struct BlockBuffer                      /** @brief Block buffer */
462 {
463     CommBlock      cb;                  /** @brief Communication block structure */
464     CacheBlock     block;               /** @brief Cache block strucutre */
465     CacheListType  listType;            /** @brief List number, 0 - free, 1 - LRU, 2 - write */
BlockBufferBlockBuffer466     BlockBuffer()
467     {
468         clear();    /** @brief constructor */
469     }
initBlockBuffer470     void init()
471     {
472         block.init();
473     }
freeMemBlockBuffer474     void freeMem()
475     {
476         block.freeMem();
477     }
clearBlockBuffer478     void clear()
479     {
480         cb.clear();
481         block.clear();
482         listType = FREE_LIST;
483     }
484 };
485 
/** @brief Cache control structure */
struct CacheControl
{
    int totalBlock;     /**< The total number of allocated blocks */
    int pctFree;        /**< The percentage of free blocks when some blocks must be aged out */
    int checkInterval;  /**< A check point interval in seconds */

    /** @brief constructor; all three settings start at 0 */
    CacheControl() :
        totalBlock(0),
        pctFree(0),
        checkInterval(0)
    {
    }
};
496 
497 /************************************************************************
498  * @brief Bulk parse meta data describing data in a read buffer.
499  * An offset of COLPOSPAIR_NULL_TOKEN_OFFSET represents a null token.
500  ************************************************************************/
/** @brief Column position pair structure */
struct ColPosPair
{
    int start;   /**< start position */
    int offset;  /**< length of token */
};
506 
507 /************************************************************************
508  * @brief SecondaryShutdown used to terminate a thread when it sees that the
509  * JobStatus flag has been set to EXIT_FAILURE (by another thread).
510  ************************************************************************/
/** @brief runtime_error subclass that carries only a message string */
class SecondaryShutdownException : public std::runtime_error
{
public:
    /** @brief construct with the text later returned by what() */
    SecondaryShutdownException(const std::string& msg)
        : std::runtime_error(msg)
    {
    }
};
517 
518 /************************************************************************
519  * @brief Generic exception class used to store exception string and error
520  * code for a writeengine error.
521  ************************************************************************/
/** @brief runtime_error subclass that pairs a message with an integer error code */
class WeException : public std::runtime_error
{
public:
    /**
     * @brief construct from a message and an optional error code
     * @param msg text later returned by what()
     * @param err error code to store; 0 when omitted
     */
    WeException(const std::string& msg, int err = 0)
        : std::runtime_error(msg),
          fErrorCode(err)
    {
    }

    /** @brief replace the stored error code */
    void errorCode(int code) { fErrorCode = code; }

    /** @brief return the stored error code */
    int errorCode() const { return fErrorCode; }

private:
    int fErrorCode;  /**< stored error code */
};
538 
539 } //end of namespace
540 
541 #endif // _WE_TYPE_H_
542