1 /* Copyright (C) 2014 InfiniDB, Inc. 2 3 This program is free software; you can redistribute it and/or 4 modify it under the terms of the GNU General Public License 5 as published by the Free Software Foundation; version 2 of 6 the License. 7 8 This program is distributed in the hope that it will be useful, 9 but WITHOUT ANY WARRANTY; without even the implied warranty of 10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 GNU General Public License for more details. 12 13 You should have received a copy of the GNU General Public License 14 along with this program; if not, write to the Free Software 15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 16 MA 02110-1301, USA. */ 17 18 // $Id: we_type.h 4737 2013-08-14 20:45:46Z bwilkinson $ 19 20 /** @file */ 21 22 23 #ifndef _WE_TYPE_H_ 24 #define _WE_TYPE_H_ 25 26 #undef EXPORT 27 #undef DELETE 28 #undef NO_ERROR 29 30 #include <sys/types.h> 31 #include <string> 32 #include <vector> 33 #include <boost/any.hpp> 34 #include <cstring> 35 #include <stdexcept> 36 37 #include "we_define.h" 38 #include "we_typeext.h" 39 #include "calpontsystemcatalog.h" 40 #include "IDBDataFile.h" 41 #include "IDBPolicy.h" 42 43 #undef EXPORT 44 #undef DELETE 45 #undef NO_ERROR 46 47 /** Namespace WriteEngine */ 48 namespace WriteEngine 49 { 50 typedef idbdatafile::IDBDataFile IDBDataFile; 51 52 /************************************************************************ 53 * Type definitions 54 ************************************************************************/ 55 typedef uint32_t OID; /** @brief Object ID */ 56 typedef uint32_t FID; /** @brief File ID */ 57 typedef uint64_t RID; /** @brief Row ID */ 58 typedef uint32_t TxnID; /** @brief Transaction ID (New)*/ 59 typedef uint32_t HWM; /** @brief high water mark */ 60 61 /************************************************************************ 62 * Type enumerations 63 ************************************************************************/ 64 enum DebugLevel /** @brief Debug level type */ 65 { 66 DEBUG_0 = 0, /** @brief No debug info */ 67 DEBUG_1 = 1, /** @brief Summary level debug */ 68 DEBUG_2 = 2, /** @brief Moderate debug */ 69 DEBUG_3 = 3, /** @brief Detail debug */ 70 }; 71 72 // INFO2 only goes to log file unless '-i' cmd line arg is specified, 73 // in which case the msg will also get logged to the console. 74 // All other messages always get logged to the log file and the console. 75 enum MsgLevel /** @brief Message level */ 76 { 77 MSGLVL_INFO1 = 0, /** @brief Basic Information level*/ 78 MSGLVL_INFO2 = 1, /** @brief More Information level */ 79 MSGLVL_WARNING = 2, /** @brief Warning level */ 80 MSGLVL_ERROR = 3, /** @brief Error level */ 81 MSGLVL_CRITICAL = 4, /** @brief Critical level */ 82 }; 83 84 enum OpType /** @brief Operation type */ 85 { 86 NOOP = 0, /** @brief No oper */ 87 INSERT = 1, /** @brief Insert */ 88 UPDATE = 2, /** @brief Update */ 89 DELETE = 4, /** @brief Delete */ 90 QUERY = 8, /** @brief Query */ 91 }; 92 93 enum ColType /** @brief Column type enumeration*/ 94 { 95 // WR_BIT = 1, /** @brief Bit */ 96 WR_BYTE = 2, /** @brief Byte */ 97 WR_SHORT = 3, /** @brief Short */ 98 WR_INT = 4, /** @brief Int */ 99 // WR_LONG = 5, /** @brief Long */ 100 WR_LONGLONG = 6, /** @brief Long long*/ 101 WR_FLOAT = 7, /** @brief Float */ 102 WR_DOUBLE = 8, /** @brief Double */ 103 WR_CHAR = 9, /** @brief Char */ 104 WR_TOKEN = 10, /** @brief Token */ 105 WR_BLOB = 11, /** @brief BLOB */ 106 WR_VARBINARY = 12, /** @brief VARBINARY */ 107 WR_UBYTE = 13, /** @brief Unsigned Byte */ 108 WR_USHORT = 14, /** @brief Unsigned Short */ 109 WR_UINT = 15, /** @brief Unsigned Int */ 110 WR_ULONGLONG = 16, /** @brief Unsigned Long long*/ 111 WR_TEXT = 17, /** @brief TEXT */ 112 WR_MEDINT = 18, /** @brief Medium Int */ 113 WR_UMEDINT = 19 /** @brief Unsigned Medium Int */ 114 }; 115 116 // Describes relation of field to column for a bulk load 117 enum BulkFldColRel { BULK_FLDCOL_COLUMN_FIELD, // map input field to db col 118 BULK_FLDCOL_COLUMN_DEFAULT,// import def val to db col 119 BULK_FLDCOL_IGNORE_FIELD 120 };// ignore fld in import file 121 122 // Bulk Load Mode (ex: local vs remote, single src vs multiple src files) 123 enum BulkModeType { BULK_MODE_REMOTE_SINGLE_SRC = 1, 124 BULK_MODE_REMOTE_MULTIPLE_SRC = 2, 125 BULK_MODE_LOCAL = 3 126 }; 127 128 // Import Mode 0-text Import (default) 129 // 1-Binary Import with NULL values 130 // 2-Binary Import with saturated NULL values 131 enum ImportDataMode { IMPORT_DATA_TEXT = 0, 132 IMPORT_DATA_BIN_ACCEPT_NULL = 1, 133 IMPORT_DATA_BIN_SAT_NULL = 2 134 }; 135 136 /** 137 * the set of Calpont column data type names; MUST match ColDataType in 138 * calpontsystemcatalog.h. 139 */ 140 const char ColDataTypeStr[execplan::CalpontSystemCatalog::NUM_OF_COL_DATA_TYPE][20] = 141 { 142 "bit", 143 "tinyint", 144 "char", 145 "smallint", 146 "decimal", 147 "medint", 148 "integer", 149 "float", 150 "date", 151 "bigint", 152 "double", 153 "datetime", 154 "varchar", 155 "varbinary", 156 "clob", 157 "blob", 158 "unsigned-tinyint", 159 "unsigned-smallint", 160 "unsigned-decimal", 161 "unsigned-med int", 162 "unsigned-int", 163 "unsigned-float", 164 "unsigned-bigint", 165 "unsigned-double", 166 "text", 167 "time", 168 "timestamp" 169 }; 170 171 enum FuncType { FUNC_WRITE_ENGINE, FUNC_INDEX, FUNC_DICTIONARY }; 172 173 enum CacheListType { FREE_LIST, LRU_LIST, WRITE_LIST }; /** @brief List type */ 174 175 /************************************************************************ 176 * struct data block structure 177 ************************************************************************/ 178 struct DataBlock /** @brief Data block structure */ 179 { 180 long no; /** @brief block number */ 181 uint64_t lbid; /** @brief lbid */ 182 bool dirty; /** @brief block dirty flag */ 183 int state; /** @brief initialized 0, read 1 , modified 2 */ 184 unsigned char data[BYTE_PER_BLOCK];/** @brief data buffer */ DataBlockDataBlock185 DataBlock() 186 { 187 dirty = false; /** @brief constructor */ 188 memset( data, 0, BYTE_PER_BLOCK ); 189 } 190 }; 191 192 struct DataSubBlock /** @brief Data subblock structure*/ 193 { 194 long no; /** @brief sub block number */ 195 bool dirty; /** @brief block dirty flag */ 196 unsigned char data[BYTE_PER_SUBBLOCK]; /** @brief data buffer */ DataSubBlockDataSubBlock197 DataSubBlock() 198 { 199 dirty = false; /** @brief constructor */ 200 memset( data, 0, BYTE_PER_SUBBLOCK ); 201 } 202 }; 203 204 /************************************************************************ 205 * @brief file structure. Default copy constructor, assignment oper, etc 206 * are in play here, as they are not overridden. Beware that if copies 207 * of a File object are made, only one user should be closing the pFile. 208 * oid and fid replicate one another. oid mostly used by index, cache, 209 * and dictionary. fid mostly used by colop and bulk. 210 ************************************************************************/ 211 struct File /** @brief File structure */ 212 { 213 OID oid; /** @brief Oid */ 214 FID fid; /** @brief File id */ 215 HWM hwm; /** @brief High water mark */ 216 IDBDataFile* pFile; /** @brief File handle */ 217 uint32_t fPartition; /** @brief Partition for pFile*/ 218 uint16_t fSegment; /** @brief Segment for pFile */ 219 uint16_t fDbRoot; /** @brief DbRoot for pFile */ 220 std::string fSegFileName; /** @brief Current seg file path */ FileFile221 File() 222 { 223 clear(); /** @brief constructor */ 224 } clearFile225 void clear() 226 { 227 pFile = NULL; 228 oid = fid = hwm = 0; 229 fPartition = fSegment = fDbRoot = 0; 230 fSegFileName.clear(); 231 } 232 }; 233 234 /************************************************************************ 235 * @brief Internal communication block structure 236 ************************************************************************/ 237 struct CommBlock /** @brief Communication Block */ 238 { 239 File file; /** @brief File structure */ clearCommBlock240 void clear() 241 { 242 file.clear(); 243 } 244 }; 245 246 /************************************************************************ 247 * @brief column structure used to pass data in/out of we_colop functions 248 ************************************************************************/ 249 struct Column /** @brief Column structure */ 250 { 251 int colNo; /** @brief column number */ 252 int colWidth; /** @brief column width */ 253 ColType colType; /** @brief column type (internal use)*/ 254 execplan::CalpontSystemCatalog::ColDataType colDataType; /** @brief column data type (from interface)*/ 255 File dataFile; /** @brief column data file */ 256 int compressionType; /** @brief column compression type*/ ColumnColumn257 Column() : colNo(0), colWidth(0), colType(WR_INT), 258 colDataType(execplan::CalpontSystemCatalog::INT), 259 compressionType(idbdatafile::IDBPolicy::useHdfs() ? 2 : 0) { } 260 }; 261 262 /************************************************************************ 263 * @brief dictionary related structures (Token struct is defined in 264 * we_typeext.h to facilitate its use in dbcon and utils/dataconvert). 265 ************************************************************************/ 266 typedef struct offset_ /** @brief Offset structure */ 267 { 268 int hdrLoc; /** @brief offset postion in hdr */ 269 uint16_t offset; /** @brief offset in block */ 270 } Offset; 271 272 /************************************************************************ 273 * @brief interfaces with DDL/DML 274 ************************************************************************/ 275 typedef struct colTuple_struct /** @brief Column Tuple definition*/ 276 { 277 boost::any data; /** @brief column value */ 278 } ColTuple; 279 280 typedef std::vector<ColTuple> ColTupleList; /** @brief column value list */ 281 282 struct ColStruct /** @brief Column Interface Struct*/ 283 { 284 OID dataOid; /** @brief column data file object id */ 285 int colWidth; /** @brief column width */ 286 bool tokenFlag; /** @brief column token flag, must be set to true if it is a token column */ 287 execplan::CalpontSystemCatalog::ColDataType colDataType; /** @brief column data type (for interface)*/ 288 ColType colType; /** @brief column type (internal use for write engine)*/ 289 uint32_t fColPartition; /** @brief Partition for column file */ 290 uint16_t fColSegment; /** @brief Segment for column file*/ 291 uint16_t fColDbRoot; /** @brief DBRoot for column file */ 292 int fCompressionType; /** @brief Compression tpye for column file */ ColStructColStruct293 ColStruct() : dataOid(0), colWidth(0), /** @brief constructor */ 294 tokenFlag(false), colDataType(execplan::CalpontSystemCatalog::INT), colType(WR_INT), 295 fColPartition(0), fColSegment(0), fColDbRoot(0), 296 fCompressionType(idbdatafile::IDBPolicy::useHdfs() ? 2 : 0) { } 297 }; 298 299 typedef std::vector<ColStruct> ColStructList; /** @brief column struct list */ 300 typedef std::vector<ColTupleList> ColValueList; /** @brief column value list */ 301 typedef std::vector<RID> RIDList; /** @brief RID list */ 302 303 typedef std::vector<std::string> dictStr; 304 typedef std::vector<dictStr> DictStrList; 305 306 // dictionary 307 struct DctnryStruct /** @brief Dctnry Interface Struct*/ 308 { 309 OID dctnryOid; /** @brief dictionary signature file */ 310 OID columnOid; /** @brief corresponding column file */ 311 int colWidth; /** @brief string width for the dictionary column*/ 312 uint32_t fColPartition; /** @brief Partition for column file */ 313 uint16_t fColSegment; /** @brief Segment for column file */ 314 uint16_t fColDbRoot; /** @brief DBRoot for column file */ 315 int fCompressionType; /** @brief Compression tpye for column file */ DctnryStructDctnryStruct316 DctnryStruct() : dctnryOid(0), columnOid(0), /** @brief constructor */ 317 colWidth(0), 318 fColPartition(0), fColSegment(0), 319 fColDbRoot(0), fCompressionType(idbdatafile::IDBPolicy::useHdfs() ? 2 : 0) { } 320 }; 321 322 struct DctnryTuple /** @brief Dictionary Tuple struct*/ 323 { 324 unsigned char* sigValue; /** @brief dictionary signature value*/ 325 int sigSize; /** @brief dictionary signature size */ 326 Token token; /** @brief dictionary token */ 327 bool isNull; DctnryTupleDctnryTuple328 DctnryTuple() { } ~DctnryTupleDctnryTuple329 ~DctnryTuple() { } 330 }; 331 332 typedef std::vector<DctnryTuple> DctColTupleList; 333 typedef std::vector<DctnryStruct> DctnryStructList; /** @brief column struct list */ 334 typedef std::vector<DctColTupleList> DctnryValueList; /** @brief column value list */ 335 336 /************************************************************************ 337 * @brief Used by Bulk Load to describe a column 338 ************************************************************************/ 339 struct JobColumn /** @brief Job Column Structure */ 340 { 341 std::string colName; /** @brief column name */ 342 OID mapOid; /** @brief column OID */ 343 execplan::CalpontSystemCatalog::ColDataType dataType; /** @brief column data type */ 344 ColType weType; /** @brief write engine data type */ 345 std::string typeName; /** @brief data type name */ 346 uint64_t emptyVal; /** @brief default empty value */ 347 int width; /** @brief column width; for a dictionary column, this is "eventually" the token width */ 348 int definedWidth; /** @brief column width as defined in the table, used for non-dictionary strings */ 349 int dctnryWidth; /** @brief dictionary width */ 350 int precision; /** @brief precision of decimal */ 351 int scale; /** @brief scale of decimal */ 352 bool fNotNull; /** @brief not null flag */ 353 BulkFldColRel fFldColRelation; /** @brief type of field/col relation*/ 354 char colType; /** @brief column type, blank is regular, D is dictionary */ 355 int compressionType; /** @brief compression type */ 356 bool autoIncFlag; /** @brief auto increment flag */ 357 DctnryStruct dctnry; /** @brief dictionary structure */ 358 int64_t fMinIntSat; /** @brief For integer type, the min saturation value */ 359 uint64_t fMaxIntSat; /** @brief For integer type, the max saturation value */ 360 double fMinDblSat; /** @brief for float/double, the min saturation value */ 361 double fMaxDblSat; /** @brief for float/double, the max saturation value */ 362 bool fWithDefault; /** @brief With default */ 363 long long fDefaultInt; /** @brief Integer column default */ 364 unsigned long long fDefaultUInt; /** @brief UnsignedInt col default*/ 365 double fDefaultDbl; /** @brief Dbl/Flt column default */ 366 std::string fDefaultChr; /** @brief Char column default */ JobColumnJobColumn367 JobColumn() : mapOid(0), dataType(execplan::CalpontSystemCatalog::INT), weType(WR_INT), 368 typeName("integer"), emptyVal(0), 369 width(0), definedWidth(0), dctnryWidth(0), 370 precision(0), scale(0), fNotNull(false), 371 fFldColRelation(BULK_FLDCOL_COLUMN_FIELD), colType(' '), 372 compressionType(0), autoIncFlag(false), 373 fMinIntSat(0), fMaxIntSat(0), 374 fMinDblSat(0), fMaxDblSat(0), fWithDefault(false), 375 fDefaultInt(0), fDefaultUInt(0), fDefaultDbl(0.0) 376 { } 377 }; 378 379 typedef std::vector<JobColumn> JobColList; /** @brief column value list */ 380 381 struct JobFieldRef // references field/column in JobTable 382 { 383 BulkFldColRel fFldColType; // type of field or column 384 unsigned fArrayIndex; // index into colList or fIgnoredFields 385 // in JobTable based on fFldColType. JobFieldRefJobFieldRef386 JobFieldRef( ) : fFldColType(BULK_FLDCOL_COLUMN_FIELD), fArrayIndex(0) { } JobFieldRefJobFieldRef387 JobFieldRef( BulkFldColRel fldColType, unsigned idx ) : 388 fFldColType( fldColType ), fArrayIndex( idx ) { } 389 }; 390 typedef std::vector<JobFieldRef> JobFieldRefList; 391 392 struct JobTable /** @brief Job Table Structure */ 393 { 394 std::string tblName; /** @brief table name */ 395 OID mapOid; /** @brief table OID */ 396 std::string loadFileName; /** @brief table load file name */ 397 uint64_t maxErrNum; /** @brief max number of error rows before abort */ 398 JobColList colList; /** @brief list of columns to be loaded; followed by default columns to be loaded */ 399 JobColList fIgnoredFields; /** @brief list of fields in input file to be ignored */ 400 JobFieldRefList fFldRefs; /** @brief Combined list of refs to entries in colList and fIgnoredFields */ JobTableJobTable401 JobTable() : mapOid(0), maxErrNum(0) { } 402 }; 403 404 typedef std::vector<JobTable> JobTableList;/** @brief table list */ 405 406 struct Job /** @brief Job Structure */ 407 { 408 int id; /** @brief job id */ 409 std::string schema; /** @brief database name */ 410 std::string name; /** @brief job name */ 411 std::string desc; /** @brief job description */ 412 std::string userName; /** @brief user name */ 413 JobTableList jobTableList; /** @brief job table list */ 414 415 std::string createDate; /** @brief job create date */ 416 std::string createTime; /** @brief job create time */ 417 418 char fDelimiter; 419 char fEnclosedByChar; 420 char fEscapeChar; 421 int numberOfReadBuffers; 422 unsigned readBufferSize; 423 unsigned writeBufferSize; JobJob424 Job() : id(0), fDelimiter('|'), 425 fEnclosedByChar('\0'), fEscapeChar('\0'), 426 numberOfReadBuffers(0), readBufferSize(0), writeBufferSize(0) { } 427 }; 428 429 /************************************************************************ 430 * @brief Cache memory 431 ************************************************************************/ 432 struct CacheBlock /** @brief Cache block structure */ 433 { 434 uint64_t fbo; /** @brief file fbo */ 435 uint64_t lbid; /** @brief lbid */ 436 bool dirty; /** @brief dirty flag */ 437 int hitCount; /** @brief hit count */ 438 unsigned char* data; /** @brief block buffer */ CacheBlockCacheBlock439 CacheBlock() 440 { 441 data = NULL; /** @brief constructor */ 442 clear(); 443 } clearCacheBlock444 void clear() 445 { 446 fbo = lbid = hitCount = 0; 447 dirty = false; 448 449 if ( data ) memset( data, 0, BYTE_PER_BLOCK); 450 } /** @brief clear, NOTE: buf must be free by caller first */ initCacheBlock451 void init() 452 { 453 data = (unsigned char*)malloc(BYTE_PER_BLOCK); 454 } freeMemCacheBlock455 void freeMem() 456 { 457 if ( data ) free( data ); 458 } 459 }; 460 461 struct BlockBuffer /** @brief Block buffer */ 462 { 463 CommBlock cb; /** @brief Communication block structure */ 464 CacheBlock block; /** @brief Cache block strucutre */ 465 CacheListType listType; /** @brief List number, 0 - free, 1 - LRU, 2 - write */ BlockBufferBlockBuffer466 BlockBuffer() 467 { 468 clear(); /** @brief constructor */ 469 } initBlockBuffer470 void init() 471 { 472 block.init(); 473 } freeMemBlockBuffer474 void freeMem() 475 { 476 block.freeMem(); 477 } clearBlockBuffer478 void clear() 479 { 480 cb.clear(); 481 block.clear(); 482 listType = FREE_LIST; 483 } 484 }; 485 486 struct CacheControl /** @brief Cache control structure */ 487 { 488 int totalBlock; /** @brief The toal number of allocated blocks */ 489 int pctFree; /** @brief The percentage of free blocks when some blocks must be aged out */ 490 int checkInterval; /** @brief A check point interval in seconds */ CacheControlCacheControl491 CacheControl() 492 { 493 totalBlock = pctFree = checkInterval = 0; /** @brief constructor */ 494 } 495 }; 496 497 /************************************************************************ 498 * @brief Bulk parse meta data describing data in a read buffer. 499 * An offset of COLPOSPAIR_NULL_TOKEN_OFFSET represents a null token. 500 ************************************************************************/ 501 struct ColPosPair /** @brief Column position pair structure */ 502 { 503 int start; /** @brief start position */ 504 int offset; /** @brief length of token*/ 505 }; 506 507 /************************************************************************ 508 * @brief SecondaryShutdown used to terminate a thread when it sees that the 509 * JobStatus flag has been set to EXIT_FAILURE (by another thread). 510 ************************************************************************/ 511 class SecondaryShutdownException : public std::runtime_error 512 { 513 public: SecondaryShutdownException(const std::string & msg)514 SecondaryShutdownException(const std::string& msg) : 515 std::runtime_error(msg) { } 516 }; 517 518 /************************************************************************ 519 * @brief Generic exception class used to store exception string and error 520 * code for a writeengine error. 521 ************************************************************************/ 522 class WeException : public std::runtime_error 523 { 524 public: 525 WeException(const std::string& msg, int err = 0) : runtime_error(msg)526 std::runtime_error(msg), fErrorCode(err) { } errorCode(int code)527 void errorCode(int code) 528 { 529 fErrorCode = code; 530 } errorCode()531 int errorCode() const 532 { 533 return fErrorCode; 534 } 535 private: 536 int fErrorCode; 537 }; 538 539 } //end of namespace 540 541 #endif // _WE_TYPE_H_ 542