1 /* Copyright (C) 2014 InfiniDB, Inc. 2 3 This program is free software; you can redistribute it and/or 4 modify it under the terms of the GNU General Public License 5 as published by the Free Software Foundation; version 2 of 6 the License. 7 8 This program is distributed in the hope that it will be useful, 9 but WITHOUT ANY WARRANTY; without even the implied warranty of 10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 GNU General Public License for more details. 12 13 You should have received a copy of the GNU General Public License 14 along with this program; if not, write to the Free Software 15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 16 MA 02110-1301, USA. */ 17 18 /******************************************************************************* 19 * $Id: we_colextinf.h 4501 2013-01-31 21:15:58Z dcathey $ 20 * 21 ******************************************************************************/ 22 23 /** @file 24 * Contains class to track column information per extent. 25 * For ex: this is where we track the min/max values per extent for a column. 26 */ 27 28 #ifndef WE_COLEXTINF_H_ 29 #define WE_COLEXTINF_H_ 30 31 #include <limits> 32 #include <stdint.h> 33 #include <set> 34 #ifdef _MSC_VER 35 #include <unordered_map> 36 #else 37 #include <tr1/unordered_map> 38 #endif 39 #include <boost/thread/mutex.hpp> 40 41 #include "brmtypes.h" 42 #include "we_type.h" 43 44 namespace WriteEngine 45 { 46 class Log; 47 class BRMReporter; 48 typedef execplan::CalpontSystemCatalog::ColDataType ColDataType; 49 //------------------------------------------------------------------------------ 50 /** @brief Class to store min/max and LBID information for an extent. 51 * For character data, the min and max values are maintained in reverse 52 * order to facilitate string comparisions. When the range is sent to 53 * BRM, the bytes will be swapped back into the correct order. 54 * BRM will need to be told when the column carries character data, so 55 * that BRM can do the correct binary comparisons of the char data. 56 */ 57 //------------------------------------------------------------------------------ 58 class ColExtInfEntry 59 { 60 public: 61 // Default constructor ColExtInfEntry()62 ColExtInfEntry() : fLbid(INVALID_LBID), 63 fMinVal(LLONG_MIN), 64 fMaxVal(LLONG_MIN), 65 fNewExtent(true) { } 66 67 // Used to create entry for an existing extent we are going to add data to. ColExtInfEntry(BRM::LBID_t lbid,bool bIsNewExtent)68 ColExtInfEntry(BRM::LBID_t lbid, bool bIsNewExtent) : 69 fLbid(lbid), 70 fMinVal(LLONG_MIN), 71 fMaxVal(LLONG_MIN), 72 fNewExtent(bIsNewExtent) { } 73 74 // Used to create entry for a new extent, with LBID not yet allocated ColExtInfEntry(int64_t minVal,int64_t maxVal)75 ColExtInfEntry(int64_t minVal, int64_t maxVal) : 76 fLbid(INVALID_LBID), 77 fMinVal(minVal), 78 fMaxVal(maxVal), 79 fNewExtent(true) { } 80 81 // Used to create entry for a new extent, with LBID not yet allocated ColExtInfEntry(uint64_t minVal,uint64_t maxVal)82 ColExtInfEntry(uint64_t minVal, uint64_t maxVal) : 83 fLbid(INVALID_LBID), 84 fMinVal(static_cast<int64_t>(minVal)), 85 fMaxVal(static_cast<int64_t>(maxVal)), 86 fNewExtent(true) { } 87 88 BRM::LBID_t fLbid; // LBID for an extent; should be the starting LBID 89 int64_t fMinVal; // minimum value for extent associated with LBID 90 int64_t fMaxVal; // maximum value for extent associated with LBID 91 bool fNewExtent;// is this a new extent 92 }; 93 94 //------------------------------------------------------------------------------ 95 /** @brief Hash function used to store ColEntInfEntry objects into a map; using 96 * the last input Row number in the extent, as the key. 97 */ 98 //------------------------------------------------------------------------------ 99 struct uint64Hasher : public std::unary_function<RID, std::size_t> 100 { operatoruint64Hasher101 std::size_t operator()(RID val) const 102 { 103 return static_cast<std::size_t>(val); 104 } 105 }; 106 107 //------------------------------------------------------------------------------ 108 /** @brief Stub base class for ColExtInf; used for column data types that do 109 * not need the functionality of ColExtInf (ex: floats and dictionaries). 110 */ 111 //------------------------------------------------------------------------------ 112 class ColExtInfBase 113 { 114 public: ColExtInfBase()115 ColExtInfBase( ) { } ~ColExtInfBase()116 virtual ~ColExtInfBase( ) { } 117 addFirstEntry(RID lastInputRow,BRM::LBID_t lbid,bool bIsNewExtent)118 virtual void addFirstEntry ( RID lastInputRow, 119 BRM::LBID_t lbid, 120 bool bIsNewExtent) { } 121 addOrUpdateEntry(RID lastInputRow,int64_t minVal,int64_t maxVal,ColDataType colDataType)122 virtual void addOrUpdateEntry( RID lastInputRow, 123 int64_t minVal, 124 int64_t maxVal, 125 ColDataType colDataType ) { } 126 getCPInfoForBRM(JobColumn column,BRMReporter & brmReporter)127 virtual void getCPInfoForBRM ( JobColumn column, 128 BRMReporter& brmReporter) { } print(const JobColumn & column)129 virtual void print( const JobColumn& column ) { } updateEntryLbid(BRM::LBID_t startLbid)130 virtual int updateEntryLbid( BRM::LBID_t startLbid ) 131 { 132 return NO_ERROR; 133 } 134 }; 135 136 //------------------------------------------------------------------------------ 137 /** @brief Collects LBID and min/max info about the extents that are loaded. 138 * 139 * As a Read buffer is parsed, addOrUpdateEntryi() is called to add the extent, 140 * and it's information to the collection. For new extents, we have to add 141 * the LBID later, when the extent is allocated, since the extent's first 142 * buffer will be finished before the extent is allocated from BRM. In this 143 * case, updateEntryLbid() is called to add the LBID. The specified LBID is 144 * assigned to the extent with the lowest Row id that is awaiting an LBID. 145 * This should be a safe assumption to make, that the extents will be allocated 146 * in Row id order. lastInputRow numbers are relative to the first row in 147 * the import (ie: Row 0 is the first row in the *.tbl file). 148 */ 149 //------------------------------------------------------------------------------ 150 class ColExtInf : public ColExtInfBase 151 { 152 public: 153 154 /** @brief Constructor 155 * @param logger Log object using for debug logging. 156 */ ColExtInf(OID oid,Log * logger)157 ColExtInf( OID oid, Log* logger ) : fColOid(oid), fLog(logger) { } ~ColExtInf()158 virtual ~ColExtInf( ) { } 159 160 /** @brief Add an entry for first extent, for the specified Row and LBID. 161 * @param lastInputRow Last input Row for old extent we are adding data to 162 * @param lbid LBID of the relevant extent. 163 * @param bIsNewExtent Treat as new or existing extent when CP min/max is 164 * sent to BRM 165 */ 166 virtual void addFirstEntry( RID lastInputRow, 167 BRM::LBID_t lbid, 168 bool bIsNewExtent ); 169 170 /** @brief Add or update an entry for the specified Row and its min/max val. 171 * If new extent, LBID will be added later when extent is allocated. 172 * @param lastInputRow Last input Row for a new extent being loaded. 173 * @param minVal Minimum value for the latest buffer read 174 * @param maxVal Maximum value for the latest buffer read 175 */ 176 virtual void addOrUpdateEntry( RID lastInputRow, 177 int64_t minVal, 178 int64_t maxVal, 179 ColDataType colDataType ); 180 181 /** @brief Send updated Casual Partition (CP) info to BRM. 182 */ 183 virtual void getCPInfoForBRM ( JobColumn column, 184 BRMReporter& brmReporter ); 185 186 /** @brief Debug print function. 187 */ 188 virtual void print( const JobColumn& column ); 189 190 /** @brief Add extent's LBID to the oldest entry that is awaiting an LBID 191 * @param startLbid Starting LBID for a pending extent. 192 * @return NO_ERROR upon success; else error if extent entry not found 193 */ 194 virtual int updateEntryLbid( BRM::LBID_t startLbid ); 195 196 private: 197 OID fColOid; // Column OID for the relevant extents 198 Log* fLog; // Log used for debug logging 199 boost::mutex fMapMutex; // protects unordered map access 200 std::set<RID> fPendingExtentRows; // list of lastInputRow entries that 201 // are awaiting an LBID assignment. 202 203 // unordered map where we collect the min/max values per extent 204 std::tr1::unordered_map<RID, ColExtInfEntry, uint64Hasher> fMap; 205 206 // disable copy constructor and assignment operator 207 ColExtInf(const ColExtInf&); 208 ColExtInf& operator=(const ColExtInf&); 209 }; 210 211 } //end of namespace 212 213 #endif // WE_COLEXTINF_H_ 214