1 /* Copyright (C) 2014 InfiniDB, Inc.
2 
3    This program is free software; you can redistribute it and/or
4    modify it under the terms of the GNU General Public License
5    as published by the Free Software Foundation; version 2 of
6    the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
16    MA 02110-1301, USA. */
17 
18 /*******************************************************************************
19  * $Id: we_colextinf.h 4501 2013-01-31 21:15:58Z dcathey $
20  *
21  ******************************************************************************/
22 
23 /** @file
24  * Contains class to track column information per extent.
25  * For ex: this is where we track the min/max values per extent for a column.
26  */
27 
28 #ifndef WE_COLEXTINF_H_
29 #define WE_COLEXTINF_H_
30 
31 #include <limits>
32 #include <stdint.h>
33 #include <set>
34 #ifdef _MSC_VER
35 #include <unordered_map>
36 #else
37 #include <tr1/unordered_map>
38 #endif
39 #include <boost/thread/mutex.hpp>
40 
41 #include "brmtypes.h"
42 #include "we_type.h"
43 
44 namespace WriteEngine
45 {
46 class Log;
47 class BRMReporter;
48 typedef execplan::CalpontSystemCatalog::ColDataType ColDataType;
49 //------------------------------------------------------------------------------
50 /** @brief Class to store min/max and LBID information for an extent.
51  *  For character data, the min and max values are maintained in reverse
 *  order to facilitate string comparisons.  When the range is sent to
53  *  BRM, the bytes will be swapped back into the correct order.
54  *  BRM will need to be told when the column carries character data, so
55  *  that BRM can do the correct binary comparisons of the char data.
56  */
57 //------------------------------------------------------------------------------
58 class ColExtInfEntry
59 {
60 public:
61     // Default constructor
ColExtInfEntry()62     ColExtInfEntry() : fLbid(INVALID_LBID),
63         fMinVal(LLONG_MIN),
64         fMaxVal(LLONG_MIN),
65         fNewExtent(true)   { }
66 
67     // Used to create entry for an existing extent we are going to add data to.
ColExtInfEntry(BRM::LBID_t lbid,bool bIsNewExtent)68     ColExtInfEntry(BRM::LBID_t lbid, bool bIsNewExtent) :
69         fLbid(lbid),
70         fMinVal(LLONG_MIN),
71         fMaxVal(LLONG_MIN),
72         fNewExtent(bIsNewExtent)  { }
73 
74     // Used to create entry for a new extent, with LBID not yet allocated
ColExtInfEntry(int64_t minVal,int64_t maxVal)75     ColExtInfEntry(int64_t minVal, int64_t maxVal) :
76         fLbid(INVALID_LBID),
77         fMinVal(minVal),
78         fMaxVal(maxVal),
79         fNewExtent(true)   { }
80 
81     // Used to create entry for a new extent, with LBID not yet allocated
ColExtInfEntry(uint64_t minVal,uint64_t maxVal)82     ColExtInfEntry(uint64_t minVal, uint64_t maxVal) :
83         fLbid(INVALID_LBID),
84         fMinVal(static_cast<int64_t>(minVal)),
85         fMaxVal(static_cast<int64_t>(maxVal)),
86         fNewExtent(true)   { }
87 
88     BRM::LBID_t fLbid;     // LBID for an extent; should be the starting LBID
89     int64_t     fMinVal;   // minimum value for extent associated with LBID
90     int64_t     fMaxVal;   // maximum value for extent associated with LBID
91     bool        fNewExtent;// is this a new extent
92 };
93 
94 //------------------------------------------------------------------------------
/** @brief Hash function used to store ColExtInfEntry objects into a map; using
96  *  the last input Row number in the extent, as the key.
97  */
98 //------------------------------------------------------------------------------
99 struct uint64Hasher : public std::unary_function<RID, std::size_t>
100 {
operatoruint64Hasher101     std::size_t operator()(RID val) const
102     {
103         return static_cast<std::size_t>(val);
104     }
105 };
106 
107 //------------------------------------------------------------------------------
108 /** @brief Stub base class for ColExtInf; used for column data types that do
109  *  not need the functionality of ColExtInf (ex: floats and dictionaries).
110  */
111 //------------------------------------------------------------------------------
112 class ColExtInfBase
113 {
114 public:
ColExtInfBase()115     ColExtInfBase( )                                        { }
~ColExtInfBase()116     virtual ~ColExtInfBase( )                               { }
117 
addFirstEntry(RID lastInputRow,BRM::LBID_t lbid,bool bIsNewExtent)118     virtual void addFirstEntry   ( RID     lastInputRow,
119                                    BRM::LBID_t lbid,
120                                    bool    bIsNewExtent)    { }
121 
addOrUpdateEntry(RID lastInputRow,int64_t minVal,int64_t maxVal,ColDataType colDataType)122     virtual void addOrUpdateEntry( RID     lastInputRow,
123                                    int64_t minVal,
124                                    int64_t maxVal,
125                                    ColDataType colDataType ) { }
126 
getCPInfoForBRM(JobColumn column,BRMReporter & brmReporter)127     virtual void getCPInfoForBRM ( JobColumn column,
128                                    BRMReporter& brmReporter) { }
print(const JobColumn & column)129     virtual void print( const JobColumn& column )           { }
updateEntryLbid(BRM::LBID_t startLbid)130     virtual int updateEntryLbid( BRM::LBID_t startLbid )
131     {
132         return NO_ERROR;
133     }
134 };
135 
136 //------------------------------------------------------------------------------
137 /** @brief Collects LBID and min/max info about the extents that are loaded.
138  *
 *  As a Read buffer is parsed, addOrUpdateEntry() is called to add the extent,
 *  and its information to the collection.  For new extents, we have to add
141  *  the LBID later, when the extent is allocated, since the extent's first
142  *  buffer will be finished before the extent is allocated from BRM.  In this
143  *  case, updateEntryLbid() is called to add the LBID.  The specified LBID is
144  *  assigned to the extent with the lowest Row id that is awaiting an LBID.
145  *  This should be a safe assumption to make, that the extents will be allocated
146  *  in Row id order.   lastInputRow numbers are relative to the first row in
147  *  the import (ie: Row 0 is the first row in the *.tbl file).
148  */
149 //------------------------------------------------------------------------------
150 class ColExtInf : public ColExtInfBase
151 {
152 public:
153 
154     /** @brief Constructor
155      *  @param logger Log object using for debug logging.
156      */
ColExtInf(OID oid,Log * logger)157     ColExtInf( OID oid, Log* logger ) : fColOid(oid), fLog(logger) { }
~ColExtInf()158     virtual ~ColExtInf( )                   { }
159 
160     /** @brief Add an entry for first extent, for the specified Row and LBID.
161      *  @param lastInputRow Last input Row for old extent we are adding data to
162      *  @param lbid         LBID of the relevant extent.
163      *  @param bIsNewExtent Treat as new or existing extent when CP min/max is
164      *                      sent to BRM
165      */
166     virtual void addFirstEntry( RID         lastInputRow,
167                                 BRM::LBID_t lbid,
168                                 bool        bIsNewExtent );
169 
170     /** @brief Add or update an entry for the specified Row and its min/max val.
171      *         If new extent, LBID will be added later when extent is allocated.
172      *  @param lastInputRow Last input Row for a new extent being loaded.
173      *  @param minVal       Minimum value for the latest buffer read
174      *  @param maxVal       Maximum value for the latest buffer read
175      */
176     virtual void addOrUpdateEntry( RID     lastInputRow,
177                                    int64_t minVal,
178                                    int64_t maxVal,
179                                    ColDataType colDataType );
180 
181     /** @brief Send updated Casual Partition (CP) info to BRM.
182      */
183     virtual void getCPInfoForBRM ( JobColumn column,
184                                    BRMReporter& brmReporter );
185 
186     /** @brief Debug print function.
187      */
188     virtual void print( const JobColumn& column );
189 
190     /** @brief Add extent's LBID to the oldest entry that is awaiting an LBID
191      *  @param startLbid Starting LBID for a pending extent.
192      *  @return NO_ERROR upon success; else error if extent entry not found
193      */
194     virtual int updateEntryLbid( BRM::LBID_t startLbid );
195 
196 private:
197     OID             fColOid;              // Column OID for the relevant extents
198     Log*            fLog;                 // Log used for debug logging
199     boost::mutex    fMapMutex;            // protects unordered map access
200     std::set<RID>   fPendingExtentRows;   // list of lastInputRow entries that
201     // are awaiting an LBID assignment.
202 
203     // unordered map where we collect the min/max values per extent
204     std::tr1::unordered_map<RID, ColExtInfEntry, uint64Hasher> fMap;
205 
206     // disable copy constructor and assignment operator
207     ColExtInf(const ColExtInf&);
208     ColExtInf& operator=(const ColExtInf&);
209 };
210 
211 } //end of namespace
212 
213 #endif // WE_COLEXTINF_H_
214