1 /* Copyright (C) 2014 InfiniDB, Inc.
2 
3    This program is free software; you can redistribute it and/or
4    modify it under the terms of the GNU General Public License
5    as published by the Free Software Foundation; version 2 of
6    the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
16    MA 02110-1301, USA. */
17 
18 /******************************************************************************
19  * $Id: slavedbrmnode.h 1828 2013-01-30 16:13:05Z pleblanc $
20  *
21  *****************************************************************************/
22 
23 /** @file
24  * class SlaveDBRMNode
25  */
26 
27 #ifndef SLAVEDBRMNODE_H_
28 #define SLAVEDBRMNODE_H_
29 
30 #include <sys/types.h>
31 #include <vector>
32 #include <set>
33 
34 #include "brmtypes.h"
35 #include "rwlock.h"
36 #include "mastersegmenttable.h"
37 #include "extentmap.h"
38 
39 #include "vss.h"
40 #include "vbbm.h"
41 #include "copylocks.h"
42 
43 #if defined(_MSC_VER) && defined(xxxSLAVEDBRMNODE_DLLEXPORT) // NOTE(review): the "xxx" prefix looks like it disables this branch on purpose -- confirm
44 #define EXPORT __declspec(dllexport) // MSVC only, and only when the macro tested above is defined
45 #else
46 #define EXPORT // in every other configuration EXPORT expands to nothing
47 #endif
48 
49 namespace BRM
50 {
51 
52 /** @brief The Slave node of the DBRM system
53  *
54  * There are 3 components of the Distributed BRM (DBRM).
55  * \li The interface
56  * \li The Master node
57  * \li Slave nodes
58  *
59  * The DBRM components effectively implement a networking & synchronization
60  * layer to the BlockResolutionManager class so that every node that needs
61  * BRM data always has an up-to-date copy of it locally.  An operation that changes
62  * BRM data is duplicated on all hosts that run a Slave node so that every
63  * node has identical copies.  All "read" operations are satisfied locally.
64  *
65  * The SlaveDBRMNode class does the work of modifying the BRM data structures
66  * on the node it's running on.  Only nodes that use the BRM need to run a slave
67  * and only one instance should run on any given node.
68  *
69  * The Calpont configuration file should contain entries for the Master
70  * and every Slave node on the system.
71  *
72  * Config file entries look like
73  * \code
74  * <DBRM_Controller>
75  *	<IPAddr>
76  * 	<Port>
77  *	<NumWorkers>N</NumWorkers>
78  * </DBRM_Controller>
79  * <DBRM_Worker1>
80  *	<IPAddr>
81  *	<Port>
82  * </DBRM_Worker1>
83  *	...
84  * <DBRM_WorkerN>
85  *	<IPAddr>
86  *	<Port>
87  * </DBRM_WorkerN>
88  * \endcode
89  */
90 
91 class SlaveDBRMNode
92 {
93 public:
94     EXPORT SlaveDBRMNode() throw();
95     EXPORT ~SlaveDBRMNode() throw();
96 
97     /** @brief Allocate a "stripe" of extents for columns in a table.
98      *
99      * Allocate a "stripe" of extents for the specified columns and DBRoot
100      * @param cols (in) List of column OIDs and column widths
101      * @param dbRoot (in) DBRoot for requested extents.
102      * @param partitionNum (in/out) Partition number in file path.
103      *        If allocating OID's first extent for this DBRoot, then
104      *        partitionNum is input, else it is an output arg.
105      * @param segmentNum (out) Segment number selected for new extents.
106      * @param extents (out) list of lbids, numBlks, and fbo for new extents
107      * @return 0 on success, -1 on error
108      */
109     EXPORT int createStripeColumnExtents(
110         const std::vector<CreateStripeColumnExtentsArgIn>& cols,
111         uint16_t  dbRoot,
112         uint32_t& partitionNum,
113         uint16_t& segmentNum,
114         std::vector<CreateStripeColumnExtentsArgOut>& extents) throw();
115 
116     /** @brief Allocate extent in the specified segment file
117      *
118      * Allocate column extent for the exact segment file specified by the
119      * requested OID, DBRoot, partition, and segment.
120      * @param oid (in) The OID requesting the extent.
121      * @param colWidth (in) Column width of the OID.
122      * @param dbRoot (in) DBRoot where extent is to be added.
123      * @param partitionNum (in) Partition number in file path.
124      * @param segmentNum (in) Segment number in file path.
125      * @param colDataType (in) the column type
126      * @param lbid (out) The first LBID of the extent created.
127      * @param allocdSize (out) The total number of LBIDs allocated.
128      * @param startBlockOffset (out) The first block of the extent created.
129      * @return 0 on success, -1 on error
130      */
131     EXPORT int createColumnExtentExactFile(OID_t oid,
132                                            uint32_t  colWidth,
133                                            uint16_t  dbRoot,
134                                            uint32_t  partitionNum,
135                                            uint16_t  segmentNum,
136                                            execplan::CalpontSystemCatalog::ColDataType colDataType,
137                                            LBID_t&    lbid,
138                                            int&       allocdSize,
139                                            uint32_t& startBlockOffset) throw();
140 
141     /** @brief Allocate an extent for a column file
142      *
143      * Allocate a column extent for the specified OID and DBRoot.
144      * @param oid (in) The OID requesting the extent.
145      * @param colWidth (in) Column width of the OID.
146      * @param dbRoot (in) DBRoot where extent is to be added.
147      * @param colDataType (in) the column type
148      * @param partitionNum (in/out) Partition number in file path.
149      *        If allocating OID's first extent for this DBRoot, then
150      *        partitionNum is input, else it is an output arg.
151      * @param segmentNum (in/out) Segment number in file path.
152      *        If allocating OID's first extent for this DBRoot, then
153      *        segmentNum is input, else it is an output arg.
154      * @param lbid (out) The first LBID of the extent created.
155      * @param allocdSize (out) The total number of LBIDs allocated.
156      * @param startBlockOffset (out) The first block of the extent created.
157      * @return 0 on success, -1 on error
158      */
159     EXPORT int createColumnExtent_DBroot(OID_t oid,
160                                          uint32_t  colWidth,
161                                          uint16_t  dbRoot,
162                                          execplan::CalpontSystemCatalog::ColDataType colDataType,
163                                          uint32_t& partitionNum,
164                                          uint16_t& segmentNum,
165                                          LBID_t&    lbid,
166                                          int&       allocdSize,
167                                          uint32_t& startBlockOffset) throw();
168 
169     /** @brief Allocate an extent for a dictionary store file
170      *
171      * Allocate a dictionary store extent for the specified OID, dbRoot,
172      * partition number, and segment number.
173      * @param oid (in) The OID requesting the extent.
174      * @param dbRoot (in) DBRoot to assign to the extent.
175      * @param partitionNum (in) Partition number to assign to the extent.
176      * @param segmentNum (in) Segment number to assign to the extent.
177      * @param lbid (out) The first LBID of the extent created.
178      * @param allocdSize (out) The total number of LBIDs allocated.
179      * @return 0 on success, -1 on error
180      */
181     EXPORT int createDictStoreExtent(OID_t oid,
182                                      uint16_t  dbRoot,
183                                      uint32_t  partitionNum,
184                                      uint16_t  segmentNum,
185                                      LBID_t&    lbid,
186                                      int&       allocdSize) throw();
187 
188     /** @brief Rollback (delete) a set of extents for the specified OID.
189      *
190      * Deletes all the extents that logically follow the specified
191      * column extent; and sets the HWM for the specified extent.
192      * @param oid OID of the extents to be deleted.
193      * @param partitionNum Last partition to be kept.
194      * @param segmentNum Last segment in partitionNum to be kept.
195      * @param hwm HWM to be assigned to the last extent that is kept.
196      * @return 0 on success, -1 on error
197      */
198     EXPORT int rollbackColumnExtents(OID_t oid,
199                                      uint32_t partitionNum,
200                                      uint16_t segmentNum,
201                                      HWM_t    hwm) throw();
202 
203     /** @brief Rollback (delete) set of extents for specified OID & DBRoot.
204      *
205      * Deletes all the extents that logically follow the specified
206      * column extent; and sets the HWM for the specified extent.
207      * @param oid OID of the extents to be deleted.
208      * @param bDeleteAll Indicates if all extents in oid and dbroot are to
209      *        be deleted; else part#, seg#, and hwm are used.
210      * @param dbRoot DBRoot of the extents to be deleted.
211      * @param partitionNum Last partition to be kept.
212      * @param segmentNum Last segment in partitionNum to be kept.
213      * @param hwm HWM to be assigned to the last extent that is kept.
214      * @return 0 on success, -1 on error
215      */
216     EXPORT int rollbackColumnExtents_DBroot(OID_t oid,
217                                             bool     bDeleteAll,
218                                             uint16_t dbRoot,
219                                             uint32_t partitionNum,
220                                             uint16_t segmentNum,
221                                             HWM_t    hwm) throw();
222 
223     /** @brief Rollback (delete) a set of dict store extents for an OID.
224      *
225      * Arguments specify the last stripe.  Any extents after this are
226      * deleted.  The hwm's of the extents in the last stripe are updated
227      * based on the contents of the hwms vector.  If hwms is a partial list
228      * (as in the first stripe of a partition), then any extents in
229      * subsequent segment files for that partition are deleted.
230      * @param oid OID of the extents to be deleted or updated.
231      * @param partitionNum Last partition to be kept.
232      * @param hwms Vector of hwms for the last partition to be kept.
233      * @return 0 on success, -1 on error
234      */
235     EXPORT int rollbackDictStoreExtents(OID_t oid,
236                                         uint32_t         partitionNum,
237                                         const std::vector<HWM_t>& hwms) throw ();
238 
239     /** @brief Rollback (delete) a set of dict store extents for an OID & DBRoot.
240      *
241      * Arguments specify the last stripe.  Any extents after this are
242      * deleted.  The hwm's of the extents in the last stripe are updated
243      * based on the contents of the hwms vector.  If hwms is a partial list
244      * (as in the first stripe of a partition), then any extents in
245      * subsequent segment files for that partition are deleted.  If hwms is
246      * empty then all the extents in dbRoot are deleted.
247      * @param oid OID of the extents to be deleted or updated.
248      * @param dbRoot DBRoot of the extents to be deleted.
249      * @param partitionNum Last partition to be kept.
250      * @param segNums Segment numbers; presumably parallel to hwms (TODO confirm).
251      * @param hwms Vector of hwms for the last partition to be kept.
252      * @return 0 on success, -1 on error
253      */
254     EXPORT int rollbackDictStoreExtents_DBroot(OID_t oid,
255             uint16_t          dbRoot,
256             uint32_t         partitionNum,
257             const std::vector<uint16_t>& segNums,
258             const std::vector<HWM_t>& hwms) throw ();
259 
260     /** @brief Delete the specified column extents.
261      *
262      * Deletes the extents from the extent map
263      * @param extentsInfo the information for the extents to delete
264      */
265     EXPORT int deleteEmptyColExtents(const ExtentsInfoMap_t& extentsInfo)  throw();
266 
267     /** @brief Delete the specified dictionary store extents.
268      *
269      * Deletes the extents from the extent map
270      * @param extentsInfo the information for the extents to delete
271      */
272     EXPORT int deleteEmptyDictStoreExtents(const ExtentsInfoMap_t& extentsInfo)  throw();
273 
274     /** @brief Delete the extents of an OID and invalidate VSS references to them
275      *
276      * Delete the extents assigned to an OID and deletes entries in the VSS
277      * that refer to the LBIDs used by it.
278      * @note The old version of this function deliberately did not delete the entries
279      * in the version buffer.
280      * @note This function is ridiculously slow right now.
281      * @param oid The OID of the object being deleted
282      * @return 0 on success, -1 on error
283      */
284     EXPORT int deleteOID(OID_t oid) throw();
285 
286     /** @brief Delete the extents of OIDs and invalidate VSS references to them
287      *
288      * Delete the extents assigned to OIDs and deletes entries in the VSS
289      * that refer to the LBIDs used by them.
290      * @note The old version of this function deliberately did not delete the entries
291      * in the version buffer.
292      * @param oids The OIDs of the objects being deleted
293      * @return 0 on success, -1 on error
294      */
295     EXPORT int deleteOIDs(const OidsMap_t&  oids) throw();
296 
297     /** @brief Set the "high water mark" of an OID, partition, segment
298      *
299      * Set the HWM (the highest numbered written block offset) of one segment file.
300      * @param oid (in) The OID (first argument; left unnamed in the declaration)
301      * @param partitionNum (in) The relevant partition number
302      * @param segmentNum (in) The relevant segment number
303      * @param hwm (in) The high water mark of oid
304      * @param firstNode (in) not described in this header -- TODO document
305      * @return 0 on success, -1 on error
306      */
307     EXPORT int setLocalHWM(OID_t, uint32_t partitionNum, uint16_t segmentNum,
308                            HWM_t hwm, bool firstNode) throw();
309 
310     EXPORT int bulkSetHWM(const std::vector<BulkSetHWMArg>&, VER_t transID,
311                           bool firstNode) throw();
312 
313     EXPORT int bulkSetHWMAndCP(const std::vector<BulkSetHWMArg>& hwmArgs,
314                                const std::vector<CPInfo>& setCPDataArgs,
315                                const std::vector<CPInfoMerge>& mergeCPDataArgs,
316                                VER_t transID, bool firstNode) throw();
317 
318     EXPORT int bulkUpdateDBRoot(const std::vector<BulkUpdateDBRootArg>&) throw();
319 
320     /** @brief Delete a Partition for the specified OID(s).
321      *
322      * @param oids (in) the OIDs of interest.
323      * @param partitionNums (in) the set of partitions to be deleted.
324      * @param emsg presumably receives an error message on failure -- TODO confirm. */
325     EXPORT int deletePartition(const std::set<OID_t>& oids,
326                                std::set<LogicalPartition>& partitionNums, std::string& emsg) throw();
327 
328     /** @brief Mark a Partition for the specified OID(s) as out of service.
329      *
330      * @param oids (in) the OIDs of interest.
331      * @param partitionNums (in) the set of partitions to be marked out of service.
332      * @param emsg presumably receives an error message on failure -- TODO confirm. */
333     EXPORT int markPartitionForDeletion(const std::set<OID_t>& oids,
334                                         std::set<LogicalPartition>& partitionNums, std::string& emsg) throw();
335 
336     /** @brief Mark all Partitions for the specified OID(s) as out of service.
337      *
338      * @param oids (in) the OIDs of interest.
339      */
340     EXPORT int markAllPartitionForDeletion(const std::set<OID_t>& oids) throw();
341 
342     /** @brief Restore a Partition for the specified OID(s).
343      *
344      * @param oids (in) the OIDs of interest.
345      * @param partitionNum (in) the set of partitions to be restored.
346      * @param emsg presumably receives an error message on failure -- TODO confirm. */
347     EXPORT int restorePartition(const std::set<OID_t>& oids,
348                                 std::set<LogicalPartition>& partitionNum, std::string& emsg) throw();
349 
350     /** @brief Delete all extent map rows for the specified dbroot
351      *
352      * @param dbroot (in) the dbroot
353      */
354     EXPORT int deleteDBRoot(uint16_t dbroot) throw();
355 
356     /** @brief Registers a version buffer entry.
357      *
358      * Registers a version buffer entry at <vbOID, vbFBO> with
359      * values of <transID, lbid>.
360      * @note The version buffer locations must hold the 'copy' lock
361      * first.
362      * @return 0 on success, -1 on error
363      */
364     EXPORT int writeVBEntry(VER_t transID, LBID_t lbid, OID_t vbOID,
365                             uint32_t vbFBO) throw();
366 
367     /** @brief Bulk registers a version buffer entry.
368      *
369      * Similar to writeVBEntry, but registers the version buffer
370      * entries in bulk for a list of lbids and vbFBOs, for a given
371      * transID and vbOID.
372      * @note The version buffer locations must hold the 'copy' lock
373      * first.
374      * @return 0 on success, -1 on error
375      */
376     EXPORT int bulkWriteVBEntry(VER_t transID,
377                                 const std::vector<BRM::LBID_t>& lbids,
378                                 OID_t vbOID,
379                                 const std::vector<uint32_t>& vbFBOs) throw();
380 
381     /** @brief Atomically prepare to copy data to the version buffer
382      *
383      * Atomically sets the copy flag on the specified LBID ranges and allocates
384      * blocks in the version buffer to copy them to.  If any LBID in the range
385      * cannot be locked, none will be and this will return -1.
386      * @param transID The transaction ID doing the operation
387      * @param vbOID (in) The version buffer OID (see the note following this comment)
388      * @param ranges (in) A list of LBID ranges that will be copied
389      * @param freeList (out) On success, the version buffer ranges to copy the LBIDs to.
390      * @param flushPMCache (in) not described in this header -- TODO document
391      * @return 0 on success, -1 on error.
392      */
393 
394     /* Note, the params to the analogous DBRM class fcn are slightly different.
395      * It takes a DBRoot param instead of a VB OID.  The conversion is
396      * done in the controllernode b/c the OID server is housed there.
397      */
398     EXPORT int beginVBCopy(VER_t transID, uint16_t vbOID,
399                            const LBIDRange_v& ranges, VBRange_v& freeList, bool flushPMCache) throw();
400 
401     /** @brief Atomically unset the copy lock & update the VSS.  Beware!  Read the note!
402      *
403      * Atomically unset the copy lock for the specified LBID ranges
404      * and add a new locked VSS entry for each LBID in the range.
405      * @note The elements of the ranges parameter <b>MUST</b> be the
406      * same elements passed to beginVBCopy().  The number and order of the
407      * elements can be different, but every element in ranges must also
408      * have been an element in beginVBCopy's ranges.
409      * @return 0 on success, -1 on error.
410      */
411     EXPORT int endVBCopy(VER_t transID, const LBIDRange_v& ranges)
412     throw();
413 
414     /** @brief Commit the changes made for the given transaction.
415      *
416      * This unlocks the VSS entries with VerID = transID.
417      * @return 0 on success, -1 on error.
418      */
419     EXPORT int vbCommit(VER_t transID) throw();
420 
421     /** @brief Reverse the changes made during the given transaction.
422      *
423      * Record that the given LBID ranges were reverted for transID.
424      * @warning This removes the copy locks held on all ranges by transID.
425      * @param transID The transaction ID
426      * @param lbidList The list of ranges to rollback.
427      * @param flushPMCache (in) not described in this header -- TODO document
428      * @return 0 on success, -1 on error.
429      */
430     EXPORT int vbRollback(VER_t transID, const LBIDRange_v& lbidList,
431                           bool flushPMCache) throw();
432 
433     /** @brief Reverse the changes made during the given transaction.
434      *
435      * Record that the given LBIDs were reverted for transID.
436      * @warning This removes the copy locks held on all ranges by transID.
437      * @param transID The transaction ID
438      * @param lbidList The list of singular LBIDs to rollback.
439      * @param flushPMCache (in) not described in this header -- TODO document
440      * @return 0 on success, -1 on error.
441      */
442     EXPORT int vbRollback(VER_t transID, const std::vector<LBID_t>& lbidList,
443                           bool flushPMCache) throw();
444 
445     EXPORT int clear() throw();
446 
447     /** @brief Check the consistency of each data structure
448      *
449      * Check the consistency of each data structure
450      * @return 0 on success, -1 on error.
451      */
452     EXPORT int checkConsistency() throw();
453 
454     EXPORT void confirmChanges() throw();
455     EXPORT void undoChanges() throw();
456 
457     EXPORT int loadExtentMap(const std::string& filename);
458     EXPORT int saveExtentMap(const std::string& filename);
459 
460     // Casual partitioning support
461     //
462     /** @brief mark the extent containing the lbid as not having valid max and min values
463     *
464     **/
465     EXPORT int markExtentInvalid(const LBID_t lbid,
466                                  execplan::CalpontSystemCatalog::ColDataType colDataType);
467     EXPORT int markExtentsInvalid(const std::vector<LBID_t>& lbids,
468                                   const std::vector<execplan::CalpontSystemCatalog::ColDataType>& colDataTypes);
469 
470     /** @brief update the extent identified by lbid with the given max, min, & seqNum values
471     *
472     **/
473     EXPORT int setExtentMaxMin(const LBID_t lbid, const int64_t max, const int64_t min,
474                                const int32_t seqNum, bool firstNode);
475 
476     // Casual partitioning support
477     //
478     /** @brief Sets min and max values from the information in the passed map.
479      *
480      *   @param cpMaxMinMap - Map with cp info.  The key must be the starting LBID.
481      *
482      **/
483     // @bug 1970.  Added setExtentsMaxMin.
484     EXPORT int setExtentsMaxMin(const CPMaxMinMap_t& cpMaxMinMap, bool firstNode);
485 
486     /** @brief Merges list of min/max values with current CP min/max info
487      *
488      *  @param cpMaxMinMap - Map with CP info.  The key is the starting LBID
489      */
490     EXPORT int mergeExtentsMaxMin(CPMaxMinMergeMap_t& cpMaxMinMap);
491 
492     /* Write-side copylocks interface */
493     EXPORT int dmlLockLBIDRanges(const std::vector<LBIDRange>& ranges, int txnID);
494     EXPORT int dmlReleaseLBIDRanges(const std::vector<LBIDRange>& ranges);
495 
496     EXPORT int loadState(std::string filename) throw();
497     EXPORT int saveState(std::string filename) throw();
498 
499     EXPORT const bool* getEMFLLockStatus();
500     EXPORT const bool* getEMLockStatus();
501     EXPORT const bool* getVBBMLockStatus();
502     EXPORT const bool* getVSSLockStatus();
503 
504 private:
505     explicit SlaveDBRMNode(const SlaveDBRMNode& brm);  // copy construction is private, so clients cannot copy a node
506     SlaveDBRMNode& operator=(const SlaveDBRMNode& brm);  // copy assignment is private as well
507     int lookup(OID_t oid, LBIDRange_v& lbidList) throw();
508 
509     MasterSegmentTable mst;
510     ExtentMap em;
511     VBBM vbbm;
512     VSS vss;
513     CopyLocks copylocks;
514     bool locked[3];  // 0 = VBBM, 1 = VSS, 2 = CopyLocks
515 
516 };
517 
518 }
519 
520 #undef EXPORT // EXPORT is only meant for this header; do not leak it to includers
521 
522 #endif
523