1 /* Copyright (C) 2014 InfiniDB, Inc. 2 3 This program is free software; you can redistribute it and/or 4 modify it under the terms of the GNU General Public License 5 as published by the Free Software Foundation; version 2 of 6 the License. 7 8 This program is distributed in the hope that it will be useful, 9 but WITHOUT ANY WARRANTY; without even the implied warranty of 10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 GNU General Public License for more details. 12 13 You should have received a copy of the GNU General Public License 14 along with this program; if not, write to the Free Software 15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 16 MA 02110-1301, USA. */ 17 18 /****************************************************************************** 19 * $Id: slavedbrmnode.h 1828 2013-01-30 16:13:05Z pleblanc $ 20 * 21 *****************************************************************************/ 22 23 /** @file 24 * class SlaveDBRMNode 25 */ 26 27 #ifndef SLAVEDBRMNODE_H_ 28 #define SLAVEDBRMNODE_H_ 29 30 #include <sys/types.h> 31 #include <vector> 32 #include <set> 33 34 #include "brmtypes.h" 35 #include "rwlock.h" 36 #include "mastersegmenttable.h" 37 #include "extentmap.h" 38 39 #include "vss.h" 40 #include "vbbm.h" 41 #include "copylocks.h" 42 43 #if defined(_MSC_VER) && defined(xxxSLAVEDBRMNODE_DLLEXPORT) 44 #define EXPORT __declspec(dllexport) 45 #else 46 #define EXPORT 47 #endif 48 49 namespace BRM 50 { 51 52 /** @brief The Slave node of the DBRM system 53 * 54 * There are 3 components of the Distributed BRM (DBRM). 55 * \li The interface 56 * \li The Master node 57 * \li Slave nodes 58 * 59 * The DBRM components effectively implement a networking & synchronization 60 * layer to the BlockResolutionManager class so that every node that needs 61 * BRM data always has an up-to-date copy of it locally. An operation that changes 62 * BRM data is duplicated on all hosts that run a Slave node so that every 63 * node has identical copies. All "read" operations are satisfied locally. 64 * 65 * The SlaveDBRMNode class does the work of modifying the BRM data structures 66 * on the node it's running on. Only nodes that use the BRM need to run a slave 67 * and only one instance should run on any given node. 68 * 69 * The Calpont configuration file should contain entries for the Master 70 * and every Slave node on the system. 71 * 72 * Config file entries look like 73 * \code 74 * <DBRM_Controller> 75 * <IPAddr> 76 * <Port> 77 * <NumWorkers>N</NumWorkers> 78 * </DBRM_Controller> 79 * <DBRM_Worker1> 80 * <IPAddr> 81 * <Port> 82 * </DBRM_Worker1> 83 * ... 84 * <DBRM_WorkerN> 85 * <IPAddr> 86 * <Port> 87 * </DBRM_WorkerN> 88 * \endcode 89 */ 90 91 class SlaveDBRMNode 92 { 93 public: 94 EXPORT SlaveDBRMNode() throw(); 95 EXPORT ~SlaveDBRMNode() throw(); 96 97 /** @brief Allocate a "stripe" of extents for columns in a table. 98 * 99 * Allocate a "stripe" of extents for the specified columns and DBRoot 100 * @param cols (in) List of column OIDs and column widths 101 * @param dbRoot (in) DBRoot for requested extents. 102 * @param partitionNum (in/out) Partition number in file path. 103 * If allocating OID's first extent for this DBRoot, then 104 * partitionNum is input, else it is an output arg. 105 * @param segmentNum (out) Segment number selected for new extents. 106 * @param extents (out) list of lbids, numBlks, and fbo for new extents 107 * @return 0 on success, -1 on error 108 */ 109 EXPORT int createStripeColumnExtents( 110 const std::vector<CreateStripeColumnExtentsArgIn>& cols, 111 uint16_t dbRoot, 112 uint32_t& partitionNum, 113 uint16_t& segmentNum, 114 std::vector<CreateStripeColumnExtentsArgOut>& extents) throw(); 115 116 /** @brief Allocate extent in the specified segment file 117 * 118 * Allocate column extent for the exact segment file specified by the 119 * requested OID,DBRoot, partition, and segment. 120 * @param OID (in) The OID requesting the extent. 121 * @param colWidth (in) Column width of the OID. 122 * @param dbRoot (in) DBRoot where extent is to be added. 123 * @param partitionNum (in) Partition number in file path. 124 * @param segmentNum (in) Segment number in file path. 125 * @param colDataType (in) the column type 126 * @param lbid (out) The first LBID of the extent created. 127 * @param allocdSize (out) The total number of LBIDs allocated. 128 * @param startBlockOffset (out) The first block of the extent created. 129 * @return 0 on success, -1 on error 130 */ 131 EXPORT int createColumnExtentExactFile(OID_t oid, 132 uint32_t colWidth, 133 uint16_t dbRoot, 134 uint32_t partitionNum, 135 uint16_t segmentNum, 136 execplan::CalpontSystemCatalog::ColDataType colDataType, 137 LBID_t& lbid, 138 int& allocdSize, 139 uint32_t& startBlockOffset) throw(); 140 141 /** @brief Allocate an extent for a column file 142 * 143 * Allocate a column extent for the specified OID and DBRoot. 144 * @param OID (in) The OID requesting the extent. 145 * @param colWidth (in) Column width of the OID. 146 * @param dbRoot (in) DBRoot where extent is to be added. 147 * @param colDataType (in) the column type 148 * @param partitionNum (in/out) Partition number in file path. 149 * If allocating OID's first extent for this DBRoot, then 150 * partitionNum is input, else it is an output arg. 151 * @param segmentNum (in/out) Segment number in file path. 152 * If allocating OID's first extent for this DBRoot, then 153 * segmentNum is input, else it is an output arg. 154 * @param lbid (out) The first LBID of the extent created. 155 * @param allocdSize (out) The total number of LBIDs allocated. 156 * @param startBlockOffset (out) The first block of the extent created. 157 * @return 0 on success, -1 on error 158 */ 159 EXPORT int createColumnExtent_DBroot(OID_t oid, 160 uint32_t colWidth, 161 uint16_t dbRoot, 162 execplan::CalpontSystemCatalog::ColDataType colDataType, 163 uint32_t& partitionNum, 164 uint16_t& segmentNum, 165 LBID_t& lbid, 166 int& allocdSize, 167 uint32_t& startBlockOffset) throw(); 168 169 /** @brief Allocate an extent for a dictionary store file 170 * 171 * Allocate a dictionary store extent for the specified OID, dbRoot, 172 * partition number, and segment number. 173 * @param OID (in) The OID requesting the extent. 174 * @param dbRoot (in) DBRoot to assign to the extent. 175 * @param partitionNum (in) Partition number to assign to the extent. 176 * @param segmentNum (in) Segment number to assign to the extent. 177 * @param lbid (out) The first LBID of the extent created. 178 * @param allocdSize (out) The total number of LBIDs allocated. 179 * @return 0 on success, -1 on error 180 */ 181 EXPORT int createDictStoreExtent(OID_t oid, 182 uint16_t dbRoot, 183 uint32_t partitionNum, 184 uint16_t segmentNum, 185 LBID_t& lbid, 186 int& allocdSize) throw(); 187 188 /** @brief Rollback (delete) a set of extents for the specified OID. 189 * 190 * Deletes all the extents that logically follow the specified 191 * column extent; and sets the HWM for the specified extent. 192 * @param oid OID of the extents to be deleted. 193 * @param partitionNum Last partition to be kept. 194 * @param segmentNum Last segment in partitionNum to be kept. 195 * @param hwm HWM to be assigned to the last extent that is kept. 196 * @return 0 on success, -1 on error 197 */ 198 EXPORT int rollbackColumnExtents(OID_t oid, 199 uint32_t partitionNum, 200 uint16_t segmentNum, 201 HWM_t hwm) throw(); 202 203 /** @brief Rollback (delete) set of extents for specified OID & DBRoot. 204 * 205 * Deletes all the extents that logically follow the specified 206 * column extent; and sets the HWM for the specified extent. 207 * @param oid OID of the extents to be deleted. 208 * @param bDeleteAll Indicates if all extents in oid and dbroot are to 209 * be deleted; else part#, seg#, and hwm are used. 210 * @param dbRoot DBRoot of the extents to be deleted. 211 * @param partitionNum Last partition to be kept. 212 * @param segmentNum Last segment in partitionNum to be kept. 213 * @param hwm HWM to be assigned to the last extent that is kept. 214 * @return 0 on success, -1 on error 215 */ 216 EXPORT int rollbackColumnExtents_DBroot(OID_t oid, 217 bool bDeleteAll, 218 uint16_t dbRoot, 219 uint32_t partitionNum, 220 uint16_t segmentNum, 221 HWM_t hwm) throw(); 222 223 /** @brief Rollback (delete) a set of dict store extents for an OID. 224 * 225 * Arguments specify the last stripe. Any extents after this are 226 * deleted. The hwm's of the extents in the last stripe are updated 227 * based on the contents of the hwm vector. If hwms is a partial list, 228 * (as in the first stripe of a partition), then any extents in sub- 229 * sequent segment files for that partition are deleted. 230 * @param oid OID of the extents to be deleted or updated. 231 * @param partitionNum Last partition to be kept. 232 * @param hwms Vector of hwms for the last partition to be kept. 233 * @return 0 on success, -1 on error 234 */ 235 EXPORT int rollbackDictStoreExtents(OID_t oid, 236 uint32_t partitionNum, 237 const std::vector<HWM_t>& hwms) throw (); 238 239 /** @brief Rollback (delete) a set of dict store extents for an OID & 240 * DBRoot. 241 * 242 * Arguments specify the last stripe. Any extents after this are 243 * deleted. The hwm's of the extents in the last stripe are updated 244 * based on the contents of the hwm vector. If hwms is a partial list, 245 * (as in the first stripe of a partition), then any extents in sub- 246 * sequent segment files for that partition are deleted. If hwms is 247 * empty then all the extents in dbRoot are deleted. 248 * @param oid OID of the extents to be deleted or updated. 249 * @param dbRoot DBRoot of the extents to be deleted. 250 * @param partitionNum Last partition to be kept. 251 * @param hwms Vector of hwms for the last partition to be kept. 252 * @return 0 on success, -1 on error 253 */ 254 EXPORT int rollbackDictStoreExtents_DBroot(OID_t oid, 255 uint16_t dbRoot, 256 uint32_t partitionNum, 257 const std::vector<uint16_t>& segNums, 258 const std::vector<HWM_t>& hwms) throw (); 259 260 /** @brief delete of column extents for the specified extents. 261 * 262 * Deletes the extents from extent map 263 * @param extentInfo the information for extents 264 */ 265 EXPORT int deleteEmptyColExtents(const ExtentsInfoMap_t& extentsInfo) throw(); 266 267 /** @brief delete of dictionary extents for the specified extents. 268 * 269 * Deletes the extents from extent map 270 * @param extentInfo the information for extents 271 */ 272 EXPORT int deleteEmptyDictStoreExtents(const ExtentsInfoMap_t& extentsInfo) throw(); 273 274 /** @brief Delete the extents of an OID and invalidate VSS references to them 275 * 276 * Delete the extents assigned to an OID and deletes entries in the VSS 277 * that refer to the LBIDs used by it. 278 * @note The old version of this function deliberately did not delete the entries 279 * in the version buffer. 280 * @note This function is ridiculously slow right now. 281 * @param OID The OID of the object being deleted 282 * @return 0 on success, -1 on error 283 */ 284 EXPORT int deleteOID(OID_t oid) throw(); 285 286 /** @brief Delete the extents of OIDs and invalidate VSS references to them 287 * 288 * Delete the extents assigned to OIDs and deletes entries in the VSS 289 * that refer to the LBIDs used by it. 290 * @note The old version of this function deliberately did not delete the entries 291 * in the version buffer. 292 * @param OIDs The OIDs of the object being deleted 293 * @return 0 on success, -1 on error 294 */ 295 EXPORT int deleteOIDs(const OidsMap_t& oids) throw(); 296 297 /** @brief Set the "high water mark" of an OID, partition, segment 298 * 299 * Set the high water mark (aka, the highest numbered written 300 * block offset) for a specific OID, partition, segment file. 301 * @param oid (in) The OID 302 * @param partitionNum (in) The relevant partition number 303 * @param segmentNum (in) The relevant segment number 304 * @param hwm (in) The high water mark of oid 305 * @return 0 on success, -1 on error 306 */ 307 EXPORT int setLocalHWM(OID_t, uint32_t partitionNum, uint16_t segmentNum, 308 HWM_t hwm, bool firstNode) throw(); 309 310 EXPORT int bulkSetHWM(const std::vector<BulkSetHWMArg>&, VER_t transID, 311 bool firstNode) throw(); 312 313 EXPORT int bulkSetHWMAndCP(const std::vector<BulkSetHWMArg>& hwmArgs, 314 const std::vector<CPInfo>& setCPDataArgs, 315 const std::vector<CPInfoMerge>& mergeCPDataArgs, 316 VER_t transID, bool firstNode) throw(); 317 318 EXPORT int bulkUpdateDBRoot(const std::vector<BulkUpdateDBRootArg>&) throw(); 319 320 /** @brief Delete a Partition for the specified OID(s). 321 * 322 * @param OID (in) the OID of interest. 323 * @param partitionNums (in) the set of partitions to be deleted. 324 */ 325 EXPORT int deletePartition(const std::set<OID_t>& oids, 326 std::set<LogicalPartition>& partitionNums, std::string& emsg) throw(); 327 328 /** @brief Mark a Partition for the specified OID(s) as out of service. 329 * 330 * @param OID (in) the OID of interest. 331 * @param partitionNums (in) the set of partitions to be marked out of service. 332 */ 333 EXPORT int markPartitionForDeletion(const std::set<OID_t>& oids, 334 std::set<LogicalPartition>& partitionNums, std::string& emsg) throw(); 335 336 /** @brief Mark all Partitions for the specified OID(s) as out of service. 337 * 338 * @param OID (in) the OID of interest. 339 */ 340 EXPORT int markAllPartitionForDeletion(const std::set<OID_t>& oids) throw(); 341 342 /** @brief Restore a Partition for the specified OID(s). 343 * 344 * @param OID (in) the OID of interest. 345 * @param partitionNums (in) the set of partitions to be restored. 346 */ 347 EXPORT int restorePartition(const std::set<OID_t>& oids, 348 std::set<LogicalPartition>& partitionNum, std::string& emsg) throw(); 349 350 /** @brief Delete all extent map rows for the specified dbroot 351 * 352 * @param dbroot (in) the dbroot 353 */ 354 EXPORT int deleteDBRoot(uint16_t dbroot) throw(); 355 356 /** @brief Registers a version buffer entry. 357 * 358 * Registers a version buffer entry at <vbOID, vbFBO> with 359 * values of <transID, lbid>. 360 * @note The version buffer locations must hold the 'copy' lock 361 * first. 362 * @return 0 on success, -1 on error 363 */ 364 EXPORT int writeVBEntry(VER_t transID, LBID_t lbid, OID_t vbOID, 365 uint32_t vbFBO) throw(); 366 367 /** @brief Bulk registers a version buffer entry. 368 * 369 * Similar to writeVBEntry, but registers the version buffer 370 * entries in bulk for a list of lbids and vbFBOs, for a given 371 * transID and vbOID. 372 * @note The version buffer locations must hold the 'copy' lock 373 * first. 374 * @return 0 on success, -1 on error 375 */ 376 EXPORT int bulkWriteVBEntry(VER_t transID, 377 const std::vector<BRM::LBID_t>& lbids, 378 OID_t vbOID, 379 const std::vector<uint32_t>& vbFBOs) throw(); 380 381 /** @brief Atomically prepare to copy data to the version buffer 382 * 383 * Atomically sets the copy flag on the specified LBID ranges 384 * and allocate blocks in the version buffer to copy them to. 385 * If any LBID in the range cannot be locked, none will be 386 * and this will return -1. 387 * @param transID The transaction ID doing the operation 388 * @param ranges (in) A list of LBID ranges that will be copied 389 * @param freeList (out) On success, a list of ranges of the version 390 * buffer blocks to copy the LBID range to. 391 * @return 0 on success, -1 on error. 392 */ 393 394 /* Note, the params to the analogous DBRM class fcn are slightly different. 395 * It takes a DBRoot param instead of a VB OID. The conversion is 396 * done in the controllernode b/c the OID server is housed there. 397 */ 398 EXPORT int beginVBCopy(VER_t transID, uint16_t vbOID, 399 const LBIDRange_v& ranges, VBRange_v& freeList, bool flushPMCache) throw(); 400 401 /** @brief Atomically unset the copy lock & update the VSS. Beware! Read the warning! 402 * 403 * Atomically unset the copy lock for the specified LBID ranges 404 * and add a new locked VSS entry for each LBID in the range. 405 * @note The elements of the ranges parameter <b>MUST</b> be the 406 * same elements passed to beginVBCopy(). The number and order of the 407 * elements can be different, but every element in ranges must also 408 * have been an element in beginVBCopy's ranges. 409 * @return 0 on success, -1 on error. 410 */ 411 EXPORT int endVBCopy(VER_t transID, const LBIDRange_v& ranges) 412 throw(); 413 414 /** @brief Commit the changes made for the given transaction. 415 * 416 * This unlocks the VSS entries with VerID = transID. 417 * @return 0 on success, -1 on error. 418 */ 419 EXPORT int vbCommit(VER_t transID) throw(); 420 421 /** @brief Reverse the changes made during the given transaction. 422 * 423 * Record that the given LBID was reverted to version verID. 424 * @warning This removes the copy locks held on all ranges by transID. 425 * @param transID The transaction ID 426 * @param lbidList The list of ranges to rollback. 427 * @param verID The version of the block now in the database. 428 * @return 0 on success, -1 on error. 429 */ 430 EXPORT int vbRollback(VER_t transID, const LBIDRange_v& lbidList, 431 bool flushPMCache) throw(); 432 433 /** @brief Reverse the changes made during the given transaction. 434 * 435 * Record that the given LBID was reverted to version verID. 436 * @warning This removes the copy locks held on all ranges by transID. 437 * @param transID The transaction ID 438 * @param lbidList The list of singular LBIDs to rollback. 439 * @param verID The version of the block now in the database. 440 * @return 0 on success, -1 on error. 441 */ 442 EXPORT int vbRollback(VER_t transID, const std::vector<LBID_t>& lbidList, 443 bool flushPMCache) throw(); 444 445 EXPORT int clear() throw(); 446 447 /** @brief Check the consistency of each data structure 448 * 449 * Check the consistency of each data structure 450 * @return 0 on success, -1 on error. 451 */ 452 EXPORT int checkConsistency() throw(); 453 454 EXPORT void confirmChanges() throw(); 455 EXPORT void undoChanges() throw(); 456 457 EXPORT int loadExtentMap(const std::string& filename); 458 EXPORT int saveExtentMap(const std::string& filename); 459 460 // Casual partitioning support 461 // 462 /** @brief mark the extent containing the lbid as not having valid max and min values 463 * 464 **/ 465 EXPORT int markExtentInvalid(const LBID_t lbid, 466 execplan::CalpontSystemCatalog::ColDataType colDataType); 467 EXPORT int markExtentsInvalid(const std::vector<LBID_t>& lbids, 468 const std::vector<execplan::CalpontSystemCatalog::ColDataType>& colDataTypes); 469 470 /** @brief update the extent with the lbidRange with max, min, & seqNum values 471 * 472 **/ 473 EXPORT int setExtentMaxMin(const LBID_t lbid, const int64_t max, const int64_t min, 474 const int32_t seqNum, bool firstNode); 475 476 // Casual partitioning support 477 // 478 /** @brief Sets min and max values from the information in the passed map. 479 * 480 * @param cpMaxMinMap - Map with cp info. The key must be the starting LBID. 481 * 482 **/ 483 // @bug 1970. Added setExtentsMaxMin. 484 EXPORT int setExtentsMaxMin(const CPMaxMinMap_t& cpMaxMinMap, bool firstNode); 485 486 /** @brief Merges list of min/max values with current CP min/max info 487 * 488 * @param cpMaxMinMap - Map with CP info. The key is the starting LBID 489 */ 490 EXPORT int mergeExtentsMaxMin(CPMaxMinMergeMap_t& cpMaxMinMap); 491 492 /* Write-side copylocks interface */ 493 EXPORT int dmlLockLBIDRanges(const std::vector<LBIDRange>& ranges, int txnID); 494 EXPORT int dmlReleaseLBIDRanges(const std::vector<LBIDRange>& ranges); 495 496 EXPORT int loadState(std::string filename) throw(); 497 EXPORT int saveState(std::string filename) throw(); 498 499 EXPORT const bool* getEMFLLockStatus(); 500 EXPORT const bool* getEMLockStatus(); 501 EXPORT const bool* getVBBMLockStatus(); 502 EXPORT const bool* getVSSLockStatus(); 503 504 private: 505 explicit SlaveDBRMNode(const SlaveDBRMNode& brm); 506 SlaveDBRMNode& operator=(const SlaveDBRMNode& brm); 507 int lookup(OID_t oid, LBIDRange_v& lbidList) throw(); 508 509 MasterSegmentTable mst; 510 ExtentMap em; 511 VBBM vbbm; 512 VSS vss; 513 CopyLocks copylocks; 514 bool locked[3]; // 0 = VBBM, 1 = VSS, 2 = CopyLocks 515 516 }; 517 518 } 519 520 #undef EXPORT 521 522 #endif 523