1 /* Copyright (C) 2014 InfiniDB, Inc.
2
3 This program is free software; you can redistribute it and/or
4 modify it under the terms of the GNU General Public License
5 as published by the Free Software Foundation; version 2 of
6 the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
16 MA 02110-1301, USA. */
17
18 /*****************************************************************************
19 * $Id: extentmap.cpp 1936 2013-07-09 22:10:29Z dhall $
20 *
21 ****************************************************************************/
22
23 #include <iostream>
24 #include <sys/types.h>
25 #include <sys/time.h>
26 #include <sys/stat.h>
27 #include <cstdlib>
28 #include <fcntl.h>
29 #include <unistd.h>
30 #include <stdexcept>
31 #include <algorithm>
32 #include <ios>
33 #include <cerrno>
34 #include <sstream>
35 #include <vector>
36 #include <limits>
37 #include <boost/scoped_array.hpp>
38 #include <boost/scoped_ptr.hpp>
39 #include <boost/thread.hpp>
40 #ifndef _MSC_VER
41 #include <tr1/unordered_set>
42 #else
43 #include <unordered_set>
44 #endif
45
46 #include <boost/interprocess/shared_memory_object.hpp>
47 #include <boost/interprocess/mapped_region.hpp>
48 namespace bi = boost::interprocess;
49
50 #include "liboamcpp.h"
51 #include "brmtypes.h"
52 #include "configcpp.h"
53 #include "rwlock.h"
54 #include "calpontsystemcatalog.h"
55 #include "mastersegmenttable.h"
56 #include "blocksize.h"
57 #include "dataconvert.h"
58 #include "oamcache.h"
59 #include "IDBDataFile.h"
60 #include "IDBPolicy.h"
61 #ifdef BRM_INFO
62 #include "tracer.h"
63 #include "configcpp.h"
64 #endif
65
66 #define EXTENTMAP_DLLEXPORT
67 #include "extentmap.h"
68 #undef EXTENTMAP_DLLEXPORT
69
70 #define EM_MAX_SEQNUM 2000000000
71 #define MAX_IO_RETRIES 10
72 #define EM_MAGIC_V1 0x76f78b1c
73 #define EM_MAGIC_V2 0x76f78b1d
74 #define EM_MAGIC_V3 0x76f78b1e
75 #define EM_MAGIC_V4 0x76f78b1f
76
77 #ifndef NDEBUG
78 #define ASSERT(x) \
79 if (!(x)) { \
80 cerr << "assertion at file " << __FILE__ << " line " << __LINE__ << " failed" << endl; \
81 throw logic_error("assertion failed"); \
82 }
83 #else
84 #define ASSERT(x)
85 #endif
86
87 using namespace std;
88 using namespace boost;
89 using namespace logging;
90 using namespace idbdatafile;
91
92 namespace
93 {
94 unsigned ExtentSize = 0; // dmc-need to deprecate
95 unsigned ExtentRows = 0;
96 unsigned filesPerColumnPartition = 0;
97 unsigned extentsPerSegmentFile = 0;
98
99 // Increment CP sequence (version) number, and wrap-around when applicable
incSeqNum(int32_t & seqNum)100 inline void incSeqNum(int32_t& seqNum)
101 {
102 seqNum++;
103
104 if (seqNum > EM_MAX_SEQNUM)
105 seqNum = 0;
106 }
107
108 }
109
110 namespace BRM
111 {
112
113 //------------------------------------------------------------------------------
114 // EMCasualPartition_struct methods
115 //------------------------------------------------------------------------------
116
EMCasualPartition_struct()117 EMCasualPartition_struct::EMCasualPartition_struct()
118 {
119 lo_val = numeric_limits<int64_t>::min();
120 hi_val = numeric_limits<int64_t>::max();
121 sequenceNum = 0;
122 isValid = CP_INVALID;
123 }
124
EMCasualPartition_struct(const int64_t lo,const int64_t hi,const int32_t seqNum)125 EMCasualPartition_struct::EMCasualPartition_struct(const int64_t lo, const int64_t hi, const int32_t seqNum)
126 {
127 lo_val = lo;
128 hi_val = hi;
129 sequenceNum = seqNum;
130 isValid = CP_INVALID;
131 }
132
// Copy constructor: duplicates the bounds, sequence number and validity flag.
EMCasualPartition_struct::EMCasualPartition_struct(const EMCasualPartition_struct& em) :
    hi_val(em.hi_val),
    lo_val(em.lo_val),
    sequenceNum(em.sequenceNum),
    isValid(em.isValid)
{
}
140
// Copy assignment: overwrites all four fields with those of em and
// returns *this.
EMCasualPartition_struct& EMCasualPartition_struct::operator= (const EMCasualPartition_struct& em)
{
    isValid     = em.isValid;
    sequenceNum = em.sequenceNum;
    hi_val      = em.hi_val;
    lo_val      = em.lo_val;

    return *this;
}
149
150 //------------------------------------------------------------------------------
151 // Version 4 EmEntry methods
152 //------------------------------------------------------------------------------
153
EMEntry()154 EMEntry::EMEntry()
155 {
156 fileID = 0;
157 blockOffset = 0;
158 HWM = 0;
159 partitionNum = 0;
160 segmentNum = 0;
161 dbRoot = 0;
162 colWid = 0;
163 status = 0;
164 }
165
// Copy constructor: scalar fields are copied in the initializer list; the
// LBID range (start/size) and the partition data are copied in the body.
EMEntry::EMEntry(const EMEntry& e) :
    fileID(e.fileID),
    blockOffset(e.blockOffset),
    HWM(e.HWM),
    partitionNum(e.partitionNum),
    segmentNum(e.segmentNum),
    dbRoot(e.dbRoot),
    colWid(e.colWid),
    status(e.status)
{
    range.start = e.range.start;
    range.size  = e.range.size;
    partition   = e.partition;
}
180
// Copy assignment: field-by-field copy of e into this entry; returns *this.
EMEntry& EMEntry::operator= (const EMEntry& e)
{
    // LBID range covered by the extent
    range.start = e.range.start;
    range.size  = e.range.size;

    // file placement
    fileID       = e.fileID;
    blockOffset  = e.blockOffset;
    HWM          = e.HWM;
    partitionNum = e.partitionNum;
    segmentNum   = e.segmentNum;
    dbRoot       = e.dbRoot;
    colWid       = e.colWid;
    status       = e.status;

    // partition data
    partition = e.partition;

    return *this;
}
196
operator <(const EMEntry & e) const197 bool EMEntry::operator< (const EMEntry& e) const
198 {
199 if (range.start < e.range.start)
200 return true;
201
202 return false;
203 }
204
205 /*static*/
206 boost::mutex ExtentMapImpl::fInstanceMutex;
207 boost::mutex ExtentMap::mutex;
208
209 /*static*/
210 ExtentMapImpl* ExtentMapImpl::fInstance = 0;
211
212 /*static*/
makeExtentMapImpl(unsigned key,off_t size,bool readOnly)213 ExtentMapImpl* ExtentMapImpl::makeExtentMapImpl(unsigned key, off_t size, bool readOnly)
214 {
215 boost::mutex::scoped_lock lk(fInstanceMutex);
216
217 if (fInstance)
218 {
219 if (key != fInstance->fExtMap.key())
220 {
221 BRMShmImpl newShm(key, 0);
222 fInstance->swapout(newShm);
223 }
224
225 ASSERT(key == fInstance->fExtMap.key());
226 return fInstance;
227 }
228
229 fInstance = new ExtentMapImpl(key, size, readOnly);
230
231 return fInstance;
232 }
233
ExtentMapImpl(unsigned key,off_t size,bool readOnly)234 ExtentMapImpl::ExtentMapImpl(unsigned key, off_t size, bool readOnly) :
235 fExtMap(key, size, readOnly)
236 {
237 }
238
239 /*static*/
240 boost::mutex FreeListImpl::fInstanceMutex;
241
242 /*static*/
243 FreeListImpl* FreeListImpl::fInstance = 0;
244
245 /*static*/
makeFreeListImpl(unsigned key,off_t size,bool readOnly)246 FreeListImpl* FreeListImpl::makeFreeListImpl(unsigned key, off_t size, bool readOnly)
247 {
248 boost::mutex::scoped_lock lk(fInstanceMutex);
249
250 if (fInstance)
251 {
252 if (key != fInstance->fFreeList.key())
253 {
254 BRMShmImpl newShm(key, 0);
255 fInstance->swapout(newShm);
256 }
257
258 ASSERT(key == fInstance->fFreeList.key());
259 return fInstance;
260 }
261
262 fInstance = new FreeListImpl(key, size, readOnly);
263
264 return fInstance;
265 }
266
FreeListImpl(unsigned key,off_t size,bool readOnly)267 FreeListImpl::FreeListImpl(unsigned key, off_t size, bool readOnly) :
268 fFreeList(key, size, readOnly)
269 {
270 }
271
ExtentMap()272 ExtentMap::ExtentMap()
273 {
274 fExtentMap = NULL;
275 fFreeList = NULL;
276 fCurrentEMShmkey = -1;
277 fCurrentFLShmkey = -1;
278 fEMShminfo = NULL;
279 fFLShminfo = NULL;
280 r_only = false;
281 flLocked = false;
282 emLocked = false;
283 fPExtMapImpl = 0;
284 fPFreeListImpl = 0;
285
286 #ifdef BRM_INFO
287 fDebug = ("Y" == config::Config::makeConfig()->getConfig("DBRM", "Debug"));
288 #endif
289 }
290
~ExtentMap()291 ExtentMap::~ExtentMap()
292 {
293 PmDbRootMap_t::iterator iter = fPmDbRootMap.begin();
294 PmDbRootMap_t::iterator end = fPmDbRootMap.end();
295
296 while (iter != end)
297 {
298 delete iter->second;
299 iter->second = 0;
300 ++iter;
301 }
302
303 fPmDbRootMap.clear();
304 }
305
// Casual Partitioning support
307 //
308
309 /**
310 * @brief mark the max/min values of an extent as invalid
311 *
312 * mark the extent containing the lbid as invalid and
313 * increment the sequenceNum value. If the lbid is found
314 * in the extent map a 0 is returned otherwise a 1.
315 *
316 **/
317
_markInvalid(const LBID_t lbid,const execplan::CalpontSystemCatalog::ColDataType colDataType)318 int ExtentMap::_markInvalid(const LBID_t lbid, const execplan::CalpontSystemCatalog::ColDataType colDataType)
319 {
320 int entries;
321 int i;
322 LBID_t lastBlock;
323
324 entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
325
326 for (i = 0; i < entries; i++)
327 {
328 lastBlock = fExtentMap[i].range.start +
329 (static_cast<LBID_t>(fExtentMap[i].range.size) * 1024) - 1;
330
331 if (fExtentMap[i].range.size != 0)
332 {
333 if (lbid >= fExtentMap[i].range.start && lbid <= lastBlock)
334 {
335 makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
336 fExtentMap[i].partition.cprange.isValid = CP_UPDATING;
337
338 if (isUnsigned(colDataType))
339 {
340 fExtentMap[i].partition.cprange.lo_val = numeric_limits<uint64_t>::max();
341 fExtentMap[i].partition.cprange.hi_val = 0;
342 }
343 else
344 {
345 fExtentMap[i].partition.cprange.lo_val = numeric_limits<int64_t>::max();
346 fExtentMap[i].partition.cprange.hi_val = numeric_limits<int64_t>::min();
347 }
348
349 incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
350 #ifdef BRM_DEBUG
351 ostringstream os;
352 os << "ExtentMap::_markInvalid(): casual partitioning update: firstLBID=" <<
353 fExtentMap[i].range.start << " lastLBID=" << fExtentMap[i].range.start +
354 fExtentMap[i].range.size * 1024 - 1 << " OID=" << fExtentMap[i].fileID <<
355 " min=" << fExtentMap[i].partition.cprange.lo_val <<
356 " max=" << fExtentMap[i].partition.cprange.hi_val <<
357 "seq=" << fExtentMap[i].partition.cprange.sequenceNum;
358 log(os.str(), logging::LOG_TYPE_DEBUG);
359 #endif
360 return 0;
361 }
362 }
363 }
364
365 throw logic_error("ExtentMap::markInvalid(): lbid isn't allocated");
366 }
367
markInvalid(const LBID_t lbid,const execplan::CalpontSystemCatalog::ColDataType colDataType)368 int ExtentMap::markInvalid(const LBID_t lbid,
369 const execplan::CalpontSystemCatalog::ColDataType colDataType)
370 {
371 #ifdef BRM_DEBUG
372
373 if (lbid < 0)
374 throw invalid_argument("ExtentMap::markInvalid(): lbid must be >= 0");
375
376 #endif
377 #ifdef BRM_INFO
378
379 if (fDebug)
380 {
381 TRACER_WRITELATER("_markInvalid");
382 TRACER_ADDINPUT(lbid);
383 TRACER_WRITE;
384 }
385
386 #endif
387
388 #ifdef BRM_DEBUG
389 ostringstream os;
390 os << "ExtentMap::markInvalid(" << lbid << "," << colDataType << ")";
391 log(os.str(), logging::LOG_TYPE_DEBUG);
392 #endif
393
394 grabEMEntryTable(WRITE);
395 return _markInvalid(lbid, colDataType);
396 }
397
398 /**
399 * @brief calls markInvalid(LBID_t lbid) for each extent containing any lbid in vector<LBID_t>& lbids
400 *
401 **/
402
markInvalid(const vector<LBID_t> & lbids,const vector<execplan::CalpontSystemCatalog::ColDataType> & colDataTypes)403 int ExtentMap::markInvalid(const vector<LBID_t>& lbids,
404 const vector<execplan::CalpontSystemCatalog::ColDataType>& colDataTypes)
405 {
406 uint32_t i, size = lbids.size();
407
408 #ifdef BRM_DEBUG
409
410 for (i = 0; i < size; ++i)
411 if (lbids[i] < 0)
412 throw invalid_argument("ExtentMap::markInvalid(vector): all lbids must be >= 0");
413
414 #endif
415 #ifdef BRM_INFO
416
417 if (fDebug)
418 {
419 TRACER_WRITELATER("_markInvalid");
420 TRACER_ADDINPUT(size);
421 TRACER_WRITE;
422 }
423
424 #endif
425
426 grabEMEntryTable(WRITE);
427
428 // XXXPAT: what's the proper return code when one and only one fails?
429 for (i = 0; i < size; ++i)
430 {
431 #ifdef BRM_DEBUG
432 ostringstream os;
433 os << "ExtentMap::markInvalid() lbids[" << i << "]=" << lbids[i] <<
434 " colDataTypes[" << i << "]=" << colDataTypes[i];
435 log(os.str(), logging::LOG_TYPE_DEBUG);
436 #endif
437
438 try
439 {
440 _markInvalid(lbids[i], colDataTypes[i]);
441 }
442 catch (std::exception& e)
443 {
444 cerr << "ExtentMap::markInvalid(vector): warning! lbid " << lbids[i] <<
445 " caused " << e.what() << endl;
446 }
447 }
448
449 return 0;
450 }
451
452 /**
453 * @brief set the max/min values for the extent if the seqNum matches the extents sequenceNum
454 *
455 * reset the lbid's hi_val to max and lo_val to min
456 * the seqNum matches the ExtentMap.sequenceNum. Then increments
457 * the current sequenceNum value by 1. If the sequenceNum does not
458 * match the seqNum value do not update the lbid's max/min values
459 * or increment the sequenceNum value and return a -1.
460
461 **/
462
setMaxMin(const LBID_t lbid,const int64_t max,const int64_t min,const int32_t seqNum,bool firstNode)463 int ExtentMap::setMaxMin(const LBID_t lbid,
464 const int64_t max,
465 const int64_t min,
466 const int32_t seqNum,
467 bool firstNode)
468 {
469 #ifdef BRM_INFO
470
471 if (fDebug)
472 {
473 TRACER_WRITELATER("updateMaxMin");
474 TRACER_ADDINPUT(lbid);
475 TRACER_ADDINPUT(max);
476 TRACER_ADDINPUT(min);
477 TRACER_ADDINPUT(seqNum);
478 TRACER_WRITE;
479 }
480
481 #endif
482 int entries;
483 int i;
484 LBID_t lastBlock;
485 int32_t curSequence;
486
487 #ifdef BRM_DEBUG
488
489 if (lbid < 0)
490 throw invalid_argument("ExtentMap::setMaxMin(): lbid must be >= 0");
491
492 #endif
493
494 grabEMEntryTable(WRITE);
495 entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
496
497 for (i = 0; i < entries; i++)
498 {
499 if (fExtentMap[i].range.size != 0)
500 {
501 lastBlock = fExtentMap[i].range.start +
502 (static_cast<LBID_t>(fExtentMap[i].range.size) * 1024) - 1;
503 curSequence = fExtentMap[i].partition.cprange.sequenceNum;
504
505 if (lbid >= fExtentMap[i].range.start && lbid <= lastBlock)
506 {
507 #ifdef BRM_DEBUG
508
509 if (firstNode)
510 {
511 ostringstream os;
512 os << "ExtentMap::setMaxMin(): casual partitioning update: firstLBID=" <<
513 fExtentMap[i].range.start << " lastLBID=" << fExtentMap[i].range.start +
514 fExtentMap[i].range.size * 1024 - 1 << " OID=" << fExtentMap[i].fileID <<
515 " min=" << min << " max=" << max << "seq=" << seqNum;
516 log(os.str(), logging::LOG_TYPE_DEBUG);
517 }
518
519 #endif
520
521 if (curSequence == seqNum)
522 {
523 makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
524 fExtentMap[i].partition.cprange.hi_val = max;
525 fExtentMap[i].partition.cprange.lo_val = min;
526 fExtentMap[i].partition.cprange.isValid = CP_VALID;
527 incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
528 return 0;
529 }
530 //special val to indicate a reset--used by editem -c.
531 //Also used by COMMIT and ROLLBACK to invalidate CP.
532 else if (seqNum == -1)
533 {
534 makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
535 // We set hi_val and lo_val to correct values for signed or unsigned
536 // during the markinvalid step, which sets the invalid variable to CP_UPDATING.
537 // During this step (seqNum == -1), the min and max passed in are not reliable
538 // and should not be used.
539 fExtentMap[i].partition.cprange.isValid = CP_INVALID;
540 incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
541 return 0;
542 }
543 else
544 {
545 return 0;
546 }
547 }
548 }
549 }
550
551 if (emLocked)
552 releaseEMEntryTable(WRITE);
553
554 throw logic_error("ExtentMap::setMaxMin(): lbid isn't allocated");
555 // return -1;
556 }
557
558 // @bug 1970. Added updateExtentsMaxMin function.
559 // @note - The key passed in the map must the the first LBID in the extent.
setExtentsMaxMin(const CPMaxMinMap_t & cpMap,bool firstNode,bool useLock)560 void ExtentMap::setExtentsMaxMin(const CPMaxMinMap_t& cpMap, bool firstNode, bool useLock)
561 {
562 CPMaxMinMap_t::const_iterator it;
563
564 #ifdef BRM_DEBUG
565 log("ExtentMap::setExtentsMaxMin()", logging::LOG_TYPE_DEBUG);
566
567 for (it = cpMap.begin(); it != cpMap.end(); ++it)
568 {
569 ostringstream os;
570 os << "FirstLBID=" << it->first <<
571 " min=" << it->second.min <<
572 " max=" << it->second.max <<
573 " seq=" << it->second.seqNum;
574 log(os.str(), logging::LOG_TYPE_DEBUG);
575 }
576
577 #endif
578
579
580 #ifdef BRM_INFO
581
582 if (fDebug)
583 {
584 TRACER_WRITELATER("setExtentsMaxMin");
585
586 for (it = cpMap.begin(); it != cpMap.end(); ++it)
587 {
588 TRACER_ADDINPUT((*it).first);
589 TRACER_ADDINPUT((*it).second.max);
590 TRACER_ADDINPUT((*it).second.min);
591 TRACER_ADDINPUT((*it).second.seqNum);
592 TRACER_WRITE;
593 }
594 }
595
596 #endif
597 int entries;
598 int i;
599 int32_t curSequence;
600 const int32_t extentsToUpdate = cpMap.size();
601 int32_t extentsUpdated = 0;
602
603 #ifdef BRM_DEBUG
604
605 if (extentsToUpdate <= 0)
606 throw invalid_argument("ExtentMap::setExtentsMaxMin(): cpMap must be populated");
607
608 #endif
609
610 if (useLock)
611 grabEMEntryTable(WRITE);
612
613 entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
614
615 for (i = 0; i < entries; i++)
616 {
617 if (fExtentMap[i].range.size != 0)
618 {
619 it = cpMap.find(fExtentMap[i].range.start);
620
621 if (it != cpMap.end())
622 {
623 curSequence = fExtentMap[i].partition.cprange.sequenceNum;
624
625 if (curSequence == it->second.seqNum &&
626 fExtentMap[i].partition.cprange.isValid == CP_INVALID)
627 {
628 makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
629 fExtentMap[i].partition.cprange.hi_val = it->second.max;
630 fExtentMap[i].partition.cprange.lo_val = it->second.min;
631 fExtentMap[i].partition.cprange.isValid = CP_VALID;
632 incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
633 extentsUpdated++;
634 #ifdef BRM_DEBUG
635
636 if (firstNode)
637 {
638 ostringstream os;
639 os << "ExtentMap::setExtentsMaxMin(): casual partitioning update: firstLBID=" <<
640 fExtentMap[i].range.start << " lastLBID=" << fExtentMap[i].range.start +
641 fExtentMap[i].range.size * 1024 - 1 << " OID=" << fExtentMap[i].fileID <<
642 " min=" << it->second.min << " max=" <<
643 it->second.max << " seq=" <<
644 it->second.seqNum;
645 log(os.str(), logging::LOG_TYPE_DEBUG);
646 }
647
648 #endif
649 }
650 //special val to indicate a reset -- ignore the min/max
651 else if (it->second.seqNum == -1)
652 {
653 makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
654 // We set hi_val and lo_val to correct values for signed or unsigned
655 // during the markinvalid step, which sets the invalid variable to CP_UPDATING.
656 // During this step (seqNum == -1), the min and max passed in are not reliable
657 // and should not be used.
658 fExtentMap[i].partition.cprange.isValid = CP_INVALID;
659 incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
660 extentsUpdated++;
661 }
662 //special val to indicate a reset -- assign the min/max
663 else if (it->second.seqNum == -2)
664 {
665 makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
666 fExtentMap[i].partition.cprange.hi_val = it->second.max;
667 fExtentMap[i].partition.cprange.lo_val = it->second.min;
668 fExtentMap[i].partition.cprange.isValid = CP_INVALID;
669 incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
670 extentsUpdated++;
671 }
672 // else sequence has changed since start of the query. Don't update the EM entry.
673 else
674 {
675 extentsUpdated++;
676 }
677
678 if (extentsUpdated == extentsToUpdate)
679 {
680 return;
681 }
682 }
683 }
684 }
685
686 throw logic_error("ExtentMap::setExtentsMaxMin(): lbid isn't allocated");
687 }
688
689 //------------------------------------------------------------------------------
690 // @bug 1970. Added mergeExtentsMaxMin to merge CP info for list of extents.
691 // @note - The key passed in the map must the starting LBID in the extent.
692 // Used by cpimport to update extentmap casual partition min/max.
693 // NULL or empty values should not be passed in as min/max values.
694 // seqNum in the input struct is not currently used.
695 //
696 // Note that DML calls markInvalid() to flag an extent as CP_UPDATING and incre-
697 // ments the sequence number prior to any change, and then marks the extent as
698 // CP_INVALID at transaction's end.
699 // Since cpimport locks the entire table prior to making any changes, it is
700 // assumed that the state of an extent will not be changed (by anyone else)
701 // during an import; so cpimport does not employ the intermediate CP_UPDATING
702 // state that DML uses. cpimport just waits till the end of the job and incre-
703 // ments the sequence number and changes the state to CP_INVALID at that time.
704 // We may want/need to reconsider this at some point.
705 //------------------------------------------------------------------------------
mergeExtentsMaxMin(CPMaxMinMergeMap_t & cpMap,bool useLock)706 void ExtentMap::mergeExtentsMaxMin(CPMaxMinMergeMap_t& cpMap, bool useLock)
707 {
708 CPMaxMinMergeMap_t::const_iterator it;
709
710 #ifdef BRM_DEBUG
711 log("ExtentMap::mergeExtentsMaxMin()", logging::LOG_TYPE_DEBUG);
712
713 for (it = cpMap.begin(); it != cpMap.end(); ++it)
714 {
715 ostringstream os;
716 os << "FirstLBID=" << it->first <<
717 " min=" << it->second.min <<
718 " max=" << it->second.max <<
719 " seq=" << it->second.seqNum <<
720 " typ: " << (*it).second.type <<
721 " new: " << (*it).second.newExtent;
722 log(os.str(), logging::LOG_TYPE_DEBUG);
723 }
724
725 #endif
726
727 #ifdef BRM_INFO
728
729 if (fDebug)
730 {
731 TRACER_WRITENOW("mergeExtentsMaxMin");
732 unsigned int count = 1;
733
734 for (it = cpMap.begin(); it != cpMap.end(); ++it)
735 {
736 ostringstream oss;
737 oss << " " << count <<
738 ". LBID: " << (*it).first <<
739 "; max: " << (*it).second.max <<
740 "; min: " << (*it).second.min <<
741 "; seq: " << (*it).second.seqNum <<
742 "; typ: " << (*it).second.type <<
743 "; new: " << (*it).second.newExtent;
744 TRACER_WRITEDIRECT(oss.str());
745 count++;
746 }
747 }
748
749 #endif
750
751 const int32_t extentsToMerge = cpMap.size();
752 int32_t extentsMerged = 0;
753
754 #ifdef BRM_DEBUG
755
756 if (extentsToMerge <= 0)
757 throw invalid_argument("ExtentMap::mergeExtentsMaxMin(): "
758 "cpMap must be populated");
759
760 #endif
761
762 if (useLock)
763 grabEMEntryTable(WRITE);
764
765 int entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
766
767 for (int i = 0; i < entries; i++) // loop through all extents
768 {
769 if (fExtentMap[i].range.size != 0) // find eligible extents
770 {
771 it = cpMap.find(fExtentMap[i].range.start);
772
773 if (it != cpMap.end())
774 {
775 #ifdef BRM_DEBUG
776 ostringstream os;
777 os << "ExtentMap::mergeExtentsMaxMin(): casual partitioning update: firstLBID=" <<
778 fExtentMap[i].range.start << " lastLBID=" << fExtentMap[i].range.start +
779 fExtentMap[i].range.size * 1024 - 1 << " OID=" << fExtentMap[i].fileID <<
780 " hi_val=" << fExtentMap[i].partition.cprange.hi_val <<
781 " lo_val=" << fExtentMap[i].partition.cprange.lo_val <<
782 " min=" << it->second.min << " max=" << it->second.max <<
783 " seq=" << it->second.seqNum;
784 log(os.str(), logging::LOG_TYPE_DEBUG);
785 #endif
786
787 switch (fExtentMap[i].partition.cprange.isValid)
788 {
789 // Merge input min/max with current min/max
790 case CP_VALID:
791 {
792 if (!isValidCPRange( it->second.max,
793 it->second.min,
794 it->second.type ))
795 {
796 break;
797 }
798
799 makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
800
801 // We check the validity of the current min/max,
802 // because isValid could be CP_VALID for an extent
803 // having all NULL values, in which case the current
804 // min/max needs to be set instead of merged.
805
806 if (isValidCPRange(
807 fExtentMap[i].partition.cprange.hi_val,
808 fExtentMap[i].partition.cprange.lo_val,
809 it->second.type))
810 {
811 // Swap byte order to do binary string comparison
812 if (isCharType(it->second.type))
813 {
814 int64_t newMinVal =
815 static_cast<int64_t>( uint64ToStr(
816 static_cast<uint64_t>(it->second.min)));
817 int64_t newMaxVal =
818 static_cast<int64_t>( uint64ToStr(
819 static_cast<uint64_t>(it->second.max)));
820 int64_t oldMinVal =
821 static_cast<int64_t>( uint64ToStr(
822 static_cast<uint64_t>(
823 fExtentMap[i].partition.cprange.lo_val)) );
824 int64_t oldMaxVal =
825 static_cast<int64_t>( uint64ToStr(
826 static_cast<uint64_t>(
827 fExtentMap[i].partition.cprange.hi_val)) );
828
829 if (newMinVal < oldMinVal)
830 fExtentMap[i].partition.cprange.lo_val =
831 it->second.min;
832
833 if (newMaxVal > oldMaxVal)
834 fExtentMap[i].partition.cprange.hi_val =
835 it->second.max;
836 }
837 else if (isUnsigned(it->second.type))
838 {
839 if (static_cast<uint64_t>(it->second.min) <
840 static_cast<uint64_t>(fExtentMap[i].partition.cprange.lo_val))
841 {
842 fExtentMap[i].partition.cprange.lo_val =
843 it->second.min;
844 }
845
846 if (static_cast<uint64_t>(it->second.max) >
847 static_cast<uint64_t>(fExtentMap[i].partition.cprange.hi_val))
848 {
849 fExtentMap[i].partition.cprange.hi_val =
850 it->second.max;
851 }
852 }
853 else
854 {
855 if (it->second.min <
856 fExtentMap[i].partition.cprange.lo_val)
857 fExtentMap[i].partition.cprange.lo_val =
858 it->second.min;
859
860 if (it->second.max >
861 fExtentMap[i].partition.cprange.hi_val)
862 fExtentMap[i].partition.cprange.hi_val =
863 it->second.max;
864 }
865 }
866 else
867 {
868 fExtentMap[i].partition.cprange.lo_val =
869 it->second.min;
870 fExtentMap[i].partition.cprange.hi_val =
871 it->second.max;
872 }
873
874 incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
875
876 break;
877 }
878
879 // DML is updating; just increment seqnum.
880 // This case is here for completeness. Table lock should
881 // prevent this state from occurring (see notes at top of
882 // this function)
883 case CP_UPDATING:
884 {
885 makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
886 incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
887
888 break;
889 }
890
891 // Reset min/max to new min/max only "if" we can treat this
892 // as a new extent, else leave the extent marked as INVALID
893 case CP_INVALID:
894 default:
895 {
896 makeUndoRecord(&fExtentMap[i], sizeof(struct EMEntry));
897
898 if (it->second.newExtent)
899 {
900 if (isValidCPRange( it->second.max,
901 it->second.min,
902 it->second.type ))
903 {
904 fExtentMap[i].partition.cprange.lo_val =
905 it->second.min;
906 fExtentMap[i].partition.cprange.hi_val =
907 it->second.max;
908 }
909
910 // Even if invalid range; we set state to CP_VALID,
911 // because the extent is valid, it is just empty.
912 fExtentMap[i].partition.cprange.isValid = CP_VALID;
913 }
914
915 incSeqNum(fExtentMap[i].partition.cprange.sequenceNum);
916 break;
917 }
918 } // switch on isValid state
919
920 extentsMerged++;
921
922 if (extentsMerged == extentsToMerge)
923 {
924 return; // Leave when all extents in map are matched
925 }
926
927 // Deleting objects from map, may speed up successive searches
928 cpMap.erase( it );
929
930 } // found a matching extent in the Map
931 } // extent map range size != 0
932 } // end of loop through extent map
933
934 throw logic_error("ExtentMap::mergeExtentsMaxMin(): lbid not found");
935 }
936
937 //------------------------------------------------------------------------------
938 // Use this function to see if the range is a valid min/max range or not.
939 // Range is considered invalid if min or max, are NULL (min()), or EMPTY
940 // (min()+1). For unsigned types NULL is max() and EMPTY is max()-1.
941 //------------------------------------------------------------------------------
isValidCPRange(int64_t max,int64_t min,execplan::CalpontSystemCatalog::ColDataType type) const942 bool ExtentMap::isValidCPRange(int64_t max, int64_t min, execplan::CalpontSystemCatalog::ColDataType type) const
943 {
944 if (isUnsigned(type))
945 {
946 if ( (static_cast<uint64_t>(min) >= (numeric_limits<uint64_t>::max() - 1)) ||
947 (static_cast<uint64_t>(max) >= (numeric_limits<uint64_t>::max() - 1)) )
948 {
949 return false;
950 }
951 }
952 else
953 {
954 if ( (min <= (numeric_limits<int64_t>::min() + 1)) ||
955 (max <= (numeric_limits<int64_t>::min() + 1)) )
956 {
957 return false;
958 }
959 }
960
961 return true;
962 }
963
964 /**
965 * @brief retrieve the hi_val and lo_val or sequenceNum of the extent containing the LBID lbid.
966 *
967 * For the extent containing the LBID lbid, return the max/min values if the extent range values
968 * are valid and a -1 in the seqNum parameter. If the range values are flaged as invalid
969 * return the sequenceNum of the extent and the max/min values as -1.
970 **/
971
getMaxMin(const LBID_t lbid,int64_t & max,int64_t & min,int32_t & seqNum)972 int ExtentMap::getMaxMin(const LBID_t lbid,
973 int64_t& max,
974 int64_t& min,
975 int32_t& seqNum)
976 {
977 #ifdef BRM_INFO
978
979 if (fDebug)
980 {
981 TRACER_WRITELATER("getMaxMin");
982 TRACER_ADDINPUT(lbid);
983 TRACER_ADDOUTPUT(max);
984 TRACER_ADDOUTPUT(min);
985 TRACER_ADDOUTPUT(seqNum);
986 TRACER_WRITE;
987 }
988
989 #endif
990 max = numeric_limits<uint64_t>::max();
991 min = 0;
992 seqNum *= (-1);
993 int entries;
994 int i;
995 LBID_t lastBlock;
996 int isValid = CP_INVALID;
997
998 #ifdef BRM_DEBUG
999
1000 if (lbid < 0)
1001 throw invalid_argument("ExtentMap::getMaxMin(): lbid must be >= 0");
1002
1003 #endif
1004
1005 grabEMEntryTable(READ);
1006 entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
1007
1008 for (i = 0; i < entries; i++)
1009 {
1010 if (fExtentMap[i].range.size != 0)
1011 {
1012 lastBlock = fExtentMap[i].range.start +
1013 (static_cast<LBID_t>(fExtentMap[i].range.size) * 1024) - 1;
1014
1015 if (lbid >= fExtentMap[i].range.start && lbid <= lastBlock)
1016 {
1017 max = fExtentMap[i].partition.cprange.hi_val;
1018 min = fExtentMap[i].partition.cprange.lo_val;
1019 seqNum = fExtentMap[i].partition.cprange.sequenceNum;
1020 isValid = fExtentMap[i].partition.cprange.isValid;
1021 releaseEMEntryTable(READ);
1022 return isValid;
1023 }
1024 }
1025 }
1026
1027 releaseEMEntryTable(READ);
1028 throw logic_error("ExtentMap::getMaxMin(): that lbid isn't allocated");
1029 // return -1;
1030 }
1031
1032 /* Removes a range from the freelist. Used by load() */
reserveLBIDRange(LBID_t start,uint8_t size)1033 void ExtentMap::reserveLBIDRange(LBID_t start, uint8_t size)
1034 {
1035 int i;
1036 int flEntries = fFLShminfo->allocdSize / sizeof(InlineLBIDRange);
1037 LBID_t lastLBID = start + (size * 1024) - 1;
1038 int32_t freeIndex = -1;
1039
1040 /* Find a range the request intersects. There should be one and only one. */
1041 for (i = 0; i < flEntries; i++)
1042 {
1043 LBID_t eLastLBID;
1044
1045 // while scanning, grab the first free slot
1046 if (fFreeList[i].size == 0)
1047 {
1048 if (freeIndex == -1)
1049 freeIndex = i;
1050
1051 continue;
1052 }
1053
1054 eLastLBID = fFreeList[i].start + (((int64_t) fFreeList[i].size) * 1024) - 1;
1055
1056 /* if it's at the front... */
1057 if (start == fFreeList[i].start)
1058 {
1059 /* if the request is larger than the freelist entry -> implies an extent
1060 * overlap. This is debugging code. */
1061 //idbassert(size > fFreeList[i].size);
1062 makeUndoRecord(&fFreeList[i], sizeof(InlineLBIDRange));
1063 fFreeList[i].start += size * 1024;
1064 fFreeList[i].size -= size;
1065
1066 if (fFreeList[i].size == 0)
1067 {
1068 makeUndoRecord(fFLShminfo, sizeof(MSTEntry));
1069 fFLShminfo->currentSize -= sizeof(InlineLBIDRange);
1070 }
1071
1072 break;
1073 }
1074 /* if it's at the back... */
1075 else if (eLastLBID == lastLBID)
1076 {
1077 makeUndoRecord(&fFreeList[i], sizeof(InlineLBIDRange));
1078 fFreeList[i].size -= size;
1079
1080 if (fFreeList[i].size == 0)
1081 {
1082 makeUndoRecord(fFLShminfo, sizeof(MSTEntry));
1083 fFLShminfo->currentSize -= sizeof(InlineLBIDRange);
1084 }
1085
1086 break;
1087 /* This entry won't be the same size as the request or the first
1088 * clause would have run instead.
1089 */
1090 }
1091 /* if it's in the middle... */
1092 /* break it into two elements */
1093 else if (fFreeList[i].start < start && eLastLBID > lastLBID)
1094 {
1095 if (freeIndex == -1)
1096 {
1097 if (fFLShminfo->currentSize == fFLShminfo->allocdSize)
1098 {
1099 growFLShmseg();
1100 freeIndex = flEntries;
1101 }
1102 else
1103 for (freeIndex = i + 1; freeIndex < flEntries; freeIndex++)
1104 if (fFreeList[freeIndex].size == 0)
1105 break;
1106
1107 #ifdef BRM_DEBUG
1108 idbassert(nextIndex < flEntries);
1109 #endif
1110 }
1111
1112 makeUndoRecord(&fFreeList[i], sizeof(InlineLBIDRange));
1113 makeUndoRecord(&fFreeList[freeIndex], sizeof(InlineLBIDRange));
1114 makeUndoRecord(fFLShminfo, sizeof(MSTEntry));
1115 fFreeList[i].size = (start - fFreeList[i].start) / 1024;
1116 fFreeList[freeIndex].start = start + (size * 1024);
1117 fFreeList[freeIndex].size = (eLastLBID - lastLBID) / 1024;
1118 fFLShminfo->currentSize += sizeof(InlineLBIDRange);
1119 break;
1120 }
1121 }
1122 }
1123
1124 /*
1125 The file layout looks like this:
1126
1127 EM Magic (32-bits)
1128 number of EM entries (32-bits)
1129 number of FL entries (32-bits)
1130 EMEntry
1131 ... (* numEM)
1132 struct InlineLBIDRange
1133 ... (* numFL)
1134 */
1135
1136
loadVersion4(IDBDataFile * in)1137 void ExtentMap::loadVersion4(IDBDataFile* in)
1138 {
1139 int emNumElements = 0, flNumElements = 0;
1140
1141 int nbytes = 0;
1142 nbytes += in->read((char*) &emNumElements, sizeof(int));
1143 nbytes += in->read((char*) &flNumElements, sizeof(int));
1144 idbassert(emNumElements > 0);
1145
1146 if ((size_t) nbytes != sizeof(int) + sizeof(int))
1147 {
1148 log_errno("ExtentMap::loadVersion4(): read ");
1149 throw runtime_error("ExtentMap::loadVersion4(): read failed. Check the error log.");
1150 }
1151
1152 void *fExtentMapPtr = static_cast<void*>(fExtentMap);
1153 memset(fExtentMapPtr, 0, fEMShminfo->allocdSize);
1154 fEMShminfo->currentSize = 0;
1155
1156 // init the free list
1157 memset(fFreeList, 0, fFLShminfo->allocdSize);
1158 fFreeList[0].size = (1 << 26); // 2^36 LBIDs
1159 fFLShminfo->currentSize = sizeof(InlineLBIDRange);
1160
1161 // @Bug 3498
1162 // Calculate how big an extent map we're going to need and allocate it in one call
1163 if ((fEMShminfo->allocdSize / sizeof(EMEntry)) < (unsigned)emNumElements)
1164 {
1165 size_t nrows = emNumElements;
1166
1167 //Round up to the nearest EM_INCREMENT_ROWS
1168 if ((nrows % EM_INCREMENT_ROWS) != 0)
1169 {
1170 nrows /= EM_INCREMENT_ROWS;
1171 nrows++;
1172 nrows *= EM_INCREMENT_ROWS;
1173 }
1174
1175 growEMShmseg(nrows);
1176 }
1177
1178 size_t progress = 0, writeSize = emNumElements * sizeof(EMEntry);
1179 int err;
1180 char *writePos = (char *) fExtentMap;
1181 while (progress < writeSize)
1182 {
1183 err = in->read(writePos + progress, writeSize - progress);
1184 if (err <= 0)
1185 {
1186 log_errno("ExtentMap::loadVersion4(): read ");
1187 throw runtime_error("ExtentMap::loadVersion4(): read failed. Check the error log.");
1188 }
1189 progress += (uint) err;
1190 }
1191
1192 for (int i = 0; i < emNumElements; i++)
1193 {
1194 reserveLBIDRange(fExtentMap[i].range.start, fExtentMap[i].range.size);
1195
1196 //@bug 1911 - verify status value is valid
1197 if (fExtentMap[i].status < EXTENTSTATUSMIN ||
1198 fExtentMap[i].status > EXTENTSTATUSMAX)
1199 fExtentMap[i].status = EXTENTAVAILABLE;
1200 }
1201
1202 fEMShminfo->currentSize = emNumElements * sizeof(EMEntry);
1203
1204 #ifdef DUMP_EXTENT_MAP
1205 EMEntry* emSrc = fExtentMap;
1206 cout << "lbid\tsz\toid\tfbo\thwm\tpart#\tseg#\tDBRoot\twid\tst\thi\tlo\tsq\tv" << endl;
1207
1208 for (int i = 0; i < emNumElements; i++)
1209 {
1210 cout <<
1211 emSrc[i].start
1212 << '\t' << emSrc[i].size
1213 << '\t' << emSrc[i].fileID
1214 << '\t' << emSrc[i].blockOffset
1215 << '\t' << emSrc[i].HWM
1216 << '\t' << emSrc[i].partitionNum
1217 << '\t' << emSrc[i].segmentNum
1218 << '\t' << emSrc[i].dbRoot
1219 << '\t' << emSrc[i].status
1220 << '\t' << emSrc[i].partition.cprange.hi_val
1221 << '\t' << emSrc[i].partition.cprange.lo_val
1222 << '\t' << emSrc[i].partition.cprange.sequenceNum
1223 << '\t' << (int)(emSrc[i].partition.cprange.isValid)
1224 << endl;
1225 }
1226
1227 cout << "Free list entries:" << endl;
1228 cout << "start\tsize" << endl;
1229
1230 for (int i = 0; i < flNumElements; i++)
1231 cout << fFreeList[i].start << '\t' << fFreeList[i].size << endl;
1232
1233 #endif
1234 }
1235
load(const string & filename,bool fixFL)1236 void ExtentMap::load(const string& filename, bool fixFL)
1237 {
1238 #ifdef BRM_INFO
1239
1240 if (fDebug)
1241 {
1242 TRACER_WRITELATER("load");
1243 TRACER_ADDSTRINPUT(filename);
1244 TRACER_WRITE;
1245 }
1246
1247 #endif
1248
1249 grabEMEntryTable(WRITE);
1250
1251 try
1252 {
1253 grabFreeList(WRITE);
1254 }
1255 catch (...)
1256 {
1257 releaseEMEntryTable(WRITE);
1258 throw;
1259 }
1260
1261 const char* filename_p = filename.c_str();
1262 scoped_ptr<IDBDataFile> in(IDBDataFile::open(
1263 IDBPolicy::getType(filename_p, IDBPolicy::WRITEENG),
1264 filename_p, "r", 0));
1265
1266 if (!in)
1267 {
1268 log_errno("ExtentMap::load(): open");
1269 releaseFreeList(WRITE);
1270 releaseEMEntryTable(WRITE);
1271 throw ios_base::failure("ExtentMap::load(): open failed. Check the error log.");
1272 }
1273
1274 try
1275 {
1276 int emVersion = 0;
1277 int bytes = in->read((char*) &emVersion, sizeof(int));
1278
1279 if (bytes == (int) sizeof(int) && emVersion == EM_MAGIC_V4)
1280 loadVersion4(in.get());
1281 else
1282 {
1283 log("ExtentMap::load(): That file is not a valid ExtentMap image");
1284 throw runtime_error("ExtentMap::load(): That file is not a valid ExtentMap image");
1285 }
1286 }
1287 catch (...)
1288 {
1289 releaseFreeList(WRITE);
1290 releaseEMEntryTable(WRITE);
1291 throw;
1292 }
1293
1294 releaseFreeList(WRITE);
1295 releaseEMEntryTable(WRITE);
1296 // checkConsistency();
1297 }
1298
save(const string & filename)1299 void ExtentMap::save(const string& filename)
1300 {
1301 #ifdef BRM_INFO
1302
1303 if (fDebug)
1304 {
1305 TRACER_WRITELATER("save");
1306 TRACER_ADDSTRINPUT(filename);
1307 TRACER_WRITE;
1308 }
1309
1310 #endif
1311
1312 int allocdSize, loadSize[3], i;
1313
1314 grabEMEntryTable(READ);
1315
1316 try
1317 {
1318 grabFreeList(READ);
1319 }
1320 catch (...)
1321 {
1322 releaseEMEntryTable(READ);
1323 throw;
1324 }
1325
1326 if (fEMShminfo->currentSize == 0)
1327 {
1328 log("ExtentMap::save(): got request to save an empty BRM");
1329 releaseFreeList(READ);
1330 releaseEMEntryTable(READ);
1331 throw runtime_error("ExtentMap::save(): got request to save an empty BRM");
1332 }
1333
1334 const char* filename_p = filename.c_str();
1335 scoped_ptr<IDBDataFile> out(IDBDataFile::open(
1336 IDBPolicy::getType(filename_p, IDBPolicy::WRITEENG),
1337 filename_p, "wb", IDBDataFile::USE_VBUF));
1338
1339 if (!out)
1340 {
1341 log_errno("ExtentMap::save(): open");
1342 releaseFreeList(READ);
1343 releaseEMEntryTable(READ);
1344 throw ios_base::failure("ExtentMap::save(): open failed. Check the error log.");
1345 }
1346
1347 loadSize[0] = EM_MAGIC_V4;
1348 loadSize[1] = fEMShminfo->currentSize / sizeof(EMEntry);
1349 loadSize[2] = fFLShminfo->allocdSize / sizeof(InlineLBIDRange); // needs to send all entries
1350
1351 int bytes = 0;
1352
1353 try
1354 {
1355 const int wsize = 3 * sizeof(int);
1356 bytes = out->write((char*)loadSize, wsize);
1357
1358 if (bytes != wsize)
1359 throw ios_base::failure("ExtentMap::save(): write failed. Check the error log.");
1360 }
1361 catch (...)
1362 {
1363 releaseFreeList(READ);
1364 releaseEMEntryTable(READ);
1365 throw;
1366 }
1367
1368 allocdSize = fEMShminfo->allocdSize / sizeof(EMEntry);
1369 //const int emEntrySize = sizeof(EMEntry);
1370
1371 int first = -1, last = -1, err;
1372 size_t progress, writeSize;
1373 for (i = 0; i < allocdSize; i++)
1374 {
1375 if (fExtentMap[i].range.size > 0 && first == -1)
1376 first = i;
1377 else if (fExtentMap[i].range.size <= 0 && first != -1)
1378 {
1379 last = i;
1380 writeSize = (last - first) * sizeof(EMEntry);
1381 progress = 0;
1382 char *writePos = (char *) &fExtentMap[first];
1383 while (progress < writeSize)
1384 {
1385 err = out->write(writePos + progress, writeSize - progress);
1386 if (err < 0)
1387 {
1388 releaseFreeList(READ);
1389 releaseEMEntryTable(READ);
1390 throw ios_base::failure("ExtentMap::save(): write failed. Check the error log.");
1391 }
1392 progress += err;
1393 }
1394 first = -1;
1395 }
1396
1397 }
1398 if (first != -1)
1399 {
1400 writeSize = (allocdSize - first) * sizeof(EMEntry);
1401 progress = 0;
1402 char *writePos = (char *) &fExtentMap[first];
1403 while (progress < writeSize)
1404 {
1405 err = out->write(writePos + progress, writeSize - progress);
1406 if (err < 0)
1407 {
1408 releaseFreeList(READ);
1409 releaseEMEntryTable(READ);
1410 throw ios_base::failure("ExtentMap::save(): write failed. Check the error log.");
1411 }
1412 progress += err;
1413 }
1414 }
1415
1416 //allocdSize = fFLShminfo->allocdSize / sizeof(InlineLBIDRange);
1417 //const int inlineLbidRangeSize = sizeof(InlineLBIDRange);
1418
1419 progress = 0;
1420 writeSize = fFLShminfo->allocdSize;
1421 char *writePos = (char *) fFreeList;
1422 while (progress < writeSize)
1423 {
1424 err = out->write(writePos + progress, writeSize - progress);
1425 if (err < 0)
1426 {
1427 releaseFreeList(READ);
1428 releaseEMEntryTable(READ);
1429 throw ios_base::failure("ExtentMap::save(): write failed. Check the error log.");
1430 }
1431
1432 progress += err;
1433 }
1434
1435 releaseFreeList(READ);
1436 releaseEMEntryTable(READ);
1437 }
1438
1439 /* always returns holding the EM lock, and with the EM seg mapped */
grabEMEntryTable(OPS op)1440 void ExtentMap::grabEMEntryTable(OPS op)
1441 {
1442 boost::mutex::scoped_lock lk(mutex);
1443
1444 if (op == READ)
1445 fEMShminfo = fMST.getTable_read(MasterSegmentTable::EMTable);
1446 else
1447 {
1448 fEMShminfo = fMST.getTable_write(MasterSegmentTable::EMTable);
1449 emLocked = true;
1450 }
1451
1452 if (!fPExtMapImpl || fPExtMapImpl->key() != (unsigned)fEMShminfo->tableShmkey)
1453 {
1454 if (fExtentMap != NULL)
1455 {
1456 fExtentMap = NULL;
1457 }
1458
1459 if (fEMShminfo->allocdSize == 0)
1460 {
1461 if (op == READ)
1462 {
1463 fMST.getTable_upgrade(MasterSegmentTable::EMTable);
1464 emLocked = true;
1465
1466 if (fEMShminfo->allocdSize == 0)
1467 growEMShmseg();
1468
1469 emLocked = false; // has to be done holding the write lock
1470 fMST.getTable_downgrade(MasterSegmentTable::EMTable);
1471 }
1472 else
1473 growEMShmseg();
1474 }
1475 else
1476 {
1477 fPExtMapImpl = ExtentMapImpl::makeExtentMapImpl(fEMShminfo->tableShmkey, 0);
1478 ASSERT(fPExtMapImpl);
1479
1480 if (r_only)
1481 fPExtMapImpl->makeReadOnly();
1482
1483 fExtentMap = fPExtMapImpl->get();
1484
1485 if (fExtentMap == NULL)
1486 {
1487 log_errno("ExtentMap::grabEMEntryTable(): shmat");
1488 throw runtime_error("ExtentMap::grabEMEntryTable(): shmat failed. Check the error log.");
1489 }
1490 }
1491 }
1492 else
1493 fExtentMap = fPExtMapImpl->get();
1494 }
1495
1496 /* always returns holding the FL lock */
grabFreeList(OPS op)1497 void ExtentMap::grabFreeList(OPS op)
1498 {
1499 boost::mutex::scoped_lock lk(mutex, boost::defer_lock);
1500
1501 if (op == READ)
1502 {
1503 fFLShminfo = fMST.getTable_read(MasterSegmentTable::EMFreeList);
1504 lk.lock();
1505 }
1506 else
1507 {
1508 fFLShminfo = fMST.getTable_write(MasterSegmentTable::EMFreeList);
1509 flLocked = true;
1510 }
1511
1512 if (!fPFreeListImpl || fPFreeListImpl->key() != (unsigned)fFLShminfo->tableShmkey)
1513 {
1514 if (fFreeList != NULL)
1515 {
1516 fFreeList = NULL;
1517 }
1518
1519 if (fFLShminfo->allocdSize == 0)
1520 {
1521 if (op == READ)
1522 {
1523 lk.unlock();
1524 fMST.getTable_upgrade(MasterSegmentTable::EMFreeList);
1525 flLocked = true;
1526
1527 if (fFLShminfo->allocdSize == 0)
1528 growFLShmseg();
1529
1530 flLocked = false; // has to be done holding the write lock
1531 fMST.getTable_downgrade(MasterSegmentTable::EMFreeList);
1532 }
1533 else
1534 growFLShmseg();
1535 }
1536 else
1537 {
1538 fPFreeListImpl = FreeListImpl::makeFreeListImpl(fFLShminfo->tableShmkey, 0);
1539 ASSERT(fPFreeListImpl);
1540
1541 if (r_only)
1542 fPFreeListImpl->makeReadOnly();
1543
1544 fFreeList = fPFreeListImpl->get();
1545
1546 if (fFreeList == NULL)
1547 {
1548 log_errno("ExtentMap::grabFreeList(): shmat");
1549 throw runtime_error("ExtentMap::grabFreeList(): shmat failed. Check the error log.");
1550 }
1551
1552 if (op == READ)
1553 lk.unlock();
1554 }
1555 }
1556 else
1557 {
1558 fFreeList = fPFreeListImpl->get();
1559
1560 if (op == READ)
1561 lk.unlock();
1562 }
1563 }
1564
releaseEMEntryTable(OPS op)1565 void ExtentMap::releaseEMEntryTable(OPS op)
1566 {
1567 if (op == READ)
1568 fMST.releaseTable_read(MasterSegmentTable::EMTable);
1569 else
1570 {
1571 /*
1572 Note: Technically we should mark it unlocked after it's unlocked,
1573 however, that's a race condition. The only reason the up operation
1574 here will fail is if the underlying semaphore doesn't exist anymore
1575 or there is a locking logic error somewhere else. Either way,
1576 declaring the EM unlocked here is OK. Same with all similar assignments.
1577 */
1578 emLocked = false;
1579 fMST.releaseTable_write(MasterSegmentTable::EMTable);
1580 }
1581 }
1582
releaseFreeList(OPS op)1583 void ExtentMap::releaseFreeList(OPS op)
1584 {
1585 if (op == READ)
1586 fMST.releaseTable_read(MasterSegmentTable::EMFreeList);
1587 else
1588 {
1589 flLocked = false;
1590 fMST.releaseTable_write(MasterSegmentTable::EMFreeList);
1591 }
1592 }
1593
chooseEMShmkey()1594 key_t ExtentMap::chooseEMShmkey()
1595 {
1596 int fixedKeys = 1;
1597 key_t ret;
1598
1599 if (fEMShminfo->tableShmkey + 1 == (key_t) (fShmKeys.KEYRANGE_EXTENTMAP_BASE +
1600 fShmKeys.KEYRANGE_SIZE - 1) || (unsigned)fEMShminfo->tableShmkey < fShmKeys.KEYRANGE_EXTENTMAP_BASE)
1601 ret = fShmKeys.KEYRANGE_EXTENTMAP_BASE + fixedKeys;
1602 else
1603 ret = fEMShminfo->tableShmkey + 1;
1604
1605 return ret;
1606 }
1607
chooseFLShmkey()1608 key_t ExtentMap::chooseFLShmkey()
1609 {
1610 int fixedKeys = 1, ret;
1611
1612 if (fFLShminfo->tableShmkey + 1 == (key_t) (fShmKeys.KEYRANGE_EMFREELIST_BASE +
1613 fShmKeys.KEYRANGE_SIZE - 1) || (unsigned)fFLShminfo->tableShmkey < fShmKeys.KEYRANGE_EMFREELIST_BASE)
1614 ret = fShmKeys.KEYRANGE_EMFREELIST_BASE + fixedKeys;
1615 else
1616 ret = fFLShminfo->tableShmkey + 1;
1617
1618 return ret;
1619 }
1620
1621 /* Must be called holding the EM write lock
1622 Returns with the new shmseg mapped */
growEMShmseg(size_t nrows)1623 void ExtentMap::growEMShmseg(size_t nrows)
1624 {
1625 size_t allocSize;
1626 key_t newshmkey;
1627
1628 if (fEMShminfo->allocdSize == 0)
1629 allocSize = EM_INITIAL_SIZE;
1630 else
1631 allocSize = fEMShminfo->allocdSize + EM_INCREMENT;
1632
1633 newshmkey = chooseEMShmkey();
1634 ASSERT((allocSize == EM_INITIAL_SIZE && !fPExtMapImpl) || fPExtMapImpl);
1635
1636 //Use the larger of the calculated value or the specified value
1637 allocSize = max(allocSize, nrows * sizeof(EMEntry));
1638
1639 if (!fPExtMapImpl)
1640 {
1641 fPExtMapImpl = ExtentMapImpl::makeExtentMapImpl(newshmkey, allocSize, r_only);
1642 }
1643 else
1644 {
1645 fPExtMapImpl->grow(newshmkey, allocSize);
1646 }
1647
1648 fEMShminfo->tableShmkey = newshmkey;
1649 fEMShminfo->allocdSize = allocSize;
1650
1651 if (r_only)
1652 fPExtMapImpl->makeReadOnly();
1653
1654 fExtentMap = fPExtMapImpl->get();
1655 }
1656
1657 /* Must be called holding the FL lock
1658 Returns with the new shmseg mapped */
growFLShmseg()1659 void ExtentMap::growFLShmseg()
1660 {
1661 size_t allocSize;
1662 key_t newshmkey;
1663
1664 if (fFLShminfo->allocdSize == 0)
1665 allocSize = EM_FREELIST_INITIAL_SIZE;
1666 else
1667 allocSize = fFLShminfo->allocdSize + EM_FREELIST_INCREMENT;
1668
1669 newshmkey = chooseFLShmkey();
1670 ASSERT((allocSize == EM_FREELIST_INITIAL_SIZE && !fPFreeListImpl) || fPFreeListImpl);
1671
1672 if (!fPFreeListImpl)
1673 fPFreeListImpl = FreeListImpl::makeFreeListImpl(newshmkey, allocSize, false);
1674 else
1675 fPFreeListImpl->grow(newshmkey, allocSize);
1676
1677 fFLShminfo->tableShmkey = newshmkey;
1678 fFreeList = fPFreeListImpl->get();
1679
1680 // init freelist entry
1681 if (fFLShminfo->allocdSize == 0)
1682 {
1683 fFreeList->size = (1ULL << 36) / 1024;
1684 fFLShminfo->currentSize = sizeof(InlineLBIDRange);
1685 }
1686
1687 fFLShminfo->allocdSize = allocSize;
1688
1689 if (r_only)
1690 fPFreeListImpl->makeReadOnly();
1691
1692 fFreeList = fPFreeListImpl->get();
1693 }
1694
1695 // @bug 1509. Added new version of lookup that returns the first and last lbid for the extent that contains the
1696 // given lbid.
lookup(LBID_t lbid,LBID_t & firstLbid,LBID_t & lastLbid)1697 int ExtentMap::lookup(LBID_t lbid, LBID_t& firstLbid, LBID_t& lastLbid)
1698 {
1699 #ifdef BRM_INFO
1700
1701 if (fDebug)
1702 {
1703 TRACER_WRITELATER("lookup");
1704 TRACER_ADDINPUT(lbid);
1705 TRACER_ADDOUTPUT(firstLbid);
1706 TRACER_ADDOUTPUT(lastLbid);
1707 TRACER_WRITE;
1708 }
1709
1710 #endif
1711 int entries, i;
1712 LBID_t lastBlock;
1713
1714 #ifdef BRM_DEBUG
1715
1716 //printEM();
1717 if (lbid < 0)
1718 {
1719 log("ExtentMap::lookup(): lbid must be >= 0", logging::LOG_TYPE_DEBUG);
1720 cout << "ExtentMap::lookup(): lbid must be >= 0. Lbid passed was " << lbid << endl;
1721 throw invalid_argument("ExtentMap::lookup(): lbid must be >= 0");
1722 }
1723
1724 #endif
1725
1726 grabEMEntryTable(READ);
1727 entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
1728
1729 for (i = 0; i < entries; i++)
1730 {
1731 if (fExtentMap[i].range.size != 0)
1732 {
1733 lastBlock = fExtentMap[i].range.start +
1734 (static_cast<LBID_t>(fExtentMap[i].range.size) * 1024) - 1;
1735
1736 if (lbid >= fExtentMap[i].range.start && lbid <= lastBlock)
1737 {
1738 firstLbid = fExtentMap[i].range.start;
1739 lastLbid = lastBlock;
1740 releaseEMEntryTable(READ);
1741 return 0;
1742 }
1743 }
1744 }
1745
1746 releaseEMEntryTable(READ);
1747 return -1;
1748 }
1749
1750 // @bug 1055+. New functions added for multiple files per OID enhancement.
lookupLocal(LBID_t lbid,int & OID,uint16_t & dbRoot,uint32_t & partitionNum,uint16_t & segmentNum,uint32_t & fileBlockOffset)1751 int ExtentMap::lookupLocal(LBID_t lbid, int& OID, uint16_t& dbRoot, uint32_t& partitionNum, uint16_t& segmentNum, uint32_t& fileBlockOffset)
1752 {
1753 #ifdef BRM_INFO
1754
1755 if (fDebug)
1756 {
1757 TRACER_WRITELATER("lookupLocal");
1758 TRACER_ADDINPUT(lbid);
1759 TRACER_ADDOUTPUT(OID);
1760 TRACER_ADDSHORTOUTPUT(dbRoot);
1761 TRACER_ADDOUTPUT(partitionNum);
1762 TRACER_ADDSHORTOUTPUT(segmentNum);
1763 TRACER_ADDOUTPUT(fileBlockOffset);
1764 TRACER_WRITE;
1765 }
1766
1767 #endif
1768 #ifdef EM_AS_A_TABLE_POC__
1769
1770 if (lbid >= (1LL << 54))
1771 {
1772 OID = 1084;
1773 dbRoot = 1;
1774 partitionNum = 0;
1775 segmentNum = 0;
1776 fileBlockOffset = 0;
1777 return 0;
1778 }
1779
1780 #endif
1781 int entries, i, offset;
1782 LBID_t lastBlock;
1783
1784 if (lbid < 0)
1785 {
1786 ostringstream oss;
1787 oss << "ExtentMap::lookupLocal(): invalid lbid requested: " << lbid;
1788 log(oss.str(), logging::LOG_TYPE_CRITICAL);
1789 throw invalid_argument(oss.str());
1790 }
1791
1792 grabEMEntryTable(READ);
1793
1794 entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
1795
1796 for (i = 0; i < entries; i++)
1797 {
1798 if (fExtentMap[i].range.size != 0)
1799 {
1800 lastBlock = fExtentMap[i].range.start +
1801 (static_cast<LBID_t>(fExtentMap[i].range.size) * 1024) - 1;
1802
1803 if (lbid >= fExtentMap[i].range.start && lbid <= lastBlock)
1804 {
1805 OID = fExtentMap[i].fileID;
1806 dbRoot = fExtentMap[i].dbRoot;
1807 segmentNum = fExtentMap[i].segmentNum;
1808 partitionNum = fExtentMap[i].partitionNum;
1809
1810 // TODO: Offset logic.
1811 offset = lbid - fExtentMap[i].range.start;
1812 fileBlockOffset = fExtentMap[i].blockOffset + offset;
1813
1814 releaseEMEntryTable(READ);
1815 return 0;
1816 }
1817 }
1818 }
1819
1820 releaseEMEntryTable(READ);
1821 return -1;
1822 }
1823
lookupLocal(int OID,uint32_t partitionNum,uint16_t segmentNum,uint32_t fileBlockOffset,LBID_t & LBID)1824 int ExtentMap::lookupLocal(int OID, uint32_t partitionNum, uint16_t segmentNum, uint32_t fileBlockOffset, LBID_t& LBID)
1825 {
1826 #ifdef BRM_INFO
1827
1828 if (fDebug)
1829 {
1830 TRACER_WRITELATER("lookupLocal");
1831 TRACER_ADDINPUT(OID);
1832 TRACER_ADDINPUT(partitionNum);
1833 TRACER_ADDSHORTINPUT(segmentNum);
1834 TRACER_ADDINPUT(fileBlockOffset);
1835 TRACER_ADDOUTPUT(LBID);
1836 TRACER_WRITE;
1837 }
1838
1839 #endif
1840 int entries, i, offset;
1841
1842 if (OID < 0)
1843 {
1844 log("ExtentMap::lookup(): OID and FBO must be >= 0", logging::LOG_TYPE_DEBUG);
1845 throw invalid_argument("ExtentMap::lookup(): OID and FBO must be >= 0");
1846 }
1847
1848 grabEMEntryTable(READ);
1849
1850 entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
1851
1852 for (i = 0; i < entries; i++)
1853 {
1854
1855 // TODO: Blockoffset logic.
1856 if (fExtentMap[i].range.size != 0 &&
1857 fExtentMap[i].fileID == OID &&
1858 fExtentMap[i].partitionNum == partitionNum &&
1859 fExtentMap[i].segmentNum == segmentNum &&
1860 fExtentMap[i].blockOffset <= fileBlockOffset &&
1861 fileBlockOffset <= (fExtentMap[i].blockOffset +
1862 (static_cast<LBID_t>(fExtentMap[i].range.size) * 1024) - 1))
1863 {
1864
1865 offset = fileBlockOffset - fExtentMap[i].blockOffset;
1866 LBID = fExtentMap[i].range.start + offset;
1867 releaseEMEntryTable(READ);
1868 return 0;
1869 }
1870 }
1871
1872 releaseEMEntryTable(READ);
1873 return -1;
1874 }
1875
lookupLocal_DBroot(int OID,uint16_t dbroot,uint32_t partitionNum,uint16_t segmentNum,uint32_t fileBlockOffset,LBID_t & LBID)1876 int ExtentMap::lookupLocal_DBroot(int OID, uint16_t dbroot, uint32_t partitionNum, uint16_t segmentNum,
1877 uint32_t fileBlockOffset, LBID_t& LBID)
1878 {
1879 #ifdef BRM_INFO
1880
1881 if (fDebug)
1882 {
1883 TRACER_WRITELATER("lookupLocal");
1884 TRACER_ADDINPUT(OID);
1885 TRACER_ADDINPUT(partitionNum);
1886 TRACER_ADDSHORTINPUT(segmentNum);
1887 TRACER_ADDINPUT(fileBlockOffset);
1888 TRACER_ADDOUTPUT(LBID);
1889 TRACER_WRITE;
1890 }
1891
1892 #endif
1893 int entries, i, offset;
1894
1895 if (OID < 0)
1896 {
1897 log("ExtentMap::lookup(): OID and FBO must be >= 0", logging::LOG_TYPE_DEBUG);
1898 throw invalid_argument("ExtentMap::lookup(): OID and FBO must be >= 0");
1899 }
1900
1901 grabEMEntryTable(READ);
1902
1903 entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
1904
1905 for (i = 0; i < entries; i++)
1906 {
1907
1908 // TODO: Blockoffset logic.
1909 if (fExtentMap[i].range.size != 0 &&
1910 fExtentMap[i].fileID == OID &&
1911 fExtentMap[i].dbRoot == dbroot &&
1912 fExtentMap[i].partitionNum == partitionNum &&
1913 fExtentMap[i].segmentNum == segmentNum &&
1914 fExtentMap[i].blockOffset <= fileBlockOffset &&
1915 fileBlockOffset <= (fExtentMap[i].blockOffset +
1916 (static_cast<LBID_t>(fExtentMap[i].range.size) * 1024) - 1))
1917 {
1918
1919 offset = fileBlockOffset - fExtentMap[i].blockOffset;
1920 LBID = fExtentMap[i].range.start + offset;
1921 releaseEMEntryTable(READ);
1922 return 0;
1923 }
1924 }
1925
1926 releaseEMEntryTable(READ);
1927 return -1;
1928 }
1929
1930 // @bug 1055-.
1931
1932 //------------------------------------------------------------------------------
1933 // Lookup/return starting LBID for the specified OID, partition, segment, and
1934 // file block offset.
1935 //------------------------------------------------------------------------------
lookupLocalStartLbid(int OID,uint32_t partitionNum,uint16_t segmentNum,uint32_t fileBlockOffset,LBID_t & LBID)1936 int ExtentMap::lookupLocalStartLbid(int OID,
1937 uint32_t partitionNum,
1938 uint16_t segmentNum,
1939 uint32_t fileBlockOffset,
1940 LBID_t& LBID)
1941 {
1942 #ifdef BRM_INFO
1943
1944 if (fDebug)
1945 {
1946 TRACER_WRITELATER("lookupLocalStartLbid");
1947 TRACER_ADDINPUT(OID);
1948 TRACER_ADDINPUT(partitionNum);
1949 TRACER_ADDSHORTINPUT(segmentNum);
1950 TRACER_ADDINPUT(fileBlockOffset);
1951 TRACER_ADDOUTPUT(LBID);
1952 TRACER_WRITE;
1953 }
1954
1955 #endif
1956 int entries, i;
1957
1958 if (OID < 0)
1959 {
1960 log("ExtentMap::lookupLocalStartLbid(): OID and FBO must be >= 0",
1961 logging::LOG_TYPE_DEBUG);
1962 throw invalid_argument("ExtentMap::lookupLocalStartLbid(): "
1963 "OID and FBO must be >= 0");
1964 }
1965
1966 grabEMEntryTable(READ);
1967 entries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
1968
1969 for (i = 0; i < entries; i++)
1970 {
1971 if (fExtentMap[i].range.size != 0 &&
1972 fExtentMap[i].fileID == OID &&
1973 fExtentMap[i].partitionNum == partitionNum &&
1974 fExtentMap[i].segmentNum == segmentNum &&
1975 fExtentMap[i].blockOffset <= fileBlockOffset &&
1976 fileBlockOffset <= (fExtentMap[i].blockOffset +
1977 (static_cast<LBID_t>(fExtentMap[i].range.size) * 1024) - 1))
1978 {
1979 LBID = fExtentMap[i].range.start;
1980 releaseEMEntryTable(READ);
1981 return 0;
1982 }
1983 }
1984
1985 releaseEMEntryTable(READ);
1986
1987 return -1;
1988 }
1989
1990 //------------------------------------------------------------------------------
1991 // Creates a "stripe" of column extents across a table, for the specified
1992 // columns and DBRoot.
1993 // cols - Vector of columns OIDs and widths to be allocated
1994 // dbRoot - DBRoot to be used for new extents
1995 // partitionNum - when creating the first extent for a column (on dbRoot),
1996 // partitionNum must be specified as an input argument.
1997 // If not the first extent on dbRoot, then partitionNum
1998 // for the new extents will be assigned and returned, based
1999 // on the current last extent for dbRoot.
2000 // output:
2001 // partitionNum - Partition number for new extents
//        segmentNum   - Segment number for new extents
2003 // extents - starting Lbid, numBlocks, and FBO for new extents
2004 //------------------------------------------------------------------------------
createStripeColumnExtents(const vector<CreateStripeColumnExtentsArgIn> & cols,uint16_t dbRoot,uint32_t & partitionNum,uint16_t & segmentNum,vector<CreateStripeColumnExtentsArgOut> & extents)2005 void ExtentMap::createStripeColumnExtents(
2006 const vector<CreateStripeColumnExtentsArgIn>& cols,
2007 uint16_t dbRoot,
2008 uint32_t& partitionNum,
2009 uint16_t& segmentNum,
2010 vector<CreateStripeColumnExtentsArgOut>& extents)
2011 {
2012 LBID_t startLbid;
2013 int allocSize;
2014 uint32_t startBlkOffset;
2015
2016 grabEMEntryTable(WRITE);
2017 grabFreeList(WRITE);
2018
2019 OID_t baselineOID = -1;
2020 uint16_t baselineSegmentNum = -1;
2021 uint32_t baselinePartNum = -1;
2022
2023 for (uint32_t i = 0; i < cols.size(); i++)
2024 {
2025 createColumnExtent_DBroot(
2026 cols[i].oid,
2027 cols[i].width,
2028 dbRoot,
2029 cols[i].colDataType,
2030 partitionNum,
2031 segmentNum,
2032 startLbid,
2033 allocSize,
2034 startBlkOffset,
2035 false);
2036
2037 if (i == 0)
2038 {
2039 baselineOID = cols[i].oid;
2040 baselineSegmentNum = segmentNum;
2041 baselinePartNum = partitionNum;
2042 }
2043 else
2044 {
2045 if ((segmentNum != baselineSegmentNum) ||
2046 (partitionNum != baselinePartNum))
2047 {
2048 ostringstream oss;
2049 oss << "ExtentMap::createStripeColumnExtents(): "
2050 "Inconsistent segment extent creation: " <<
2051 "DBRoot: " << dbRoot <<
2052 "OID1: " << baselineOID <<
2053 "; Part#: " << baselinePartNum <<
2054 "; Seg#: " << baselineSegmentNum <<
2055 " <versus> OID2: " << cols[i].oid <<
2056 "; Part#: " << partitionNum <<
2057 "; Seg#: " << segmentNum;
2058 log(oss.str(), logging::LOG_TYPE_CRITICAL);
2059 throw invalid_argument(oss.str());
2060 }
2061 }
2062
2063 CreateStripeColumnExtentsArgOut extentInfo;
2064 extentInfo.startLbid = startLbid;
2065 extentInfo.allocSize = allocSize;
2066 extentInfo.startBlkOffset = startBlkOffset;
2067 extents.push_back( extentInfo );
2068 }
2069 }
2070
2071 //------------------------------------------------------------------------------
2072 // Creates an extent for a column file on the specified DBRoot. This is the
2073 // external API function referenced by the dbrm wrapper class.
2074 // required input:
2075 // OID - column OID for which the extent is to be created
2076 // colWidth - width of column in bytes
2077 // dbRoot - DBRoot where extent is to be added
2078 // partitionNum - when creating the first extent for a column (on dbRoot),
2079 // partitionNum must be specified as an input argument.
2080 // If not the first extent on dbRoot, then partitionNum
2081 // for the new extent will be assigned and returned, based
2082 // on the current last extent for dbRoot.
2083 // useLock - Grab ExtentMap and FreeList WRITE lock to perform work
2084 // output:
2085 // partitionNum - partition number for the new extent
2086 // segmentNum - segment number for the new extent
2087 // lbid - starting LBID of the created extent
2088 // allocdsize - number of LBIDs allocated
2089 // startBlockOffset-starting block of the created extent
2090 //------------------------------------------------------------------------------
createColumnExtent_DBroot(int OID,uint32_t colWidth,uint16_t dbRoot,execplan::CalpontSystemCatalog::ColDataType colDataType,uint32_t & partitionNum,uint16_t & segmentNum,LBID_t & lbid,int & allocdsize,uint32_t & startBlockOffset,bool useLock)2091 void ExtentMap::createColumnExtent_DBroot(int OID,
2092 uint32_t colWidth,
2093 uint16_t dbRoot,
2094 execplan::CalpontSystemCatalog::ColDataType colDataType,
2095 uint32_t& partitionNum,
2096 uint16_t& segmentNum,
2097 LBID_t& lbid,
2098 int& allocdsize,
2099 uint32_t& startBlockOffset,
2100 bool useLock) // defaults to true
2101 {
2102 #ifdef BRM_INFO
2103
2104 if (fDebug)
2105 {
2106 TRACER_WRITELATER("createColumnExtent_DBroot");
2107 TRACER_ADDINPUT(OID);
2108 TRACER_ADDINPUT(colWidth);
2109 TRACER_ADDSHORTINPUT(dbRoot);
2110 TRACER_ADDOUTPUT(partitionNum);
2111 TRACER_ADDSHORTOUTPUT(segmentNum);
2112 TRACER_ADDINT64OUTPUT(lbid);
2113 TRACER_ADDOUTPUT(allocdsize);
2114 TRACER_ADDOUTPUT(startBlockOffset);
2115 TRACER_WRITE;
2116 }
2117
2118 #endif
2119
2120 #ifdef BRM_DEBUG
2121
2122 if (OID <= 0)
2123 {
2124 log("ExtentMap::createColumnExtent_DBroot(): OID must be > 0",
2125 logging::LOG_TYPE_DEBUG);
2126 throw invalid_argument(
2127 "ExtentMap::createColumnExtent_DBroot(): OID must be > 0");
2128 }
2129
2130 #endif
2131
2132 // Convert extent size in rows to extent size in 8192-byte blocks.
2133 // extentRows should be multiple of blocksize (8192).
2134 const unsigned EXTENT_SIZE = (getExtentRows() * colWidth) / BLOCK_SIZE;
2135
2136 if (useLock)
2137 {
2138 grabEMEntryTable(WRITE);
2139 grabFreeList(WRITE);
2140 }
2141
2142 if (fEMShminfo->currentSize == fEMShminfo->allocdSize)
2143 growEMShmseg();
2144
2145 // size is the number of multiples of 1024 blocks.
2146 // ex: size=1 --> 1024 blocks
2147 // size=2 --> 2048 blocks
2148 // size=3 --> 3072 blocks, etc.
2149 uint32_t size = EXTENT_SIZE / 1024;
2150
2151 lbid = _createColumnExtent_DBroot(size, OID, colWidth,
2152 dbRoot, colDataType, partitionNum, segmentNum, startBlockOffset);
2153
2154 allocdsize = EXTENT_SIZE;
2155 }
2156
2157 //------------------------------------------------------------------------------
2158 // Creates an extent for a column file for the specified DBRoot. This is the
2159 // internal implementation function.
2160 // input:
2161 // size - number of multiples of 1024 blocks allocated to the extent
2162 // ex: size=1 --> 1024 blocks
2163 // size=2 --> 2048 blocks
2164 // size=3 --> 3072 blocks, etc.
2165 // OID - column OID for which the extent is to be created
2166 // colWidth - width of column in bytes
2167 // dbRoot - dbRoot where extent is to be added
2168 // partitionNum - when creating the first extent for an empty dbRoot,
2169 // partitionNum must be specified as an input argument.
2170 // output:
2171 // partitionNum - when adding an extent to a dbRoot,
2172 // partitionNum will be the assigned partition number
2173 // segmentNum - segment number for the new extent
2174 // startBlockOffset-starting block of the created extent
2175 // returns starting LBID of the created extent.
2176 //------------------------------------------------------------------------------
_createColumnExtent_DBroot(uint32_t size,int OID,uint32_t colWidth,uint16_t dbRoot,execplan::CalpontSystemCatalog::ColDataType colDataType,uint32_t & partitionNum,uint16_t & segmentNum,uint32_t & startBlockOffset)2177 LBID_t ExtentMap::_createColumnExtent_DBroot(uint32_t size, int OID,
2178 uint32_t colWidth,
2179 uint16_t dbRoot,
2180 execplan::CalpontSystemCatalog::ColDataType colDataType,
2181 uint32_t& partitionNum,
2182 uint16_t& segmentNum,
2183 uint32_t& startBlockOffset)
2184 {
2185 int emptyEMEntry = -1;
2186 int lastExtentIndex = -1;
2187 uint32_t highestOffset = 0;
2188 uint32_t highestPartNum = 0;
2189 uint16_t highestSegNum = 0;
2190 const unsigned FILES_PER_COL_PART = getFilesPerColumnPartition();
2191 const unsigned EXTENT_ROWS = getExtentRows();
2192 const unsigned EXTENTS_PER_SEGFILE = getExtentsPerSegmentFile();
2193 const unsigned DBROOT_COUNT = getDbRootCount();
2194
2195 // Variables that track list of segfiles in target (HWM) DBRoot & partition.
2196 // Map segment number to the highest fbo extent in each file
2197 typedef tr1::unordered_map<uint16_t, uint32_t> TargetDbRootSegsMap;
2198 typedef TargetDbRootSegsMap::iterator TargetDbRootSegsMapIter;
2199 typedef TargetDbRootSegsMap::const_iterator TargetDbRootSegsMapConstIter;
2200 TargetDbRootSegsMap targetDbRootSegs;
2201
2202 uint32_t highEmptySegNum = 0; // high seg num for user specified partition;
2203 // only comes into play for empty DBRoot.
2204 bool bHighEmptySegNumSet = false;
2205
2206 //--------------------------------------------------------------------------
2207 // First Step: Scan ExtentMap
2208 // 1. find HWM extent in relevant DBRoot
2209 // 2. if DBRoot is empty, track highest seg num in user specified partition
2210 // 3. Find first unused extent map entry
2211 //--------------------------------------------------------------------------
2212 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
2213
2214 LBID_t startLBID = getLBIDsFromFreeList( size );
2215
2216 // Find the first empty Entry; and find last extent for this OID and dbRoot
2217 for (int i = 0; i < emEntries; i++)
2218 {
2219 if (fExtentMap[i].range.size != 0)
2220 {
2221 if (fExtentMap[i].fileID == OID)
2222 {
2223
2224 // 1. Find HWM extent in relevant DBRoot
2225 if (fExtentMap[i].dbRoot == dbRoot)
2226 {
2227 if ( (fExtentMap[i].partitionNum > highestPartNum) ||
2228 ((fExtentMap[i].partitionNum == highestPartNum) &&
2229 (fExtentMap[i].blockOffset > highestOffset)) ||
2230 ((fExtentMap[i].partitionNum == highestPartNum) &&
2231 (fExtentMap[i].blockOffset == highestOffset) &&
2232 (fExtentMap[i].segmentNum >= highestSegNum)) )
2233 {
2234
2235 lastExtentIndex = i;
2236 highestPartNum = fExtentMap[i].partitionNum;
2237 highestSegNum = fExtentMap[i].segmentNum;
2238 highestOffset = fExtentMap[i].blockOffset;
2239 }
2240 }
2241
2242 // 2. for empty DBRoot track hi seg# in user specified part#
2243 if ((lastExtentIndex == -1) &&
2244 (fExtentMap[i].partitionNum == partitionNum))
2245 {
2246 if ((fExtentMap[i].segmentNum > highEmptySegNum) ||
2247 (!bHighEmptySegNumSet))
2248 {
2249 highEmptySegNum = fExtentMap[i].segmentNum;
2250 bHighEmptySegNumSet = true;
2251 }
2252 }
2253 } // found extentmap entry for specified OID
2254 } // found valid extentmap entry
2255
2256 // 3. Find first available extent map entry that can be reused
2257 else if (emptyEMEntry < 0)
2258 emptyEMEntry = i;
2259 } // Loop through extent map entries
2260
2261 if (emptyEMEntry == -1)
2262 {
2263 ostringstream oss;
2264 oss << "ExtentMap::_createColumnExtent_DBroot(): "
2265 "could not find an empty EMEntry for OID " << OID <<
2266 "; Extent Map is full",
2267 log(oss.str(),
2268 logging::LOG_TYPE_CRITICAL);
2269 throw logic_error( oss.str() );
2270 }
2271
2272 //--------------------------------------------------------------------------
2273 // If DBRoot is not empty, then...
2274 // Second Step: Scan ExtentMap again after I know the last partition
2275 // 4. track highest seg num for HWM+1 partition
2276 // 5. track highest seg num for HWM partition
2277 // 6. save list of segment numbers and fbos in target DBRoot and partition
2278 //
2279 // Scanning the extentmap a second time is not a good thing to be doing.
2280 // But the alternative isn't good either. There is certain information
2281 // I need to capture about the last partition and DBRoot, and for the next
2282 // partition as well (which may contain segment files on other DBRoots),
2283 // but until I scan the extentmap, I don't know what my last partition is.
2284 // If I try to do this in a single scan, then I am forced to spend time
2285 // capturing information about partitions that turn out to be inconse-
2286 // quential because the "known" last partition will keep changing as I
2287 // scan the extentmap.
2288 //--------------------------------------------------------------------------
2289 bool bSegsOutOfService = false;
2290 int partHighSeg = -1; // hi seg num for last partition
2291 int partHighSegNext = -1; // hi seg num for next partition
2292
2293 if (lastExtentIndex >= 0)
2294 {
2295 uint32_t targetDbRootPart = fExtentMap[lastExtentIndex].partitionNum;
2296 uint32_t targetDbRootPartNext = targetDbRootPart + 1;
2297 partHighSeg = fExtentMap[lastExtentIndex].segmentNum;
2298 targetDbRootSegs.insert( TargetDbRootSegsMap::value_type(
2299 fExtentMap[lastExtentIndex].segmentNum,
2300 fExtentMap[lastExtentIndex].blockOffset) );
2301
2302 for (int i = 0; i < emEntries; i++)
2303 {
2304 if (fExtentMap[i].range.size != 0)
2305 {
2306 if (fExtentMap[i].fileID == OID)
2307 {
2308
2309 // 4. Track hi seg for hwm+1 partition
2310 if (fExtentMap[i].partitionNum == targetDbRootPartNext)
2311 {
2312 if (fExtentMap[i].segmentNum > partHighSegNext)
2313 {
2314 partHighSegNext = fExtentMap[i].segmentNum;
2315 }
2316 }
2317
2318 // 5. Track hi seg for hwm partition
2319 else if (fExtentMap[i].partitionNum == targetDbRootPart)
2320 {
2321 if (fExtentMap[i].segmentNum > partHighSeg)
2322 {
2323 partHighSeg = fExtentMap[i].segmentNum;
2324 }
2325
2326 // 6. Save list of seg files in target DBRoot/Partition,
2327 // along with the highest fbo for each seg file
2328 if (fExtentMap[i].dbRoot == dbRoot)
2329 {
2330 if (fExtentMap[i].status == EXTENTOUTOFSERVICE)
2331 bSegsOutOfService = true;
2332
2333 TargetDbRootSegsMapIter iter =
2334 targetDbRootSegs.find(fExtentMap[i].segmentNum);
2335
2336 if (iter == targetDbRootSegs.end())
2337 {
2338 targetDbRootSegs.insert(
2339 TargetDbRootSegsMap::value_type(
2340 fExtentMap[i].segmentNum,
2341 fExtentMap[i].blockOffset) );
2342 }
2343 else
2344 {
2345 if (fExtentMap[i].blockOffset > iter->second)
2346 {
2347 iter->second = fExtentMap[i].blockOffset;
2348 }
2349 }
2350 }
2351 }
2352 } // found extentmap entry for specified OID
2353 } // found valid extentmap entry
2354 } // loop through extent map entries
2355 } // (lastExtentIndex >= 0)
2356
2357 //--------------------------------------------------------------------------
2358 // Third Step: Select partition and segment number for new extent
2359 // 1. Loop through targetDbRootSegs to find segment file for next extent
2360 // 2. Check for exceptions that warrant going to next physical partition
2361 // a. See if any extents are marked outOfService
2362 // b. See if extents are not evenly layered as expected
2363 // 3. Perform additional new partition/segment logic as applicable
2364 // a. No action taken if 2a or 2b already detected need for new partition
2365 // b. If HWM extent is in last file of DBRoot/Partition, see if next
2366 // extent goes in new partition, or if wrap-around within current
2367 // partition.
2368 // c. If extent needs to go in next partition, figure out the next
2369 // partition and the next available segment in that partition.
2370 // 4. Set blockOffset of new extent based on where extent is being added
2371 //--------------------------------------------------------------------------
2372 uint16_t newDbRoot = dbRoot;
2373 uint32_t newPartitionNum = partitionNum;
2374 uint16_t newSegmentNum = 0;
2375 uint32_t newBlockOffset = 0;
2376
2377 // If this is not the first extent for this OID and DBRoot then
2378 // extrapolate part# and seg# from last extent; wrap around segment and
2379 // partition number as needed.
2380 // else
2381 // use part# that the user specifies
2382 if (lastExtentIndex >= 0)
2383 {
2384 bool startNewPartition = false;
2385 bool startNewStripeInSegFile = false;
2386 const unsigned int filesPerDBRootPerPartition =
2387 FILES_PER_COL_PART / DBROOT_COUNT;
2388
2389 int& lastExtIdx = lastExtentIndex;
2390
2391 // Find first, last, next seg files in target partition and DBRoot
2392 uint16_t firstTargetSeg = fExtentMap[lastExtIdx].segmentNum;
2393 uint16_t lastTargetSeg = fExtentMap[lastExtIdx].segmentNum;
2394 uint16_t nextTargetSeg = fExtentMap[lastExtIdx].segmentNum;
2395
2396 // 1. Loop thru targetDbRootSegs[] to find next segment after
2397 // lastExtIdx in target list.
2398 // We save low and high segment to use in wrap-around case.
2399 if (targetDbRootSegs.size() > 1)
2400 {
2401 bool bNextSegSet = false;
2402
2403 for (TargetDbRootSegsMapConstIter iter = targetDbRootSegs.begin();
2404 iter != targetDbRootSegs.end();
2405 ++iter)
2406 {
2407 uint16_t targetSeg = iter->first;
2408
2409 if (targetSeg < firstTargetSeg)
2410 firstTargetSeg = targetSeg;
2411 else if (targetSeg > lastTargetSeg)
2412 lastTargetSeg = targetSeg;
2413
2414 if (targetSeg > fExtentMap[lastExtIdx].segmentNum)
2415 {
2416 if ((targetSeg < nextTargetSeg) || (!bNextSegSet))
2417 {
2418 nextTargetSeg = targetSeg;
2419 bNextSegSet = true;
2420 }
2421 }
2422 }
2423 }
2424
2425 newPartitionNum = fExtentMap[lastExtIdx].partitionNum;
2426
2427 // 2a. Skip to next physical partition if any extents in HWM partition/
2428 // DBRoot are marked as outOfService
2429 if (bSegsOutOfService)
2430 {
2431
2432 // cout << "Skipping to next partition (outOfService segs)" <<
2433 // ": oid-" << fExtentMap[lastExtentIndex].fileID <<
2434 // "; root-" << fExtentMap[lastExtentIndex].dbRoot <<
2435 // "; part-" << fExtentMap[lastExtentIndex].partitionNum << endl;
2436
2437 startNewPartition = true;
2438 }
2439
2440 // @bug 4765
2441 // 2b. Skip to next physical partition if we have a set of
2442 // segment files that are not "layered" as expected, meaning we
2443 // have > 1 layer of extents with an incomplete lower layer (could
2444 // be caused by the dropping of logical partitions).
2445 else if (targetDbRootSegs.size() < filesPerDBRootPerPartition)
2446 {
2447 for (TargetDbRootSegsMapConstIter iter = targetDbRootSegs.begin();
2448 iter != targetDbRootSegs.end();
2449 ++iter)
2450 {
2451 if (iter->second > 0)
2452 {
2453
2454 // cout << "Skipping to next partition (unbalanced)" <<
2455 // ": oid-" << fExtentMap[lastExtentIndex].fileID <<
2456 // "; root-" << fExtentMap[lastExtentIndex].dbRoot <<
2457 // "; part-" << fExtentMap[lastExtentIndex].partitionNum <<
2458 // "; seg-" << iter->first <<
2459 // "; hifbo-"<< iter->second << endl;
2460
2461 startNewPartition = true;
2462 break;
2463 }
2464 }
2465 }
2466
2467 // 3a.If we already detected need for new partition, then take no action
2468 if (startNewPartition)
2469 {
2470 // no action taken here; we take additional action later.
2471 }
2472
2473 // 3b.If HWM extent is in last seg file for this partition and DBRoot,
2474 // find out if we need to add a new partition for next extent.
2475 else if (targetDbRootSegs.size() >= filesPerDBRootPerPartition)
2476 {
2477 if (fExtentMap[lastExtIdx].segmentNum == lastTargetSeg)
2478 {
2479 // Use blockOffset of lastExtIdx to see if we need to add
2480 // the next extent to a new partition.
2481 if (fExtentMap[lastExtIdx].blockOffset ==
2482 ((EXTENTS_PER_SEGFILE - 1) *
2483 (EXTENT_ROWS * colWidth / BLOCK_SIZE)) )
2484 {
2485 startNewPartition = true;
2486 }
2487 else // Wrap-around; add extent to low seg in this partition
2488 {
2489 startNewStripeInSegFile = true;
2490 newSegmentNum = firstTargetSeg;
2491 }
2492 }
2493 else
2494 {
2495 newSegmentNum = nextTargetSeg;
2496 }
2497 }
2498 else // Select next segment file in current HWM partition
2499 {
2500 newSegmentNum = partHighSeg + 1;
2501 }
2502
2503 // 3c. Find new partition and segment if we can't create
2504 // an extent for this DBRoot in the current HWM partition.
2505 if (startNewPartition)
2506 {
2507 newPartitionNum++;
2508
2509 if (partHighSegNext == -1)
2510 newSegmentNum = 0;
2511 else
2512 newSegmentNum = partHighSegNext + 1;
2513 }
2514
2515 // 4. Set blockOffset (fbo) for new extent relative to it's seg file
2516 // case1: Init fbo to 0 if first extent in partition/DbRoot
2517 // case2: Init fbo to 0 if first extent in segment file (other than
2518 // first segment in this partition/DbRoot, which case1 handled)
2519 // case3: Init fbo based on previous extent
2520
2521 // case1: leave newBlockOffset set to 0
2522 if (startNewPartition)
2523 {
2524 //...no action necessary
2525 }
2526
2527 // case2: leave newBlockOffset set to 0
2528 else if ((fExtentMap[lastExtIdx].blockOffset == 0) &&
2529 (newSegmentNum > firstTargetSeg))
2530 {
2531 //...no action necessary
2532 }
2533
2534 // case3: Init blockOffset based on previous extent. If we are adding
2535 // extent to 1st seg file, then need to bump up the offset; else
2536 // adding extent to same stripe and can repeat the same offset.
2537 else
2538 {
2539 if (startNewStripeInSegFile) // start next stripe
2540 {
2541 newBlockOffset = static_cast<uint64_t>
2542 (fExtentMap[lastExtIdx].range.size) * 1024 +
2543 fExtentMap[lastExtIdx].blockOffset;
2544 }
2545 else // next extent, same stripe
2546 {
2547 newBlockOffset = fExtentMap[lastExtIdx].blockOffset;
2548 }
2549 }
2550 } // lastExtentIndex >= 0
2551 else // Empty DBRoot; use part# that the user specifies
2552 {
2553 if (bHighEmptySegNumSet)
2554 newSegmentNum = highEmptySegNum + 1;
2555 else
2556 newSegmentNum = 0;
2557 }
2558
2559 //--------------------------------------------------------------------------
2560 // Fourth Step: Construct the new extentmap entry
2561 //--------------------------------------------------------------------------
2562
2563 makeUndoRecord(&fExtentMap[emptyEMEntry], sizeof(EMEntry));
2564 EMEntry* e = &fExtentMap[emptyEMEntry];
2565
2566 e->range.start = startLBID;
2567 e->range.size = size;
2568 e->fileID = OID;
2569
2570 if (isUnsigned(colDataType))
2571 {
2572 e->partition.cprange.lo_val = numeric_limits<uint64_t>::max();
2573 e->partition.cprange.hi_val = 0;
2574 }
2575 else
2576 {
2577 e->partition.cprange.lo_val = numeric_limits<int64_t>::max();
2578 e->partition.cprange.hi_val = numeric_limits<int64_t>::min();
2579 }
2580
2581 e->partition.cprange.sequenceNum = 0;
2582
2583 e->colWid = colWidth;
2584
2585 e->dbRoot = newDbRoot;
2586 e->partitionNum = newPartitionNum;
2587 e->segmentNum = newSegmentNum;
2588
2589 e->blockOffset = newBlockOffset;
2590 e->HWM = 0;
2591 e->status = EXTENTUNAVAILABLE; // mark extent as in process
2592
2593 // Partition, segment, and blockOffset 0 represents new table or column.
2594 // When DDL creates a table, we can mark the first extent as VALID, since
2595 // the table has no data. Marking as VALID enables cpimport to update
2596 // the CP min/max for the first import.
2597 // If DDL is adding a column to an existing table, setting to VALID won't
2598 // hurt, because DDL resets to INVALID after the extent is created.
2599 if ((e->partitionNum == 0) &&
2600 (e->segmentNum == 0) &&
2601 (e->blockOffset == 0))
2602 e->partition.cprange.isValid = CP_VALID;
2603 else
2604 e->partition.cprange.isValid = CP_INVALID;
2605
2606 partitionNum = e->partitionNum;
2607 segmentNum = e->segmentNum;
2608 startBlockOffset = e->blockOffset;
2609
2610 makeUndoRecord(fEMShminfo, sizeof(MSTEntry));
2611 fEMShminfo->currentSize += sizeof(struct EMEntry);
2612
2613 return startLBID;
2614 }
2615
2616 //------------------------------------------------------------------------------
2617 // Creates an extent for the exact segment column file specified by the
2618 // requested OID, DBRoot, partition number, and segment number. This is
2619 // the external API function referenced by the dbrm wrapper class.
2620 // required input:
2621 // OID - column OID for which the extent is to be created
2622 // colWidth - width of column in bytes
2623 // dbRoot - DBRoot where extent is to be added
2624 // partitionNum - partitionNum
2625 // segmentNum - segmentNum
2626 // output:
2627 // lbid - starting LBID of the created extent
2628 // allocdsize - number of LBIDs allocated
2629 // startBlockOffset-starting block of the created extent
2630 //------------------------------------------------------------------------------
createColumnExtentExactFile(int OID,uint32_t colWidth,uint16_t dbRoot,uint32_t partitionNum,uint16_t segmentNum,execplan::CalpontSystemCatalog::ColDataType colDataType,LBID_t & lbid,int & allocdsize,uint32_t & startBlockOffset)2631 void ExtentMap::createColumnExtentExactFile(int OID,
2632 uint32_t colWidth,
2633 uint16_t dbRoot,
2634 uint32_t partitionNum,
2635 uint16_t segmentNum,
2636 execplan::CalpontSystemCatalog::ColDataType colDataType,
2637 LBID_t& lbid,
2638 int& allocdsize,
2639 uint32_t& startBlockOffset)
2640 {
2641 #ifdef BRM_INFO
2642
2643 if (fDebug)
2644 {
2645 TRACER_WRITELATER("createColumnExtentExactFile");
2646 TRACER_ADDINPUT(OID);
2647 TRACER_ADDINPUT(colWidth);
2648 TRACER_ADDSHORTINPUT(dbRoot);
2649 TRACER_ADDOUTPUT(partitionNum);
2650 TRACER_ADDSHORTOUTPUT(segmentNum);
2651 TRACER_ADDINT64OUTPUT(lbid);
2652 TRACER_ADDOUTPUT(allocdsize);
2653 TRACER_ADDOUTPUT(startBlockOffset);
2654 TRACER_WRITE;
2655 }
2656
2657 #endif
2658
2659 #ifdef BRM_DEBUG
2660
2661 if (OID <= 0)
2662 {
2663 log("ExtentMap::createColumnExtentExactFile(): OID must be > 0",
2664 logging::LOG_TYPE_DEBUG);
2665 throw invalid_argument(
2666 "ExtentMap::createColumnExtentExactFile(): OID must be > 0");
2667 }
2668
2669 #endif
2670
2671 // Convert extent size in rows to extent size in 8192-byte blocks.
2672 // extentRows should be multiple of blocksize (8192).
2673 const unsigned EXTENT_SIZE = (getExtentRows() * colWidth) / BLOCK_SIZE;
2674 grabEMEntryTable(WRITE);
2675 grabFreeList(WRITE);
2676
2677 if (fEMShminfo->currentSize == fEMShminfo->allocdSize)
2678 growEMShmseg();
2679
2680 // size is the number of multiples of 1024 blocks.
2681 // ex: size=1 --> 1024 blocks
2682 // size=2 --> 2048 blocks
2683 // size=3 --> 3072 blocks, etc.
2684 uint32_t size = EXTENT_SIZE / 1024;
2685
2686 lbid = _createColumnExtentExactFile(size, OID, colWidth,
2687 dbRoot, partitionNum, segmentNum, colDataType, startBlockOffset);
2688
2689 allocdsize = EXTENT_SIZE;
2690 }
2691
2692 //------------------------------------------------------------------------------
2693 // Creates an extent for the exact segment file specified by the requested
2694 // OID, DBRoot, partition, and segment. This is the internal implementation
2695 // function.
2696 // input:
2697 // size - number of multiples of 1024 blocks allocated to the extent
2698 // ex: size=1 --> 1024 blocks
2699 // size=2 --> 2048 blocks
2700 // size=3 --> 3072 blocks, etc.
2701 // OID - column OID for which the extent is to be created
2702 // colWidth - width of column in bytes
2703 // dbRoot - dbRoot where extent is to be added
2704 // partitionNum - partitionNum
2705 // segmentNum - segmentNum
2706 // output:
2707 // startBlockOffset-starting block of the created extent
2708 // returns starting LBID of the created extent.
2709 //------------------------------------------------------------------------------
_createColumnExtentExactFile(uint32_t size,int OID,uint32_t colWidth,uint16_t dbRoot,uint32_t partitionNum,uint16_t segmentNum,execplan::CalpontSystemCatalog::ColDataType colDataType,uint32_t & startBlockOffset)2710 LBID_t ExtentMap::_createColumnExtentExactFile(uint32_t size, int OID,
2711 uint32_t colWidth,
2712 uint16_t dbRoot,
2713 uint32_t partitionNum,
2714 uint16_t segmentNum,
2715 execplan::CalpontSystemCatalog::ColDataType colDataType,
2716 uint32_t& startBlockOffset)
2717 {
2718 int emptyEMEntry = -1;
2719 int lastExtentIndex = -1;
2720 uint32_t highestOffset = 0;
2721
2722 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
2723 LBID_t startLBID = getLBIDsFromFreeList( size );
2724
2725 // Find the first empty Entry; and find the last extent for this
2726 // combination of OID, partition, and segment.
2727 for (int i = 0; i < emEntries; i++)
2728 {
2729 if (fExtentMap[i].range.size != 0)
2730 {
2731 if (fExtentMap[i].fileID == OID)
2732 {
2733 if ((fExtentMap[i].dbRoot == dbRoot) &&
2734 (fExtentMap[i].partitionNum == partitionNum) &&
2735 (fExtentMap[i].segmentNum == segmentNum) &&
2736 (fExtentMap[i].blockOffset >= highestOffset))
2737 {
2738 lastExtentIndex = i;
2739 highestOffset = fExtentMap[i].blockOffset;
2740 }
2741 }
2742 }
2743 else if (emptyEMEntry < 0)
2744 emptyEMEntry = i;
2745 } // Loop through extent map entries
2746
2747 if (emptyEMEntry == -1)
2748 {
2749 ostringstream oss;
2750 oss << "ExtentMap::_createColumnExtentExactFile(): "
2751 "could not find an empty EMEntry for OID " << OID <<
2752 "; Extent Map is full",
2753 log(oss.str(),
2754 logging::LOG_TYPE_CRITICAL);
2755 throw logic_error( oss.str() );
2756 }
2757
2758 makeUndoRecord(&fExtentMap[emptyEMEntry], sizeof(EMEntry));
2759 EMEntry* e = &fExtentMap[emptyEMEntry];
2760
2761 e->range.start = startLBID;
2762 e->range.size = size;
2763 e->fileID = OID;
2764
2765 if (isUnsigned(colDataType))
2766 {
2767 e->partition.cprange.lo_val = numeric_limits<uint64_t>::max();
2768 e->partition.cprange.hi_val = 0;
2769 }
2770 else
2771 {
2772 e->partition.cprange.lo_val = numeric_limits<int64_t>::max();
2773 e->partition.cprange.hi_val = numeric_limits<int64_t>::min();
2774 }
2775
2776 e->partition.cprange.sequenceNum = 0;
2777
2778 e->colWid = colWidth;
2779
2780 e->dbRoot = dbRoot;
2781 e->partitionNum = partitionNum;
2782 e->segmentNum = segmentNum;
2783 e->status = EXTENTUNAVAILABLE; // mark extent as in process
2784
2785 // If first extent for this OID, partition, dbroot, and segment then
2786 // blockOffset is set to 0
2787 // else
2788 // blockOffset is extrapolated from the last extent
2789 if (lastExtentIndex == -1)
2790 {
2791 e->blockOffset = 0;
2792 e->HWM = 0;
2793 }
2794 else
2795 {
2796 e->blockOffset = static_cast<uint64_t>
2797 (fExtentMap[lastExtentIndex].range.size) * 1024 +
2798 fExtentMap[lastExtentIndex].blockOffset;
2799 e->HWM = 0;
2800 }
2801
2802 // Partition, segment, and blockOffset 0 represents new table or column.
2803 // When DDL creates a table, we can mark the first extent as VALID, since
2804 // the table has no data. Marking as VALID enables cpimport to update
2805 // the CP min/max for the first import.
2806 // If DDL is adding a column to an existing table, setting to VALID won't
2807 // hurt, because DDL resets to INVALID after the extent is created.
2808 if ((e->partitionNum == 0) &&
2809 (e->segmentNum == 0) &&
2810 (e->blockOffset == 0))
2811 e->partition.cprange.isValid = CP_VALID;
2812 else
2813 e->partition.cprange.isValid = CP_INVALID;
2814
2815 startBlockOffset = e->blockOffset;
2816
2817 makeUndoRecord(fEMShminfo, sizeof(MSTEntry));
2818 fEMShminfo->currentSize += sizeof(struct EMEntry);
2819
2820 return startLBID;
2821 }
2822
2823 //------------------------------------------------------------------------------
2824 // Creates an extent for a dictionary store file. This is the external API
2825 // function.
2826 // input:
2827 // OID - column OID for which the extent is to be created
2828 // dbRoot - DBRoot to be assigned to the new extent
2829 // partitionNum - partition number to be assigned to the new extent
2830 // segmentNum - segment number to be assigned to the new extent
2831 // output:
2832 // lbid - starting LBID of the created extent
2833 // allocdsize - number of LBIDs allocated
2834 //------------------------------------------------------------------------------
createDictStoreExtent(int OID,uint16_t dbRoot,uint32_t partitionNum,uint16_t segmentNum,LBID_t & lbid,int & allocdsize)2835 void ExtentMap::createDictStoreExtent(int OID,
2836 uint16_t dbRoot,
2837 uint32_t partitionNum,
2838 uint16_t segmentNum,
2839 LBID_t& lbid,
2840 int& allocdsize)
2841 {
2842 #ifdef BRM_INFO
2843
2844 if (fDebug)
2845 {
2846 TRACER_WRITELATER("createDictStoreExtent");
2847 TRACER_ADDINPUT(OID);
2848 TRACER_ADDSHORTINPUT(dbRoot);
2849 TRACER_ADDINPUT(partitionNum);
2850 TRACER_ADDSHORTINPUT(segmentNum);
2851 TRACER_ADDINT64OUTPUT(lbid);
2852 TRACER_ADDOUTPUT(allocdsize);
2853 TRACER_WRITE;
2854 }
2855
2856 #endif
2857
2858 #ifdef BRM_DEBUG
2859
2860 if (OID <= 0)
2861 {
2862 log("ExtentMap::createDictStoreExtent(): OID must be > 0",
2863 logging::LOG_TYPE_DEBUG);
2864 throw invalid_argument(
2865 "ExtentMap::createDictStoreExtent(): OID must be > 0");
2866 }
2867
2868 #endif
2869
2870 // Convert extent size in rows to extent size in 8192-byte blocks.
2871 // extentRows should be multiple of blocksize (8192).
2872 const unsigned EXTENT_SIZE = (getExtentRows() * DICT_COL_WIDTH) / BLOCK_SIZE;
2873
2874 grabEMEntryTable(WRITE);
2875 grabFreeList(WRITE);
2876
2877 if (fEMShminfo->currentSize == fEMShminfo->allocdSize)
2878 growEMShmseg();
2879
2880 // size is the number of multiples of 1024 blocks.
2881 // ex: size=1 --> 1024 blocks
2882 // size=2 --> 2048 blocks
2883 // size=3 --> 3072 blocks, etc.
2884 uint32_t size = EXTENT_SIZE / 1024;
2885
2886 lbid = _createDictStoreExtent(size, OID,
2887 dbRoot, partitionNum, segmentNum);
2888
2889 allocdsize = EXTENT_SIZE;
2890 }
2891
2892 //------------------------------------------------------------------------------
2893 // Creates an extent for a dictionary store file. This is the internal
2894 // implementation function.
2895 // input:
2896 // size - number of multiples of 1024 blocks allocated to the extent
2897 // ex: size=1 --> 1024 blocks
2898 // size=2 --> 2048 blocks
2899 // size=3 --> 3072 blocks, etc.
2900 // OID - column OID for which the extent is to be created
2901 // dbRoot - DBRoot to be assigned to the new extent
2902 // partitionNum - partition number to be assigned to the new extent
2903 // segmentNum - segment number to be assigned to the new extent
2904 // returns starting LBID of the created extent.
2905 //------------------------------------------------------------------------------
_createDictStoreExtent(uint32_t size,int OID,uint16_t dbRoot,uint32_t partitionNum,uint16_t segmentNum)2906 LBID_t ExtentMap::_createDictStoreExtent(uint32_t size, int OID,
2907 uint16_t dbRoot,
2908 uint32_t partitionNum,
2909 uint16_t segmentNum)
2910 {
2911 int emptyEMEntry = -1;
2912 int lastExtentIndex = -1;
2913 uint32_t highestOffset = 0;
2914
2915 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
2916
2917 LBID_t startLBID = getLBIDsFromFreeList( size );
2918
2919 // Find the first empty Entry; and find the last extent for this
2920 // combination of OID, partition, and segment.
2921 for (int i = 0; i < emEntries; i++)
2922 {
2923 if (fExtentMap[i].range.size != 0)
2924 {
2925 if ((fExtentMap[i].fileID == OID) &&
2926 (fExtentMap[i].partitionNum == partitionNum) &&
2927 (fExtentMap[i].segmentNum == segmentNum) &&
2928 (fExtentMap[i].blockOffset >= highestOffset))
2929 {
2930 lastExtentIndex = i;
2931 highestOffset = fExtentMap[i].blockOffset;
2932 }
2933 }
2934 else if (emptyEMEntry < 0)
2935 emptyEMEntry = i;
2936 } // Loop through extent map entries
2937
2938 if (emptyEMEntry == -1)
2939 {
2940 ostringstream oss;
2941 oss << "ExtentMap::_createDictStoreExtent(): "
2942 "could not find an empty EMEntry for OID " << OID <<
2943 "; Extent Map is full",
2944 log(oss.str(),
2945 logging::LOG_TYPE_CRITICAL);
2946 throw logic_error( oss.str() );
2947 }
2948
2949 makeUndoRecord(&fExtentMap[emptyEMEntry], sizeof(EMEntry));
2950 EMEntry* e = &fExtentMap[emptyEMEntry];
2951
2952 e->range.start = startLBID;
2953 e->range.size = size;
2954 e->fileID = OID;
2955 e->status = EXTENTUNAVAILABLE;// @bug 1911 mark extent as in process
2956 e->partition.cprange.lo_val = numeric_limits<int64_t>::max();
2957 e->partition.cprange.hi_val = numeric_limits<int64_t>::min();
2958 e->partition.cprange.sequenceNum = 0;
2959 e->partition.cprange.isValid = CP_INVALID;
2960
2961 // If this is first extent for this OID, partition, segment then
2962 // everything is set to 0 or taken from user input
2963 // else
2964 // everything is extrapolated from the last extent
2965 if (lastExtentIndex == -1)
2966 {
2967 e->blockOffset = 0;
2968 e->HWM = 0;
2969 e->segmentNum = segmentNum;
2970 e->partitionNum = partitionNum;
2971 e->dbRoot = dbRoot;
2972 e->colWid = 0; // we don't store col width for dictionaries;
2973 // this helps to flag this as a dictionary extent
2974 }
2975 else
2976 {
2977 e->blockOffset = static_cast<uint64_t>
2978 (fExtentMap[lastExtentIndex].range.size) * 1024 +
2979 fExtentMap[lastExtentIndex].blockOffset;
2980 e->HWM = 0;
2981 e->segmentNum = fExtentMap[lastExtentIndex].segmentNum;
2982 e->partitionNum = fExtentMap[lastExtentIndex].partitionNum;
2983 e->dbRoot = fExtentMap[lastExtentIndex].dbRoot;
2984 e->colWid = fExtentMap[lastExtentIndex].colWid;
2985 }
2986
2987 makeUndoRecord(fEMShminfo, sizeof(MSTEntry));
2988 fEMShminfo->currentSize += sizeof(struct EMEntry);
2989
2990 return startLBID;
2991 }
2992
2993 //------------------------------------------------------------------------------
2994 // Finds and returns the starting LBID for an LBID range taken from the
2995 // free list.
2996 // input:
2997 // size - number of multiples of 1024 blocks needed from the free list
2998 // ex: size=1 --> 1024 blocks
2999 // size=2 --> 2048 blocks
3000 // size=3 --> 3072 blocks, etc.
3001 // returns selected starting LBID.
3002 //------------------------------------------------------------------------------
getLBIDsFromFreeList(uint32_t size)3003 LBID_t ExtentMap::getLBIDsFromFreeList ( uint32_t size )
3004 {
3005 LBID_t ret = -1;
3006 int i;
3007 int flEntries = fFLShminfo->allocdSize / sizeof(InlineLBIDRange);
3008
3009 for (i = 0; i < flEntries; i++)
3010 {
3011 if (size <= fFreeList[i].size)
3012 {
3013 makeUndoRecord(&fFreeList[i], sizeof(InlineLBIDRange));
3014 ret = fFreeList[i].start;
3015 fFreeList[i].start += size * 1024;
3016 fFreeList[i].size -= size;
3017
3018 if (fFreeList[i].size == 0)
3019 {
3020 makeUndoRecord(fFLShminfo, sizeof(MSTEntry));
3021 fFLShminfo->currentSize -= sizeof(InlineLBIDRange);
3022 }
3023
3024 break;
3025 }
3026 }
3027
3028 if (i == flEntries)
3029 {
3030 log("ExtentMap::getLBIDsFromFreeList(): out of LBID space");
3031 throw runtime_error(
3032 "ExtentMap::getLBIDsFromFreeList(): out of LBID space");
3033 }
3034
3035 return ret;
3036 }
3037
3038 #ifdef BRM_DEBUG
printEM(const EMEntry & em) const3039 void ExtentMap::printEM(const EMEntry& em) const
3040 {
3041 cout << " Start "
3042 << em.range.start << " Size "
3043 << (long) em.range.size << " OID "
3044 << (long) em.fileID << " offset "
3045 << (long) em.blockOffset
3046 << " LV " << em.partition.cprange.lo_val
3047 << " HV " << em.partition.cprange.hi_val;
3048 cout << endl;
3049 }
3050
3051
printEM(const OID_t & oid) const3052 void ExtentMap::printEM(const OID_t& oid) const
3053 {
3054 int emEntries = 0;
3055
3056 if (fEMShminfo)
3057 emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
3058
3059 cout << "Extent Map (OID=" << oid << ")" << endl;
3060
3061 for (int idx = 0; idx < emEntries ; idx++)
3062 {
3063 struct EMEntry& em = fExtentMap[idx];
3064
3065 if (em.fileID == oid && em.range.size != 0)
3066 printEM(em);
3067 }
3068
3069 cout << endl;
3070 }
3071
printEM() const3072 void ExtentMap::printEM() const
3073 {
3074
3075 int emEntries = 0;
3076
3077 if (fEMShminfo)
3078 emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
3079
3080 cout << "Extent Map (" << emEntries << ")" << endl;
3081
3082 for (int idx = 0; idx < emEntries ; idx++)
3083 {
3084 struct EMEntry& em = fExtentMap[idx];
3085
3086 if (em.range.size != 0)
3087 printEM(em);
3088 }
3089
3090 cout << endl;
3091 }
3092
printFL() const3093 void ExtentMap::printFL() const
3094 {
3095
3096 int flEntries = 0;
3097
3098 if (fFLShminfo)
3099 flEntries = fFLShminfo->allocdSize / sizeof(InlineLBIDRange);
3100
3101 cout << "Free List" << endl;
3102
3103 for (int idx = 0; idx < flEntries; idx++)
3104 {
3105
3106 cout << idx << " "
3107 << fFreeList[idx].start << " "
3108 << fFreeList[idx].size
3109 << endl;
3110 }
3111
3112 cout << endl;
3113 }
3114 #endif
3115
3116 //------------------------------------------------------------------------------
3117 // Rollback (delete) the extents that logically follow the specified extent for
3118 // the given OID and DBRoot. HWM for the last extent is reset to the specified
3119 // value.
3120 // input:
3121 // oid - OID of the last logical extent to be retained
3122 // bDeleteAll - Flag indicates whether all extents for oid and dbroot are
3123 // to be deleted; else part#, seg#, and hwm are used.
3124 // dbRoot - DBRoot of the extents to be considered.
3125 // partitionNum - partition number of the last logical extent to be retained
3126 // segmentNum - segment number of the last logical extent to be retained
3127 // hwm - HWM to be assigned to the last logical extent retained
3128 //------------------------------------------------------------------------------
rollbackColumnExtents_DBroot(int oid,bool bDeleteAll,uint16_t dbRoot,uint32_t partitionNum,uint16_t segmentNum,HWM_t hwm)3129 void ExtentMap::rollbackColumnExtents_DBroot ( int oid,
3130 bool bDeleteAll,
3131 uint16_t dbRoot,
3132 uint32_t partitionNum,
3133 uint16_t segmentNum,
3134 HWM_t hwm)
3135 {
3136 //bool oidExists = false;
3137
3138 #ifdef BRM_INFO
3139 if (fDebug)
3140 {
3141 TRACER_WRITELATER("rollbackColumnExtents");
3142 TRACER_ADDINPUT(oid);
3143 TRACER_ADDBOOLINPUT(bDeleteAll);
3144 TRACER_ADDSHORTINPUT(dbRoot);
3145 TRACER_ADDINPUT(partitionNum);
3146 TRACER_ADDSHORTINPUT(segmentNum);
3147 TRACER_ADDINPUT(hwm);
3148 TRACER_WRITE;
3149 }
3150
3151 #endif
3152
3153 #ifdef BRM_DEBUG
3154
3155 if (oid < 0)
3156 {
3157 log("ExtentMap::rollbackColumnExtents_DBroot(): OID must be >= 0",
3158 logging::LOG_TYPE_DEBUG);
3159 throw invalid_argument(
3160 "ExtentMap::rollbackColumnExtents_DBroot(): OID must be >= 0");
3161 }
3162
3163 #endif
3164
3165 uint32_t fboLo = 0;
3166 uint32_t fboHi = 0;
3167 uint32_t fboLoPreviousStripe = 0;
3168
3169 grabEMEntryTable(WRITE);
3170 grabFreeList(WRITE);
3171
3172 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
3173
3174 for (int i = 0; i < emEntries; i++)
3175 {
3176 if ((fExtentMap[i].range.size != 0) &&
3177 (fExtentMap[i].fileID == oid) &&
3178 (fExtentMap[i].dbRoot == dbRoot))
3179 {
3180
3181 //oidExists = true;
3182
3183 // Don't rollback extents that are out of service
3184 if (fExtentMap[i].status == EXTENTOUTOFSERVICE)
3185 continue;
3186
3187 // If bDeleteAll is true, then we delete extent w/o regards to
3188 // partition number, segment number, or HWM
3189 if (bDeleteAll)
3190 {
3191 deleteExtent( i ); // case 0
3192 continue;
3193 }
3194
3195 // Calculate fbo range for the stripe containing the given hwm
3196 if (fboHi == 0)
3197 {
3198 uint32_t range = fExtentMap[i].range.size * 1024;
3199 fboLo = hwm - (hwm % range);
3200 fboHi = fboLo + range - 1;
3201
3202 if (fboLo > 0)
3203 fboLoPreviousStripe = fboLo - range;
3204 }
3205
3206 // Delete, update, or ignore this extent:
3207 // Later partition:
3208 // case 1: extent in later partition than last extent, so delete
3209 // Same partition:
3210 // case 2: extent is in later stripe than last extent, so delete
3211 // case 3: extent is in earlier stripe in the same partition.
3212 // No action necessary for case3B and case3C.
3213 // case 3A: extent is in trailing segment in previous stripe.
3214 // This extent is now the last extent in that segment
3215 // file, so reset the local HWM if it was altered.
3216 // case 3B: extent in previous stripe but not a trailing segment
3217 // case 3C: extent is in stripe that precedes previous stripe
3218 // case 4: extent is in the same partition and stripe as the
3219 // last logical extent we are to keep.
3220 // case 4A: extent is in later segment so can be deleted
3221 // case 4B: extent is in earlier segment, reset HWM if changed
3222 // case 4C: this is last logical extent, reset HWM if changed
3223 // Earlier partition:
3224 // case 5: extent is in earlier parition, no action necessary
3225
3226 if (fExtentMap[i].partitionNum > partitionNum)
3227 {
3228 deleteExtent( i ); // case 1
3229 }
3230 else if (fExtentMap[i].partitionNum == partitionNum)
3231 {
3232 if (fExtentMap[i].blockOffset > fboHi)
3233 {
3234 deleteExtent( i ); // case 2
3235 }
3236 else if (fExtentMap[i].blockOffset < fboLo)
3237 {
3238 if (fExtentMap[i].blockOffset >= fboLoPreviousStripe)
3239 {
3240 if (fExtentMap[i].segmentNum > segmentNum)
3241 {
3242 if (fExtentMap[i].HWM != (fboLo - 1))
3243 {
3244 makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
3245 fExtentMap[i].HWM = fboLo - 1; //case 3A
3246 fExtentMap[i].status = EXTENTAVAILABLE;
3247 }
3248 }
3249 else
3250 {
3251 // not a trailing segment in prev stripe case 3B
3252 }
3253 }
3254 else
3255 {
3256 // extent precedes previous stripe case 3C
3257 }
3258 }
3259 else // extent is in same stripe
3260 {
3261 if (fExtentMap[i].segmentNum > segmentNum)
3262 {
3263 deleteExtent( i ); // case 4A
3264 }
3265 else if (fExtentMap[i].segmentNum < segmentNum)
3266 {
3267 if (fExtentMap[i].HWM != fboHi)
3268 {
3269 makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
3270 fExtentMap[i].HWM = fboHi; // case 4B
3271 fExtentMap[i].status = EXTENTAVAILABLE;
3272 }
3273 }
3274 else // fExtentMap[i].segmentNum == segmentNum
3275 {
3276 if (fExtentMap[i].HWM != hwm)
3277 {
3278 makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
3279 fExtentMap[i].HWM = hwm; // case 4C
3280 fExtentMap[i].status = EXTENTAVAILABLE;
3281 }
3282 }
3283 }
3284 }
3285 else
3286 {
3287 // extent in earlier partition; no action necessary case 5
3288 }
3289 } // extent map entry with matching oid
3290 } // loop through the extent map
3291
3292 // If this function is called, we are already in error recovery mode; so
3293 // don't worry about reporting an error if the OID is not found, because
3294 // we don't want/need the extents for that OID anyway.
3295 //if (!oidExists)
3296 //{
3297 // ostringstream oss;
3298 // oss << "ExtentMap::rollbackColumnExtents_DBroot(): "
3299 // "Rollback failed: no extents exist for: OID-" << oid <<
3300 // "; dbRoot-" << dbRoot <<
3301 // "; partition-" << partitionNum <<
3302 // "; segment-" << segmentNum <<
3303 // "; hwm-" << hwm;
3304 // log(oss.str(), logging::LOG_TYPE_CRITICAL);
3305 // throw invalid_argument(oss.str());
3306 //}
3307 }
3308
3309 //------------------------------------------------------------------------------
3310 // Rollback (delete) the extents that follow the extents in partitionNum,
3311 // for the given dictionary OID & DBRoot. The specified hwms represent the HWMs
// to be reset for each segment store file in this partition. An HWM will
3313 // not be given for "every" segment file if we are rolling back to a point where
3314 // we had not yet created all the segment files in the partition. In any case,
3315 // any extents for the "oid" that follow partitionNum, should be deleted.
3316 // Likewise, any extents in the same partition, whose segment file is not in
3317 // segNums[], should be deleted as well. If hwms is empty, then this DBRoot
3318 // must have been empty at the start of the job, so all the extents for the
3319 // specified oid and dbRoot can be deleted.
3320 // input:
3321 // oid - OID of the "last" extents to be retained
3322 // dbRoot - DBRoot of the extents to be considered.
3323 // partitionNum - partition number of the last extents to be retained
3324 // segNums - list of segment files with extents to be restored
3325 // hwms - HWMs to be assigned to the last retained extent in each of
3326 // the corresponding segment store files in segNums.
3327 // hwms[0] applies to segment store file segNums[0];
3328 // hwms[1] applies to segment store file segNums[1]; etc.
3329 //------------------------------------------------------------------------------
rollbackDictStoreExtents_DBroot(int oid,uint16_t dbRoot,uint32_t partitionNum,const vector<uint16_t> & segNums,const vector<HWM_t> & hwms)3330 void ExtentMap::rollbackDictStoreExtents_DBroot ( int oid,
3331 uint16_t dbRoot,
3332 uint32_t partitionNum,
3333 const vector<uint16_t>& segNums,
3334 const vector<HWM_t>& hwms)
3335 {
3336 //bool oidExists = false;
3337
3338 #ifdef BRM_INFO
3339 if (fDebug)
3340 {
3341 ostringstream oss;
3342
3343 for (unsigned int k = 0; k < hwms.size(); k++)
3344 oss << "; hwms[" << k << "]-" << hwms[k];
3345
3346 const string& hwmString(oss.str());
3347
3348 // put TRACE inside separate scope {} to insure that temporary
3349 // hwmString still exists when tracer destructor tries to print it.
3350 {
3351 TRACER_WRITELATER("rollbackDictStoreExtents_DBroot");
3352 TRACER_ADDINPUT(oid);
3353 TRACER_ADDSHORTINPUT(dbRoot);
3354 TRACER_ADDINPUT(partitionNum);
3355 TRACER_ADDSTRINPUT(hwmString);
3356 TRACER_WRITE;
3357 }
3358 }
3359
3360 #endif
3361
3362 // Delete all extents for the specified OID and DBRoot,
3363 // if we are not given any hwms and segment files.
3364 bool bDeleteAll = false;
3365
3366 if (hwms.size() == 0)
3367 bDeleteAll = true;
3368
3369 // segToHwmMap maps segment file number to corresponding pair<hwm,fboLo>
3370 tr1::unordered_map<uint16_t, pair<uint32_t, uint32_t> > segToHwmMap;
3371 tr1::unordered_map<uint16_t, pair<uint32_t, uint32_t> >::const_iterator
3372 segToHwmMapIter;
3373
3374 grabEMEntryTable(WRITE);
3375 grabFreeList(WRITE);
3376
3377 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
3378
3379 for (int i = 0; i < emEntries; i++)
3380 {
3381 if ((fExtentMap[i].range.size != 0) &&
3382 (fExtentMap[i].fileID == oid) &&
3383 (fExtentMap[i].dbRoot == dbRoot))
3384 {
3385
3386 //oidExists = true;
3387
3388 // Don't rollback extents that are out of service
3389 if (fExtentMap[i].status == EXTENTOUTOFSERVICE)
3390 continue;
3391
3392 // If bDeleteAll is true, then we delete extent w/o regards to
3393 // partition number, segment number, or HWM
3394 if (bDeleteAll)
3395 {
3396 deleteExtent( i ); // case 0
3397 continue;
3398 }
3399
3400 // Calculate fbo's for the list of hwms we are given; and store
3401 // the fbo and hwm in a map, using the segment file number as a key.
3402 if (segToHwmMap.size() == 0)
3403 {
3404 uint32_t range = fExtentMap[i].range.size * 1024;
3405 pair<uint32_t, uint32_t> segToHwmMapEntry;
3406
3407 for (unsigned int k = 0; k < hwms.size(); k++)
3408 {
3409 uint32_t fboLo = hwms[k] - (hwms[k] % range);
3410 segToHwmMapEntry.first = hwms[k];
3411 segToHwmMapEntry.second = fboLo;
3412 segToHwmMap[ segNums[k] ] = segToHwmMapEntry;
3413 }
3414 }
3415
3416 // Delete, update, or ignore this extent:
3417 // Later partition:
3418 // case 1: extent is in later partition, so delete the extent
3419 // Same partition:
3420 // case 2: extent is in trailing seg file we don't need; so delete
3421 // case 3: extent is in partition and segment file of interest
3422 // case 3A: earlier extent in segment file; no action necessary
3423 // case 3B: specified HWM falls in this extent, so reset HWM
3424 // case 3C: later extent in segment file; so delete the extent
3425 // Earlier partition:
3426 // case 4: extent is in earlier parition, no action necessary
3427
3428 if (fExtentMap[i].partitionNum > partitionNum)
3429 {
3430 deleteExtent( i ); // case 1
3431 }
3432 else if (fExtentMap[i].partitionNum == partitionNum)
3433 {
3434 unsigned segNum = fExtentMap[i].segmentNum;
3435 segToHwmMapIter = segToHwmMap.find( segNum );
3436
3437 if (segToHwmMapIter == segToHwmMap.end())
3438 {
3439 deleteExtent( i ); // case 2
3440 }
3441 else // segment number in the map of files to keep
3442 {
3443 uint32_t fboLo = segToHwmMapIter->second.second;
3444
3445 if (fExtentMap[i].blockOffset < fboLo)
3446 {
3447 // no action necessary case 3A
3448 }
3449 else if (fExtentMap[i].blockOffset == fboLo)
3450 {
3451 uint32_t hwm = segToHwmMapIter->second.first;
3452
3453 if (fExtentMap[i].HWM != hwm)
3454 {
3455 makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
3456 fExtentMap[i].HWM = hwm;
3457 fExtentMap[i].status = EXTENTAVAILABLE; // case 3B
3458 }
3459 }
3460 else
3461 {
3462 deleteExtent( i ); // case 3C
3463 }
3464 }
3465 }
3466 else
3467 {
3468 // extent in earlier partition; no action necessary case 4
3469 }
3470 } // extent map entry with matching oid
3471 } // loop through the extent map
3472
3473 // If this function is called, we are already in error recovery mode; so
3474 // don't worry about reporting an error if the OID is not found, because
3475 // we don't want/need the extents for that OID anyway.
3476 //if (!oidExists)
3477 //{
3478 // ostringstream oss;
3479 // oss << "ExtentMap::rollbackDictStoreExtents_DBroot(): "
3480 // "Rollback failed: no extents exist for: OID-" << oid <<
3481 // "; dbRoot-" << dbRoot <<
3482 // "; partition-" << partitionNum;
3483 // log(oss.str(), logging::LOG_TYPE_CRITICAL);
3484 // throw invalid_argument(oss.str());
3485 //}
3486 }
3487
3488 //------------------------------------------------------------------------------
3489 // Delete the extents specified and reset hwm
3490 //------------------------------------------------------------------------------
deleteEmptyColExtents(const ExtentsInfoMap_t & extentsInfo)3491 void ExtentMap::deleteEmptyColExtents(const ExtentsInfoMap_t& extentsInfo)
3492 {
3493 #ifdef BRM_INFO
3494
3495 if (fDebug)
3496 {
3497 TRACER_WRITELATER("deleteEmptyColExtents");
3498 TRACER_WRITE;
3499 }
3500
3501 #endif
3502
3503 grabEMEntryTable(WRITE);
3504 grabFreeList(WRITE);
3505
3506 uint32_t fboLo = 0;
3507 uint32_t fboHi = 0;
3508 uint32_t fboLoPreviousStripe = 0;
3509
3510 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
3511 ExtentsInfoMap_t::const_iterator it;
3512
3513 for (int i = 0; i < emEntries; i++)
3514 {
3515 if (fExtentMap[i].range.size != 0)
3516 {
3517 it = extentsInfo.find ( fExtentMap[i].fileID );
3518
3519 if ( it != extentsInfo.end() )
3520 {
3521 // Don't rollback extents that are out of service
3522 if (fExtentMap[i].status == EXTENTOUTOFSERVICE)
3523 continue;
3524
3525 // Calculate fbo range for the stripe containing the given hwm
3526 if (fboHi == 0)
3527 {
3528 uint32_t range = fExtentMap[i].range.size * 1024;
3529 fboLo = it->second.hwm - (it->second.hwm % range);
3530 fboHi = fboLo + range - 1;
3531
3532 if (fboLo > 0)
3533 fboLoPreviousStripe = fboLo - range;
3534 }
3535
3536 // Delete, update, or ignore this extent:
3537 // Later partition:
3538 // case 1: extent in later partition than last extent, so delete
3539 // Same partition:
3540 // case 2: extent is in later stripe than last extent, so delete
3541 // case 3: extent is in earlier stripe in the same partition.
3542 // No action necessary for case3B and case3C.
3543 // case 3A: extent is in trailing segment in previous stripe.
3544 // This extent is now the last extent in that segment
3545 // file, so reset the local HWM if it was altered.
3546 // case 3B: extent in previous stripe but not a trailing segment
3547 // case 3C: extent is in stripe that precedes previous stripe
3548 // case 4: extent is in the same partition and stripe as the
3549 // last logical extent we are to keep.
3550 // case 4A: extent is in later segment so can be deleted
3551 // case 4B: extent is in earlier segment, reset HWM if changed
3552 // case 4C: this is last logical extent, reset HWM if changed
3553 // Earlier partition:
3554 // case 5: extent is in earlier parition, no action necessary
3555
3556 if (fExtentMap[i].partitionNum > it->second.partitionNum)
3557 {
3558 deleteExtent( i ); // case 1
3559 }
3560 else if (fExtentMap[i].partitionNum == it->second.partitionNum)
3561 {
3562 if (fExtentMap[i].blockOffset > fboHi)
3563 {
3564 deleteExtent( i ); // case 2
3565 }
3566 else if (fExtentMap[i].blockOffset < fboLo)
3567 {
3568 if (fExtentMap[i].blockOffset >= fboLoPreviousStripe)
3569 {
3570 if (fExtentMap[i].segmentNum > it->second.segmentNum)
3571 {
3572 if (fExtentMap[i].HWM != (fboLo - 1))
3573 {
3574 makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
3575 fExtentMap[i].HWM = fboLo - 1; //case 3A
3576 fExtentMap[i].status = EXTENTAVAILABLE;
3577 }
3578 }
3579 else
3580 {
3581 // not a trailing segment in prev stripe case 3B
3582 }
3583 }
3584 else
3585 {
3586 // extent precedes previous stripe case 3C
3587 }
3588 }
3589 else
3590 {
3591 // extent is in same stripe
3592 if (fExtentMap[i].segmentNum > it->second.segmentNum)
3593 {
3594 deleteExtent( i ); // case 4A
3595 }
3596 else if (fExtentMap[i].segmentNum < it->second.segmentNum)
3597 {
3598 if (fExtentMap[i].HWM != fboHi)
3599 {
3600 makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
3601 fExtentMap[i].HWM = fboHi; // case 4B
3602 fExtentMap[i].status = EXTENTAVAILABLE;
3603 }
3604 }
3605 else
3606 {
3607 // fExtentMap[i].segmentNum == segmentNum
3608 if (fExtentMap[i].HWM != it->second.hwm)
3609 {
3610 makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
3611 fExtentMap[i].HWM = it->second.hwm;// case 4C
3612 fExtentMap[i].status = EXTENTAVAILABLE;
3613 }
3614 }
3615 }
3616 }
3617 else
3618 {
3619 // extent in earlier partition; no action necessary case 5
3620 }
3621 } // extent map entry with matching oid
3622 }
3623 } // loop through the extent map
3624 }
3625
deleteEmptyDictStoreExtents(const ExtentsInfoMap_t & extentsInfo)3626 void ExtentMap::deleteEmptyDictStoreExtents(const ExtentsInfoMap_t& extentsInfo)
3627 {
3628 #ifdef BRM_INFO
3629
3630 if (fDebug)
3631 {
3632 TRACER_WRITELATER("deleteEmptyDictStoreExtents");
3633 TRACER_WRITE;
3634 }
3635
3636 #endif
3637
3638 grabEMEntryTable(WRITE);
3639 grabFreeList(WRITE);
3640
3641 ExtentsInfoMap_t::const_iterator it;
3642
3643 uint32_t fboLo = 0;
3644 uint32_t fboHi = 0;
3645
3646 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
3647 it = extentsInfo.begin();
3648
3649 if ( it->second.newFile ) //The extent is the new extent
3650 {
3651 for (int i = 0; i < emEntries; i++)
3652 {
3653 if (fExtentMap[i].range.size != 0)
3654 {
3655 it = extentsInfo.find ( fExtentMap[i].fileID );
3656
3657 if ( it != extentsInfo.end() )
3658 {
3659 if ((fExtentMap[i].partitionNum == it->second.partitionNum)
3660 && (fExtentMap[i].segmentNum == it->second.segmentNum)
3661 && (fExtentMap[i].dbRoot == it->second.dbRoot) )
3662 deleteExtent( i );
3663 }
3664 }
3665 }
3666 }
3667 else //The extent is the old one
3668 {
3669
3670 for (int i = 0; i < emEntries; i++)
3671 {
3672 if (fExtentMap[i].range.size != 0)
3673 {
3674 it = extentsInfo.find ( fExtentMap[i].fileID );
3675
3676 if ( it != extentsInfo.end() )
3677 {
3678 // Don't rollback extents that are out of service
3679 if (fExtentMap[i].status == EXTENTOUTOFSERVICE)
3680 continue;
3681
3682 // Calculate fbo
3683 if (fboHi == 0)
3684 {
3685 uint32_t range = fExtentMap[i].range.size * 1024;
3686 fboLo = it->second.hwm - (it->second.hwm % range);
3687 fboHi = fboLo + range - 1;
3688 }
3689
3690 // Delete, update, or ignore this extent:
3691 // Later partition:
3692 // case 1: extent is in later partition, so delete the extent
3693 // Same partition:
3694 // case 2: extent is in partition and segment file of interest
3695 // case 2A: earlier extent in segment file; no action necessary
3696 // case 2B: specified HWM falls in this extent, so reset HWM
3697 // case 2C: later extent in segment file; so delete the extent
3698 // Earlier partition:
3699 // case 3: extent is in earlier parition, no action necessary
3700
3701 if (fExtentMap[i].partitionNum > it->second.partitionNum)
3702 {
3703 deleteExtent( i ); // case 1
3704 }
3705 else if (fExtentMap[i].partitionNum == it->second.partitionNum)
3706 {
3707 if ( fExtentMap[i].segmentNum == it->second.segmentNum)
3708 {
3709 if (fExtentMap[i].blockOffset < fboLo)
3710 {
3711 // no action necessary case 2A
3712 }
3713 else if (fExtentMap[i].blockOffset == fboLo)
3714 {
3715 if (fExtentMap[i].HWM != it->second.hwm)
3716 {
3717 makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
3718 fExtentMap[i].HWM = it->second.hwm;
3719 fExtentMap[i].status = EXTENTAVAILABLE;//case 2B
3720 }
3721 }
3722 else
3723 {
3724 deleteExtent( i ); // case 3C
3725 }
3726 }
3727 else
3728 {
3729 // no action necessary
3730 }
3731 }
3732 else
3733 {
3734 // extent in earlier partition; no action necessary case 4
3735 }
3736 } // extent map entry with matching oid
3737 }
3738 } // loop through the extent map
3739 }
3740 }
3741 //------------------------------------------------------------------------------
3742 // Delete all the extents for the specified OID
3743 //------------------------------------------------------------------------------
deleteOID(int OID)3744 void ExtentMap::deleteOID(int OID)
3745 {
3746 #ifdef BRM_INFO
3747
3748 if (fDebug)
3749 {
3750 TRACER_WRITELATER("deleteOID");
3751 TRACER_ADDINPUT(OID);
3752 TRACER_WRITE;
3753 }
3754
3755 #endif
3756
3757 bool OIDExists = false;
3758
3759 #ifdef BRM_DEBUG
3760
3761 if (OID < 0)
3762 {
3763 log("ExtentMap::deleteOID(): OID must be >= 0", logging::LOG_TYPE_DEBUG);
3764 throw invalid_argument("ExtentMap::deleteOID(): OID must be >= 0");
3765 }
3766
3767 #endif
3768
3769 grabEMEntryTable(WRITE);
3770 grabFreeList(WRITE);
3771
3772 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
3773
3774 for (int emIndex = 0; emIndex < emEntries; emIndex++)
3775 {
3776
3777 if (fExtentMap[emIndex].range.size > 0 &&
3778 fExtentMap[emIndex].fileID == OID)
3779 {
3780 OIDExists = true;
3781
3782 deleteExtent( emIndex );
3783 }
3784 }
3785
3786 if (!OIDExists)
3787 {
3788 ostringstream oss;
3789 oss << "ExtentMap::deleteOID(): There are no extent entries for OID " << OID << endl;
3790 log(oss.str(), logging::LOG_TYPE_CRITICAL);
3791 throw invalid_argument(oss.str());
3792 }
3793 }
3794
3795
3796
3797 //------------------------------------------------------------------------------
3798 // Delete all the extents for the specified OIDs
3799 //------------------------------------------------------------------------------
deleteOIDs(const OidsMap_t & OIDs)3800 void ExtentMap::deleteOIDs(const OidsMap_t& OIDs)
3801 {
3802 #ifdef BRM_INFO
3803
3804 if (fDebug)
3805 {
3806 TRACER_WRITELATER("deleteOIDs");
3807 TRACER_WRITE;
3808 }
3809
3810 #endif
3811 grabEMEntryTable(WRITE);
3812 grabFreeList(WRITE);
3813 OidsMap_t::const_iterator it;
3814 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
3815
3816 for (int emIndex = 0; emIndex < emEntries; emIndex++)
3817 {
3818 if (fExtentMap[emIndex].range.size > 0 )
3819 {
3820 it = OIDs.find ( fExtentMap[emIndex].fileID );
3821
3822 if ( it != OIDs.end() )
3823 deleteExtent( emIndex );
3824 }
3825 }
3826 }
3827
3828
3829 //------------------------------------------------------------------------------
3830 // Delete the specified extent from the extentmap and return to the free list.
3831 // emIndex - the index (from the extent map) of the extent to be deleted
3832 //------------------------------------------------------------------------------
deleteExtent(int emIndex)3833 void ExtentMap::deleteExtent(int emIndex)
3834 {
3835 int flIndex, freeFLIndex, flEntries, preceedingExtent, succeedingExtent;
3836 LBID_t flBlockEnd, emBlockEnd;
3837
3838 flEntries = fFLShminfo->allocdSize / sizeof(InlineLBIDRange);
3839
3840 emBlockEnd = fExtentMap[emIndex].range.start +
3841 (static_cast<LBID_t>(fExtentMap[emIndex].range.size) * 1024);
3842
3843 //scan the freelist to see where this entry fits in
3844 for (flIndex = 0, preceedingExtent = -1, succeedingExtent = -1, freeFLIndex = -1;
3845 flIndex < flEntries; flIndex++)
3846 {
3847 if (fFreeList[flIndex].size == 0)
3848 freeFLIndex = flIndex;
3849 else
3850 {
3851 flBlockEnd = fFreeList[flIndex].start +
3852 (static_cast<LBID_t>(fFreeList[flIndex].size) * 1024);
3853
3854 if (emBlockEnd == fFreeList[flIndex].start)
3855 succeedingExtent = flIndex;
3856 else if (flBlockEnd == fExtentMap[emIndex].range.start)
3857 preceedingExtent = flIndex;
3858 }
3859 }
3860
3861 //update the freelist
3862
3863 //this space is in between 2 blocks in the FL
3864 if (preceedingExtent != -1 && succeedingExtent != -1)
3865 {
3866 makeUndoRecord(&fFreeList[preceedingExtent], sizeof(InlineLBIDRange));
3867
3868 // migrate the entry upward if there's a space
3869 if (freeFLIndex < preceedingExtent && freeFLIndex != -1)
3870 {
3871 makeUndoRecord(&fFreeList[freeFLIndex], sizeof(InlineLBIDRange));
3872 memcpy(&fFreeList[freeFLIndex], &fFreeList[preceedingExtent], sizeof(InlineLBIDRange));
3873 fFreeList[preceedingExtent].size = 0;
3874 preceedingExtent = freeFLIndex;
3875 }
3876
3877 fFreeList[preceedingExtent].size += fFreeList[succeedingExtent].size +
3878 fExtentMap[emIndex].range.size;
3879 makeUndoRecord(&fFreeList[succeedingExtent], sizeof(InlineLBIDRange));
3880 fFreeList[succeedingExtent].size = 0;
3881 makeUndoRecord(fFLShminfo, sizeof(MSTEntry));
3882 fFLShminfo->currentSize -= sizeof(InlineLBIDRange);
3883 }
3884
3885 //this space has a free block at the end
3886 else if (succeedingExtent != -1)
3887 {
3888 makeUndoRecord(&fFreeList[succeedingExtent], sizeof(InlineLBIDRange));
3889
3890 // migrate the entry upward if there's a space
3891 if (freeFLIndex < succeedingExtent && freeFLIndex != -1)
3892 {
3893 makeUndoRecord(&fFreeList[freeFLIndex], sizeof(InlineLBIDRange));
3894 memcpy(&fFreeList[freeFLIndex], &fFreeList[succeedingExtent], sizeof(InlineLBIDRange));
3895 fFreeList[succeedingExtent].size = 0;
3896 succeedingExtent = freeFLIndex;
3897 }
3898
3899 fFreeList[succeedingExtent].start = fExtentMap[emIndex].range.start;
3900 fFreeList[succeedingExtent].size += fExtentMap[emIndex].range.size;
3901 }
3902
3903 //this space has a free block at the beginning
3904 else if (preceedingExtent != -1)
3905 {
3906 makeUndoRecord(&fFreeList[preceedingExtent], sizeof(InlineLBIDRange));
3907
3908 // migrate the entry upward if there's a space
3909 if (freeFLIndex < preceedingExtent && freeFLIndex != -1)
3910 {
3911 makeUndoRecord(&fFreeList[freeFLIndex], sizeof(InlineLBIDRange));
3912 memcpy(&fFreeList[freeFLIndex], &fFreeList[preceedingExtent], sizeof(InlineLBIDRange));
3913 fFreeList[preceedingExtent].size = 0;
3914 preceedingExtent = freeFLIndex;
3915 }
3916
3917 fFreeList[preceedingExtent].size += fExtentMap[emIndex].range.size;
3918 }
3919
3920 //the freelist has no adjacent blocks, so make a new entry
3921 else
3922 {
3923 if (fFLShminfo->currentSize == fFLShminfo->allocdSize)
3924 {
3925 growFLShmseg();
3926 #ifdef BRM_DEBUG
3927
3928 if (freeFLIndex != -1)
3929 {
3930 log("ExtentMap::deleteOID(): found a free FL entry in a supposedly full shmseg", logging::LOG_TYPE_DEBUG);
3931 throw logic_error("ExtentMap::deleteOID(): found a free FL entry in a supposedly full shmseg");
3932 }
3933
3934 #endif
3935 freeFLIndex = flEntries; // happens to be the right index
3936 flEntries = fFLShminfo->allocdSize / sizeof(InlineLBIDRange);
3937 }
3938
3939 #ifdef BRM_DEBUG
3940
3941 if (freeFLIndex == -1)
3942 {
3943 log("ExtentMap::deleteOID(): no available free list entries?", logging::LOG_TYPE_DEBUG);
3944 throw logic_error("ExtentMap::deleteOID(): no available free list entries?");
3945 }
3946
3947 #endif
3948 makeUndoRecord(&fFreeList[freeFLIndex], sizeof(InlineLBIDRange));
3949 fFreeList[freeFLIndex].start = fExtentMap[emIndex].range.start;
3950 fFreeList[freeFLIndex].size = fExtentMap[emIndex].range.size;
3951 makeUndoRecord(&fFLShminfo, sizeof(MSTEntry));
3952 fFLShminfo->currentSize += sizeof(InlineLBIDRange);
3953 }
3954
3955 //invalidate the entry in the Extent Map
3956 makeUndoRecord(&fExtentMap[emIndex], sizeof(EMEntry));
3957 fExtentMap[emIndex].range.size = 0;
3958 makeUndoRecord(&fEMShminfo, sizeof(MSTEntry));
3959 fEMShminfo->currentSize -= sizeof(struct EMEntry);
3960 }
3961
3962 //------------------------------------------------------------------------------
3963 // Returns the last local HWM for the specified OID for the given DBroot.
3964 // Also returns the DBRoot, and partition, and segment numbers for the relevant
3965 // segment file. Technically, this function finds the "last" extent for the
3966 // specified OID, and returns the HWM for that extent. It is assumed that the
3967 // HWM for the segment file containing this "last" extent, has been stored in
3968 // that extent's hwm; and that the hwm is not still hanging around in a previous
3969 // extent for the same segment file.
3970 // If no available or outOfService extent is found, then bFound is returned
3971 // as false.
3972 //------------------------------------------------------------------------------
getLastHWM_DBroot(int OID,uint16_t dbRoot,uint32_t & partitionNum,uint16_t & segmentNum,int & status,bool & bFound)3973 HWM_t ExtentMap::getLastHWM_DBroot(int OID, uint16_t dbRoot,
3974 uint32_t& partitionNum, uint16_t& segmentNum, int& status, bool& bFound)
3975 {
3976 #ifdef BRM_INFO
3977
3978 if (fDebug)
3979 {
3980 TRACER_WRITELATER("getLastHWM_DBroot");
3981 TRACER_ADDINPUT(OID);
3982 TRACER_ADDSHORTINPUT(dbRoot);
3983 TRACER_ADDOUTPUT(partitionNum);
3984 TRACER_ADDSHORTOUTPUT(segmentNum);
3985 TRACER_ADDOUTPUT(status);
3986 TRACER_WRITE;
3987 }
3988
3989 #endif
3990
3991 uint32_t lastExtent = 0;
3992 int lastExtentIndex = -1;
3993 partitionNum = 0;
3994 segmentNum = 0;
3995 HWM_t hwm = 0;
3996 bFound = false;
3997
3998 if (OID < 0)
3999 {
4000 ostringstream oss;
4001 oss << "ExtentMap::getLastHWM_DBroot(): invalid OID requested: " << OID;
4002 log(oss.str(), logging::LOG_TYPE_CRITICAL);
4003 throw invalid_argument(oss.str());
4004 }
4005
4006 grabEMEntryTable(READ);
4007
4008 // Searching the array in reverse order should be faster since the last
4009 // extent is usually at the bottom. We still have to search the entire
4010 // array (just in case), but the number of operations per loop iteration
4011 // will be less.
4012 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
4013
4014 for (int i = emEntries - 1; i >= 0; i--)
4015 {
4016 if ((fExtentMap[i].range.size != 0) &&
4017 (fExtentMap[i].fileID == OID) &&
4018 (fExtentMap[i].dbRoot == dbRoot) &&
4019 ((fExtentMap[i].status == EXTENTAVAILABLE) ||
4020 (fExtentMap[i].status == EXTENTOUTOFSERVICE)))
4021 {
4022 if ( (fExtentMap[i].partitionNum > partitionNum) ||
4023 ((fExtentMap[i].partitionNum == partitionNum) &&
4024 (fExtentMap[i].blockOffset > lastExtent)) ||
4025 ((fExtentMap[i].partitionNum == partitionNum) &&
4026 (fExtentMap[i].blockOffset == lastExtent) &&
4027 (fExtentMap[i].segmentNum >= segmentNum)) )
4028 {
4029 lastExtent = fExtentMap[i].blockOffset;
4030 partitionNum = fExtentMap[i].partitionNum;
4031 segmentNum = fExtentMap[i].segmentNum;
4032 lastExtentIndex = i;
4033 }
4034 }
4035 }
4036
4037 // save additional information before we release the read-lock
4038 if (lastExtentIndex != -1)
4039 {
4040 hwm = fExtentMap[lastExtentIndex].HWM;
4041 status = fExtentMap[lastExtentIndex].status;
4042 bFound = true;
4043 }
4044
4045 releaseEMEntryTable(READ);
4046
4047 return hwm;
4048 }
4049
4050 //------------------------------------------------------------------------------
4051 // For the specified OID and PM number, this function will return a vector
4052 // of objects carrying HWM info (for the last segment file) and block count
4053 // information about each DBRoot assigned to the specified PM.
4054 //------------------------------------------------------------------------------
getDbRootHWMInfo(int OID,uint16_t pmNumber,EmDbRootHWMInfo_v & emDbRootHwmInfos)4055 void ExtentMap::getDbRootHWMInfo(int OID, uint16_t pmNumber,
4056 EmDbRootHWMInfo_v& emDbRootHwmInfos)
4057 {
4058 #ifdef BRM_INFO
4059
4060 if (fDebug)
4061 {
4062 TRACER_WRITELATER("getDbRootHWMInfo");
4063 TRACER_ADDINPUT(OID);
4064 TRACER_ADDSHORTINPUT(pmNumber);
4065 TRACER_WRITE;
4066 }
4067
4068 #endif
4069
4070 if (OID < 0)
4071 {
4072 ostringstream oss;
4073 oss << "ExtentMap::getDbRootHWMInfo(): invalid OID requested: " << OID;
4074 log(oss.str(), logging::LOG_TYPE_CRITICAL);
4075 throw invalid_argument(oss.str());
4076 }
4077
4078 // Determine List of DBRoots for specified PM, and construct map of
4079 // EmDbRootHWMInfo objects.
4080 tr1::unordered_map<uint16_t, EmDbRootHWMInfo> emDbRootMap;
4081 vector<int> dbRootList;
4082 getPmDbRoots( pmNumber, dbRootList );
4083
4084 if ( dbRootList.size() > 0 )
4085 {
4086 for (unsigned int iroot = 0; iroot < dbRootList.size(); iroot++)
4087 {
4088 uint16_t rootID = dbRootList[iroot];
4089 EmDbRootHWMInfo emDbRootInfo(rootID);
4090 emDbRootMap[rootID] = emDbRootInfo;
4091 }
4092 }
4093 else
4094 {
4095 ostringstream oss;
4096 oss << "ExtentMap::getDbRootHWMInfo(): "
4097 "There are no DBRoots for OID " << OID <<
4098 " and PM " << pmNumber << endl;
4099 log(oss.str(), logging::LOG_TYPE_CRITICAL);
4100 throw invalid_argument(oss.str());
4101 }
4102
4103 grabEMEntryTable(READ);
4104 tr1::unordered_map<uint16_t, EmDbRootHWMInfo>::iterator emIter;
4105
4106 // Searching the array in reverse order should be faster since the last
4107 // extent is usually at the bottom. We still have to search the entire
4108 // array (just in case), but the number of operations per loop iteration
4109 // will be less.
4110 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
4111
4112 for (int i = emEntries - 1; i >= 0; i--)
4113 {
4114 if ((fExtentMap[i].range.size != 0) &&
4115 (fExtentMap[i].fileID == OID))
4116 {
4117
4118 // Include this extent in the search, only if the extent's
4119 // DBRoot falls in the list of DBRoots for this PM.
4120 emIter = emDbRootMap.find( fExtentMap[i].dbRoot );
4121
4122 if (emIter == emDbRootMap.end())
4123 continue;
4124
4125 EmDbRootHWMInfo& emDbRoot = emIter->second;
4126
4127 if ((fExtentMap[i].status != EXTENTOUTOFSERVICE) &&
4128 (fExtentMap[i].HWM != 0))
4129 emDbRoot.totalBlocks += (fExtentMap[i].HWM + 1);
4130
4131 if ( (fExtentMap[i].partitionNum > emDbRoot.partitionNum) ||
4132 ((fExtentMap[i].partitionNum == emDbRoot.partitionNum) &&
4133 (fExtentMap[i].blockOffset > emDbRoot.fbo)) ||
4134 ((fExtentMap[i].partitionNum == emDbRoot.partitionNum) &&
4135 (fExtentMap[i].blockOffset == emDbRoot.fbo) &&
4136 (fExtentMap[i].segmentNum >= emDbRoot.segmentNum)) )
4137 {
4138 emDbRoot.fbo = fExtentMap[i].blockOffset;
4139 emDbRoot.partitionNum = fExtentMap[i].partitionNum;
4140 emDbRoot.segmentNum = fExtentMap[i].segmentNum;
4141 emDbRoot.localHWM = fExtentMap[i].HWM;
4142 emDbRoot.startLbid = fExtentMap[i].range.start;
4143 emDbRoot.status = fExtentMap[i].status;
4144 emDbRoot.hwmExtentIndex = i;
4145 }
4146 }
4147 }
4148
4149 releaseEMEntryTable(READ);
4150
4151 for (tr1::unordered_map<uint16_t, EmDbRootHWMInfo>::iterator iter =
4152 emDbRootMap.begin(); iter != emDbRootMap.end(); ++iter)
4153 {
4154 EmDbRootHWMInfo& emDbRoot = iter->second;
4155
4156 if (emDbRoot.hwmExtentIndex != -1)
4157 {
4158 // @bug 5349: make sure HWM extent for each DBRoot is AVAILABLE
4159 if (emDbRoot.status == EXTENTUNAVAILABLE)
4160 {
4161 ostringstream oss;
4162 oss << "ExtentMap::getDbRootHWMInfo(): " <<
4163 "OID " << OID <<
4164 " has HWM extent that is UNAVAILABLE for " <<
4165 "DBRoot" << emDbRoot.dbRoot <<
4166 "; part#: " << emDbRoot.partitionNum <<
4167 ", seg#: " << emDbRoot.segmentNum <<
4168 ", fbo: " << emDbRoot.fbo <<
4169 ", localHWM: " << emDbRoot.localHWM <<
4170 ", lbid: " << emDbRoot.startLbid << endl;
4171 log(oss.str(), logging::LOG_TYPE_CRITICAL);
4172 throw runtime_error(oss.str());
4173 }
4174
4175 // In the loop above we ignored "all" the extents with HWM of 0,
4176 // which is okay most of the time, because each segment file's HWM
4177 // is carried in the last extent only. BUT if we have a segment
4178 // file with HWM=0, having a single extent and a single block at
4179 // the "end" of the data, we still need to account for this last
4180 // block. So we increment the block count for this isolated case.
4181 if ((emDbRoot.localHWM == 0) &&
4182 (emDbRoot.status == EXTENTAVAILABLE))
4183 {
4184 emDbRoot.totalBlocks++;
4185 }
4186 }
4187 }
4188
4189 // Copy internal map to the output vector argument
4190 for (tr1::unordered_map<uint16_t, EmDbRootHWMInfo>::iterator iter =
4191 emDbRootMap.begin(); iter != emDbRootMap.end(); ++iter)
4192 {
4193 emDbRootHwmInfos.push_back( iter->second );
4194 }
4195 }
4196
4197 //------------------------------------------------------------------------------
4198 // Return the existence (bFound) and state (status) for the segment file
4199 // containing the extents for the specified OID, partition, and segment.
4200 // If no extents are found, no exception is thrown. We instead just return
4201 // bFound=false, so that the application can take the necessary action.
4202 // The value returned in the "status" variable is based on the first extent
4203 // found, since all the extents in a segment file should have the same state.
4204 //------------------------------------------------------------------------------
getExtentState(int OID,uint32_t partitionNum,uint16_t segmentNum,bool & bFound,int & status)4205 void ExtentMap::getExtentState(int OID, uint32_t partitionNum,
4206 uint16_t segmentNum, bool& bFound, int& status)
4207 {
4208 #ifdef BRM_INFO
4209
4210 if (fDebug)
4211 {
4212 TRACER_WRITELATER("getExtentState");
4213 TRACER_ADDINPUT(OID);
4214 TRACER_ADDINPUT(partitionNum);
4215 TRACER_ADDSHORTINPUT(segmentNum);
4216 TRACER_ADDOUTPUT(status);
4217 TRACER_WRITE;
4218 }
4219
4220 #endif
4221 int i, emEntries;
4222 bFound = false;
4223 status = EXTENTAVAILABLE;
4224
4225 if (OID < 0)
4226 {
4227 ostringstream oss;
4228 oss << "ExtentMap::getExtentState(): invalid OID requested: " << OID;
4229 log(oss.str(), logging::LOG_TYPE_CRITICAL);
4230 throw invalid_argument(oss.str());
4231 }
4232
4233 grabEMEntryTable(READ);
4234
4235 emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
4236
4237 for (i = 0; i < emEntries; i++)
4238 {
4239 if ((fExtentMap[i].range.size != 0) &&
4240 (fExtentMap[i].fileID == OID) &&
4241 (fExtentMap[i].partitionNum == partitionNum) &&
4242 (fExtentMap[i].segmentNum == segmentNum))
4243 {
4244 bFound = true;
4245 status = fExtentMap[i].status;
4246 break;
4247 }
4248 }
4249
4250 releaseEMEntryTable(READ);
4251 }
4252
4253 //------------------------------------------------------------------------------
4254 // Returns the HWM for the specified OID, partition, and segment numbers.
4255 // Used to get the HWM for a specific column or dictionary store segment file.
4256 //------------------------------------------------------------------------------
getLocalHWM(int OID,uint32_t partitionNum,uint16_t segmentNum,int & status)4257 HWM_t ExtentMap::getLocalHWM(int OID, uint32_t partitionNum,
4258 uint16_t segmentNum, int& status)
4259 {
4260 #ifdef BRM_INFO
4261
4262 if (fDebug)
4263 {
4264 TRACER_WRITELATER("getLocalHWM");
4265 TRACER_ADDINPUT(OID);
4266 TRACER_ADDINPUT(partitionNum);
4267 TRACER_ADDSHORTINPUT(segmentNum);
4268 TRACER_ADDOUTPUT(status);
4269 TRACER_WRITE;
4270 }
4271
4272 #endif
4273
4274 #ifdef EM_AS_A_TABLE_POC__
4275
4276 if (OID == 1084)
4277 {
4278 return 0;
4279 }
4280
4281 #endif
4282
4283 int i, emEntries;
4284 HWM_t ret = 0;
4285 bool OIDPartSegExists = false;
4286
4287 if (OID < 0)
4288 {
4289 ostringstream oss;
4290 oss << "ExtentMap::getLocalHWM(): invalid OID requested: " << OID;
4291 log(oss.str(), logging::LOG_TYPE_CRITICAL);
4292 throw invalid_argument(oss.str());
4293 }
4294
4295 grabEMEntryTable(READ);
4296
4297 emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
4298
4299 for (i = 0; i < emEntries; i++)
4300 {
4301 if ((fExtentMap[i].range.size != 0) &&
4302 (fExtentMap[i].fileID == OID) &&
4303 (fExtentMap[i].partitionNum == partitionNum) &&
4304 (fExtentMap[i].segmentNum == segmentNum))
4305 {
4306 OIDPartSegExists = true;
4307 status = fExtentMap[i].status;
4308
4309 if (fExtentMap[i].HWM != 0)
4310 {
4311 ret = fExtentMap[i].HWM;
4312 releaseEMEntryTable(READ);
4313 return ret;
4314 }
4315 }
4316 }
4317
4318 releaseEMEntryTable(READ);
4319
4320 if (OIDPartSegExists)
4321 return 0;
4322 else
4323 {
4324 ostringstream oss;
4325 oss << "ExtentMap::getLocalHWM(): There are no extent entries for OID " <<
4326 OID << "; partition " << partitionNum << "; segment " <<
4327 segmentNum << endl;
4328 log(oss.str(), logging::LOG_TYPE_CRITICAL);
4329 throw invalid_argument(oss.str());
4330 }
4331 }
4332
4333 //------------------------------------------------------------------------------
4334 // Sets the HWM for the specified OID, partition, and segment number.
4335 // In addition, the HWM for the old HWM extent (for this segment file),
4336 // is set to 0, so that the latest HWM is only carried in the last extent
4337 // (per segment file).
4338 // Used for dictionary or column OIDs to set the HWM for specific segment file.
4339 //------------------------------------------------------------------------------
setLocalHWM(int OID,uint32_t partitionNum,uint16_t segmentNum,HWM_t newHWM,bool firstNode,bool uselock)4340 void ExtentMap::setLocalHWM(int OID, uint32_t partitionNum,
4341 uint16_t segmentNum, HWM_t newHWM, bool firstNode, bool uselock)
4342 {
4343 #ifdef BRM_INFO
4344
4345 if (fDebug)
4346 {
4347 TRACER_WRITELATER("setLocalHWM");
4348 TRACER_ADDINPUT(OID);
4349 TRACER_ADDINPUT(partitionNum);
4350 TRACER_ADDSHORTINPUT(segmentNum);
4351 TRACER_ADDINPUT(newHWM);
4352 TRACER_WRITE;
4353 }
4354
4355 bool addedAnExtent = false;
4356
4357 if (OID < 0)
4358 {
4359 log("ExtentMap::setLocalHWM(): OID must be >= 0",
4360 logging::LOG_TYPE_DEBUG);
4361 throw invalid_argument(
4362 "ExtentMap::setLocalHWM(): OID must be >= 0");
4363 }
4364
4365 #endif
4366
4367 int lastExtentIndex = -1;
4368 int oldHWMExtentIndex = -1;
4369 uint32_t highestOffset = 0;
4370
4371 if (uselock)
4372 grabEMEntryTable(WRITE);
4373
4374 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
4375
4376 for (int i = 0; i < emEntries; i++)
4377 {
4378 if ((fExtentMap[i].range.size != 0) &&
4379 (fExtentMap[i].fileID == OID) &&
4380 (fExtentMap[i].partitionNum == partitionNum) &&
4381 (fExtentMap[i].segmentNum == segmentNum))
4382 {
4383
4384 // Find current HWM extent
4385 if (fExtentMap[i].blockOffset >= highestOffset)
4386 {
4387 highestOffset = fExtentMap[i].blockOffset;
4388 lastExtentIndex = i;
4389 }
4390
4391 // Find previous HWM extent
4392 if (fExtentMap[i].HWM != 0)
4393 {
4394 oldHWMExtentIndex = i;
4395 }
4396 }
4397 }
4398
4399 if (lastExtentIndex == -1)
4400 {
4401 ostringstream oss;
4402 oss << "ExtentMap::setLocalHWM(): Bad OID/partition/segment argument; "
4403 "no extent entries for OID " << OID << "; partition " <<
4404 partitionNum << "; segment " << segmentNum << endl;
4405 log(oss.str(), logging::LOG_TYPE_CRITICAL);
4406 throw invalid_argument(oss.str());
4407 }
4408
4409 if (newHWM >= (fExtentMap[lastExtentIndex].blockOffset +
4410 fExtentMap[lastExtentIndex].range.size * 1024))
4411 {
4412 ostringstream oss;
4413 oss << "ExtentMap::setLocalHWM(): "
4414 "new HWM is past the end of the file for OID " << OID << "; partition " <<
4415 partitionNum << "; segment " << segmentNum << "; HWM " << newHWM;
4416 log(oss.str(), logging::LOG_TYPE_DEBUG);
4417 throw invalid_argument(oss.str());
4418 }
4419
4420 // Save HWM in last extent for this segment file; and mark as AVAILABLE
4421 makeUndoRecord(&fExtentMap[lastExtentIndex], sizeof(EMEntry));
4422 fExtentMap[lastExtentIndex].HWM = newHWM;
4423 fExtentMap[lastExtentIndex].status = EXTENTAVAILABLE;
4424
4425 // Reset HWM in old HWM extent to 0
4426 if ((oldHWMExtentIndex != -1) && (oldHWMExtentIndex != lastExtentIndex))
4427 {
4428 makeUndoRecord(&fExtentMap[oldHWMExtentIndex], sizeof(EMEntry));
4429 fExtentMap[oldHWMExtentIndex].HWM = 0;
4430 #ifdef BRM_INFO
4431 addedAnExtent = true;
4432 #endif
4433 }
4434
4435 #ifdef BRM_INFO
4436
4437 if (firstNode)
4438 {
4439 ostringstream os;
4440 os << "ExtentMap::setLocalHWM(): firstLBID=" << fExtentMap[lastExtentIndex].range.start <<
4441 " lastLBID=" << fExtentMap[lastExtentIndex].range.start +
4442 fExtentMap[lastExtentIndex].range.size * 1024 - 1 << " newHWM=" << fExtentMap[lastExtentIndex].HWM
4443 << " min=" << fExtentMap[lastExtentIndex].partition.cprange.lo_val << " max=" <<
4444 fExtentMap[lastExtentIndex].partition.cprange.hi_val << " seq=" <<
4445 fExtentMap[lastExtentIndex].partition.cprange.sequenceNum << " status=";
4446
4447 switch (fExtentMap[lastExtentIndex].partition.cprange.isValid)
4448 {
4449 case CP_INVALID:
4450 os << "invalid.";
4451 break;
4452
4453 case CP_UPDATING:
4454 os << "updating.";
4455 break;
4456
4457 case CP_VALID:
4458 os << "valid.";
4459 break;
4460
4461 default:
4462 os << "unknown(!!).";
4463 break;
4464 }
4465
4466 if (addedAnExtent)
4467 os << " Data extended into a new extent.";
4468
4469 log(os.str(), logging::LOG_TYPE_DEBUG);
4470 }
4471
4472 #endif
4473 }
4474
bulkSetHWM(const vector<BulkSetHWMArg> & v,bool firstNode)4475 void ExtentMap::bulkSetHWM(const vector<BulkSetHWMArg>& v, bool firstNode)
4476 {
4477 grabEMEntryTable(WRITE);
4478
4479 for (uint32_t i = 0; i < v.size(); i++)
4480 setLocalHWM(v[i].oid, v[i].partNum, v[i].segNum, v[i].hwm, firstNode, false);
4481 }
4482
4483 class BUHasher
4484 {
4485 public:
operator ()(const BulkUpdateDBRootArg & b) const4486 inline uint64_t operator()(const BulkUpdateDBRootArg& b) const
4487 {
4488 return b.startLBID;
4489 }
4490 };
4491
4492 class BUEqual
4493 {
4494 public:
operator ()(const BulkUpdateDBRootArg & b1,const BulkUpdateDBRootArg & b2) const4495 inline bool operator()(const BulkUpdateDBRootArg& b1, const BulkUpdateDBRootArg& b2) const
4496 {
4497 return b1.startLBID == b2.startLBID;
4498 }
4499 };
4500
bulkUpdateDBRoot(const vector<BulkUpdateDBRootArg> & args)4501 void ExtentMap::bulkUpdateDBRoot(const vector<BulkUpdateDBRootArg>& args)
4502 {
4503 tr1::unordered_set<BulkUpdateDBRootArg, BUHasher, BUEqual> sArgs;
4504 tr1::unordered_set<BulkUpdateDBRootArg, BUHasher, BUEqual>::iterator sit;
4505 BulkUpdateDBRootArg key;
4506 int emEntries;
4507
4508 for (uint32_t i = 0; i < args.size(); i++)
4509 sArgs.insert(args[i]);
4510
4511 grabEMEntryTable(WRITE);
4512
4513 emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
4514
4515 for (int i = 0; i < emEntries; i++)
4516 {
4517 key.startLBID = fExtentMap[i].range.start;
4518 sit = sArgs.find(key);
4519
4520 if (sit != sArgs.end())
4521 fExtentMap[i].dbRoot = sit->dbRoot;
4522 }
4523 }
4524
getExtents(int OID,vector<struct EMEntry> & entries,bool sorted,bool notFoundErr,bool incOutOfService)4525 void ExtentMap::getExtents(int OID, vector<struct EMEntry>& entries,
4526 bool sorted, bool notFoundErr, bool incOutOfService)
4527 {
4528 #ifdef BRM_INFO
4529
4530 if (fDebug)
4531 {
4532 TRACER_WRITELATER("getExtents");
4533 TRACER_ADDINPUT(OID);
4534 TRACER_WRITE;
4535 }
4536
4537 #endif
4538 int i, emEntries;
4539
4540 entries.clear();
4541
4542 if (OID < 0)
4543 {
4544 ostringstream oss;
4545 oss << "ExtentMap::getExtents(): invalid OID requested: " << OID;
4546 log(oss.str(), logging::LOG_TYPE_CRITICAL);
4547 throw invalid_argument(oss.str());
4548 }
4549
4550 grabEMEntryTable(READ);
4551 emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
4552 // Pre-expand entries to stop lots of small allocs
4553 entries.reserve(emEntries);
4554
4555 if (incOutOfService)
4556 {
4557 for (i = 0 ; i < emEntries; i++)
4558 if ((fExtentMap[i].fileID == OID) &&
4559 (fExtentMap[i].range.size != 0))
4560 entries.push_back(fExtentMap[i]);
4561 }
4562 else
4563 {
4564 for (i = 0 ; i < emEntries; i++)
4565 if ((fExtentMap[i].fileID == OID) &&
4566 (fExtentMap[i].range.size != 0) &&
4567 (fExtentMap[i].status != EXTENTOUTOFSERVICE))
4568 entries.push_back(fExtentMap[i]);
4569 }
4570
4571 releaseEMEntryTable(READ);
4572
4573 if (sorted)
4574 sort<vector<struct EMEntry>::iterator>(entries.begin(), entries.end());
4575 }
4576
getExtents_dbroot(int OID,vector<struct EMEntry> & entries,const uint16_t dbroot)4577 void ExtentMap::getExtents_dbroot(int OID, vector<struct EMEntry>& entries, const uint16_t dbroot)
4578 {
4579 #ifdef BRM_INFO
4580
4581 if (fDebug)
4582 {
4583 TRACER_WRITELATER("getExtents");
4584 TRACER_ADDINPUT(OID);
4585 TRACER_WRITE;
4586 }
4587
4588 #endif
4589
4590 #ifdef EM_AS_A_TABLE_POC__
4591
4592 if (OID == 1084)
4593 {
4594 EMEntry fakeEntry;
4595 fakeEntry.range.start = (1LL << 54);
4596 fakeEntry.range.size = 4;
4597 fakeEntry.fileID = 1084;
4598 fakeEntry.blockOffset = 0;
4599 fakeEntry.HWM = 1;
4600 fakeEntry.partitionNum = 0;
4601 fakeEntry.segmentNum = 0;
4602 fakeEntry.dbRoot = 1;
4603 fakeEntry.colWid = 4;
4604 fakeEntry.status = EXTENTAVAILABLE;
4605 fakeEntry.partition.cprange.hi_val = numeric_limits<int64_t>::min() + 2;
4606 fakeEntry.partition.cprange.lo_val = numeric_limits<int64_t>::max();
4607 fakeEntry.partition.cprange.sequenceNum = 0;
4608 fakeEntry.partition.cprange.isValid = CP_INVALID;
4609 entries.push_back(fakeEntry);
4610 return;
4611 }
4612
4613 #endif
4614
4615 int i, emEntries;
4616
4617 entries.clear();
4618
4619 if (OID < 0)
4620 {
4621 ostringstream oss;
4622 oss << "ExtentMap::getExtents(): invalid OID requested: " << OID;
4623 log(oss.str(), logging::LOG_TYPE_CRITICAL);
4624 throw invalid_argument(oss.str());
4625 }
4626
4627 grabEMEntryTable(READ);
4628 emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
4629
4630 for (i = 0 ; i < emEntries; i++)
4631 if ((fExtentMap[i].fileID == OID) &&
4632 (fExtentMap[i].range.size != 0) && (fExtentMap[i].dbRoot == dbroot))
4633 entries.push_back(fExtentMap[i]);
4634
4635 releaseEMEntryTable(READ);
4636 }
4637
4638 //------------------------------------------------------------------------------
4639 // Get the number of extents for the specified OID and DBRoot.
4640 // OutOfService extents are included/excluded depending on the
4641 // value of the incOutOfService flag.
4642 //------------------------------------------------------------------------------
getExtentCount_dbroot(int OID,uint16_t dbroot,bool incOutOfService,uint64_t & numExtents)4643 void ExtentMap::getExtentCount_dbroot(int OID, uint16_t dbroot,
4644 bool incOutOfService, uint64_t& numExtents)
4645 {
4646 int i, emEntries;
4647
4648 if (OID < 0)
4649 {
4650 ostringstream oss;
4651 oss << "ExtentMap::getExtentsCount_dbroot(): invalid OID requested: " <<
4652 OID;
4653 log(oss.str(), logging::LOG_TYPE_CRITICAL);
4654 throw invalid_argument(oss.str());
4655 }
4656
4657 grabEMEntryTable(READ);
4658 emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
4659
4660 numExtents = 0;
4661
4662 if (incOutOfService)
4663 {
4664 for (i = 0 ; i < emEntries; i++)
4665 {
4666 if ((fExtentMap[i].fileID == OID) &&
4667 (fExtentMap[i].range.size != 0) &&
4668 (fExtentMap[i].dbRoot == dbroot))
4669 numExtents++;
4670 }
4671 }
4672 else
4673 {
4674 for (i = 0 ; i < emEntries; i++)
4675 {
4676 if ((fExtentMap[i].fileID == OID) &&
4677 (fExtentMap[i].range.size != 0) &&
4678 (fExtentMap[i].dbRoot == dbroot) &&
4679 (fExtentMap[i].status != EXTENTOUTOFSERVICE))
4680 numExtents++;
4681 }
4682 }
4683
4684 releaseEMEntryTable(READ);
4685 }
4686
4687 //------------------------------------------------------------------------------
4688 // Gets the DBRoot for the specified system catalog OID.
4689 // Function assumes the specified System Catalog OID is fully contained on
4690 // a single DBRoot, as the function only searches for and returns the first
4691 // DBRoot entry that is found in the extent map.
4692 //------------------------------------------------------------------------------
getSysCatDBRoot(OID_t oid,uint16_t & dbRoot)4693 void ExtentMap::getSysCatDBRoot(OID_t oid, uint16_t& dbRoot)
4694 {
4695 #ifdef BRM_INFO
4696
4697 if (fDebug)
4698 {
4699 TRACER_WRITELATER("getSysCatDBRoot");
4700 TRACER_ADDINPUT(oid);
4701 TRACER_ADDSHORTOUTPUT(dbRoot);
4702 TRACER_WRITE;
4703 }
4704
4705 #endif
4706
4707 bool bFound = false;
4708 grabEMEntryTable(READ);
4709 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
4710
4711 for (int i = 0 ; i < emEntries; i++)
4712 {
4713 if ((fExtentMap[i].range.size != 0) &&
4714 (fExtentMap[i].fileID == oid))
4715 {
4716 dbRoot = fExtentMap[i].dbRoot;
4717 bFound = true;
4718 break;
4719 }
4720 }
4721
4722 releaseEMEntryTable(READ);
4723
4724 if (!bFound)
4725 {
4726 ostringstream oss;
4727 oss << "ExtentMap::getSysCatDBRoot(): OID not found: " << oid;
4728 log(oss.str(), logging::LOG_TYPE_WARNING);
4729 throw logic_error(oss.str());
4730 }
4731 }
4732
4733 //------------------------------------------------------------------------------
4734 // Delete all extents for the specified OID(s) and partition number.
4735 // @bug 5237 - Removed restriction that prevented deletion of segment files in
4736 // the last partition (for a DBRoot).
4737 //------------------------------------------------------------------------------
deletePartition(const set<OID_t> & oids,const set<LogicalPartition> & partitionNums,string & emsg)4738 void ExtentMap::deletePartition(const set<OID_t>& oids,
4739 const set<LogicalPartition>& partitionNums, string& emsg)
4740 {
4741 #ifdef BRM_INFO
4742
4743 if (fDebug)
4744 {
4745 TRACER_WRITENOW("deletePartition");
4746 ostringstream oss;
4747 set<LogicalPartition>::const_iterator partIt;
4748 oss << "partitionNums: ";
4749 for (partIt=partitionNums.begin(); partIt!=partitionNums.end(); ++partIt)
4750 oss << (*partIt) << " ";
4751
4752 oss << endl;
4753 oss << "OIDS: ";
4754 set<OID_t>::const_iterator it;
4755
4756 for (it = oids.begin(); it != oids.end(); ++it)
4757 {
4758 oss << (*it) << ", ";
4759 }
4760
4761 TRACER_WRITEDIRECT(oss.str());
4762 }
4763
4764 #endif
4765
4766 if (oids.size() == 0)
4767 return;
4768
4769 int rc = 0;
4770
4771 grabEMEntryTable(WRITE);
4772 grabFreeList(WRITE);
4773 set<LogicalPartition> foundPartitions;
4774 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
4775 vector<uint32_t> extents;
4776
4777 // First: validate against referencing non-existent logical partitions
4778 std::set<OID_t>::const_iterator it;
4779
4780 for (int i = 0; i < emEntries; i++)
4781 {
4782 LogicalPartition lp(fExtentMap[i].dbRoot,
4783 fExtentMap[i].partitionNum, fExtentMap[i].segmentNum);
4784
4785 if ((fExtentMap[i].range.size != 0) &&
4786 (partitionNums.find(lp) != partitionNums.end()))
4787 {
4788 it = oids.find( fExtentMap[i].fileID );
4789
4790 if (it != oids.end())
4791 {
4792 foundPartitions.insert(lp);
4793 extents.push_back(i);
4794 }
4795 }
4796 }
4797
4798 if (foundPartitions.size() != partitionNums.size())
4799 {
4800 set<LogicalPartition>::const_iterator partIt;
4801 Message::Args args;
4802 ostringstream oss;
4803
4804 for (partIt = partitionNums.begin();
4805 partIt != partitionNums.end(); ++partIt)
4806 {
4807 if (foundPartitions.find((*partIt)) == foundPartitions.end())
4808 {
4809 if (!oss.str().empty())
4810 oss << ", ";
4811
4812 oss << (*partIt).toString();
4813 }
4814 }
4815
4816 args.add(oss.str());
4817 emsg = IDBErrorInfo::instance()->errorMsg(ERR_PARTITION_NOT_EXIST, args);
4818 rc = ERR_PARTITION_NOT_EXIST;
4819 }
4820
4821 // this has to be the last error code to set and can not be over-written
4822 if (foundPartitions.empty())
4823 rc = WARN_NO_PARTITION_PERFORMED;
4824
4825 // really delete extents
4826 for (uint32_t i = 0; i < extents.size(); i++)
4827 {
4828 deleteExtent(extents[i]);
4829 }
4830
4831 // @bug 4772 throw exception on any error because they are all warnings.
4832 if (rc)
4833 throw IDBExcept(emsg, rc);
4834 }
4835
4836 //------------------------------------------------------------------------------
4837 // Mark all extents as out of service, for the specified OID(s) and partition
4838 // number.
4839 // @bug 5237 - Removed restriction that prevented deletion of segment files in
4840 // the last partition (for a DBRoot).
4841 //------------------------------------------------------------------------------
markPartitionForDeletion(const set<OID_t> & oids,const set<LogicalPartition> & partitionNums,string & emsg)4842 void ExtentMap::markPartitionForDeletion(const set<OID_t>& oids,
4843 const set<LogicalPartition>& partitionNums, string& emsg)
4844 {
4845 #ifdef BRM_INFO
4846
4847 if (fDebug)
4848 {
4849 TRACER_WRITENOW("markPartitionForDeletion");
4850 ostringstream oss;
4851 set<LogicalPartition>::const_iterator partIt;
4852 oss << "partitionNums: ";
4853 for (partIt=partitionNums.begin(); partIt!=partitionNums.end(); ++partIt)
4854 oss << (*partIt) << " ";
4855
4856 oss << endl;
4857 oss << "OIDS: ";
4858 set<OID_t>::const_iterator it;
4859
4860 for (it = oids.begin(); it != oids.end(); ++it)
4861 {
4862 oss << (*it) << ", ";
4863 }
4864
4865 TRACER_WRITEDIRECT(oss.str());
4866 }
4867
4868 #endif
4869
4870 if (oids.size() == 0)
4871 return;
4872
4873 int rc = 0;
4874
4875 grabEMEntryTable(WRITE);
4876 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
4877 set<LogicalPartition> foundPartitions;
4878 vector<uint32_t> extents;
4879 bool partitionAlreadyDisabled = false;
4880
4881 // Identify not exists partition first. Then mark disable.
4882 std::set<OID_t>::const_iterator it;
4883
4884 for (int i = 0; i < emEntries; i++)
4885 {
4886 LogicalPartition lp(fExtentMap[i].dbRoot,
4887 fExtentMap[i].partitionNum, fExtentMap[i].segmentNum);
4888
4889 if ((fExtentMap[i].range.size != 0) &&
4890 (partitionNums.find(lp) != partitionNums.end()))
4891 {
4892 it = oids.find( fExtentMap[i].fileID );
4893
4894 if (it != oids.end())
4895 {
4896 if (fExtentMap[i].status == EXTENTOUTOFSERVICE)
4897 {
4898 partitionAlreadyDisabled = true;
4899 }
4900
4901 foundPartitions.insert(lp);
4902 extents.push_back(i);
4903 }
4904 }
4905 }
4906
4907 // really disable partitions
4908 for (uint32_t i = 0; i < extents.size(); i++)
4909 {
4910 makeUndoRecord(&fExtentMap[extents[i]], sizeof(EMEntry));
4911 fExtentMap[extents[i]].status = EXTENTOUTOFSERVICE;
4912 }
4913
4914 // validate against referencing non-existent logical partitions
4915 if (foundPartitions.size() != partitionNums.size())
4916 {
4917 set<LogicalPartition>::const_iterator partIt;
4918 Message::Args args;
4919 ostringstream oss;
4920
4921 for (partIt = partitionNums.begin();
4922 partIt != partitionNums.end(); ++partIt)
4923 {
4924 if (foundPartitions.find((*partIt)) == foundPartitions.end())
4925 {
4926 if (!oss.str().empty())
4927 oss << ", ";
4928
4929 oss << (*partIt).toString();
4930 }
4931 }
4932
4933 args.add(oss.str());
4934 emsg = emsg + string("\n") + IDBErrorInfo::instance()->errorMsg(
4935 ERR_PARTITION_NOT_EXIST, args);
4936 rc = ERR_PARTITION_NOT_EXIST;
4937 }
4938
4939 // check already disabled error now, which could be a non-error
4940 if (partitionAlreadyDisabled)
4941 {
4942 emsg = emsg + string("\n") + IDBErrorInfo::instance()->errorMsg(
4943 ERR_PARTITION_ALREADY_DISABLED);
4944 rc = ERR_PARTITION_ALREADY_DISABLED;
4945 }
4946
4947 // this rc has to be the last one set and can not be over-written by others.
4948 if (foundPartitions.empty())
4949 {
4950 rc = WARN_NO_PARTITION_PERFORMED;
4951 }
4952
4953 // @bug 4772 throw exception on any error because they are all warnings.
4954 if (rc)
4955 throw IDBExcept(emsg, rc);
4956 }
4957
4958 //------------------------------------------------------------------------------
4959 // Mark all extents as out of service, for the specified OID(s)
4960 //------------------------------------------------------------------------------
markAllPartitionForDeletion(const set<OID_t> & oids)4961 void ExtentMap::markAllPartitionForDeletion(const set<OID_t>& oids)
4962 {
4963 #ifdef BRM_INFO
4964
4965 if (fDebug)
4966 {
4967 TRACER_WRITENOW("markPartitionForDeletion");
4968 ostringstream oss;
4969 oss << "OIDS: ";
4970 set<OID_t>::const_iterator it;
4971
4972 for (it = oids.begin(); it != oids.end(); ++it)
4973 {
4974 oss << (*it) << ", ";
4975 }
4976
4977 TRACER_WRITEDIRECT(oss.str());
4978 }
4979
4980 #endif
4981
4982 if (oids.size() == 0)
4983 return;
4984
4985 set<OID_t>::const_iterator it;
4986
4987 grabEMEntryTable(WRITE);
4988 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
4989
4990 for (int i = 0; i < emEntries; i++)
4991 {
4992 if (fExtentMap[i].range.size != 0 )
4993 {
4994 it = oids.find( fExtentMap[i].fileID );
4995
4996 if (it != oids.end())
4997 {
4998 makeUndoRecord(&fExtentMap[i], sizeof(EMEntry));
4999 fExtentMap[i].status = EXTENTOUTOFSERVICE;
5000 }
5001 }
5002 }
5003 }
5004
5005 //------------------------------------------------------------------------------
5006 // Restore all extents for the specified OID(s) and partition number.
5007 //------------------------------------------------------------------------------
restorePartition(const set<OID_t> & oids,const set<LogicalPartition> & partitionNums,string & emsg)5008 void ExtentMap::restorePartition(const set<OID_t>& oids,
5009 const set<LogicalPartition>& partitionNums, string& emsg)
5010 {
5011 #ifdef BRM_INFO
5012
5013 if (fDebug)
5014 {
5015 TRACER_WRITENOW("restorePartition");
5016 ostringstream oss;
5017 set<LogicalPartition>::const_iterator partIt;
5018 oss << "partitionNums: ";
5019 for (partIt=partitionNums.begin(); partIt!=partitionNums.end(); ++partIt)
5020 oss << (*partIt) << " ";
5021
5022 oss << endl;
5023 oss << "OIDS: ";
5024 set<OID_t>::const_iterator it;
5025
5026 for (it = oids.begin(); it != oids.end(); ++it)
5027 {
5028 oss << (*it) << ", ";
5029 }
5030
5031 TRACER_WRITEDIRECT(oss.str());
5032 }
5033
5034 #endif
5035
5036 if (oids.size() == 0)
5037 return;
5038
5039 set<OID_t>::const_iterator it;
5040 grabEMEntryTable(WRITE);
5041
5042 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
5043 vector<uint32_t> extents;
5044 set<LogicalPartition> foundPartitions;
5045 bool partitionAlreadyEnabled = false;
5046
5047 for (int i = 0; i < emEntries; i++)
5048 {
5049 LogicalPartition lp(fExtentMap[i].dbRoot, fExtentMap[i].partitionNum, fExtentMap[i].segmentNum);
5050
5051 if ((fExtentMap[i].range.size != 0 ) && partitionNums.find(lp) != partitionNums.end())
5052 {
5053 it = oids.find( fExtentMap[i].fileID );
5054
5055 if (it != oids.end())
5056 {
5057 if (fExtentMap[i].status == EXTENTAVAILABLE)
5058 {
5059 partitionAlreadyEnabled = true;
5060 }
5061
5062 extents.push_back(i);
5063 foundPartitions.insert(lp);
5064 }
5065 }
5066 }
5067
5068 if (foundPartitions.size() != partitionNums.size())
5069 {
5070 set<LogicalPartition>::const_iterator partIt;
5071 Message::Args args;
5072 ostringstream oss;
5073
5074 for (partIt = partitionNums.begin(); partIt != partitionNums.end(); ++partIt)
5075 {
5076 if (foundPartitions.empty() || foundPartitions.find((*partIt)) == foundPartitions.end())
5077 {
5078 if (!oss.str().empty())
5079 oss << ", ";
5080
5081 oss << (*partIt).toString();
5082 }
5083 }
5084
5085 args.add(oss.str());
5086 emsg = IDBErrorInfo::instance()->errorMsg(ERR_PARTITION_NOT_EXIST, args);
5087 throw IDBExcept(emsg, ERR_PARTITION_NOT_EXIST);
5088 }
5089
5090 // really enable partitions
5091 for (uint32_t i = 0; i < extents.size(); i++)
5092 {
5093 makeUndoRecord(&fExtentMap[extents[i]], sizeof(EMEntry));
5094 fExtentMap[extents[i]].status = EXTENTAVAILABLE;
5095 }
5096
5097 if (partitionAlreadyEnabled)
5098 {
5099 emsg = IDBErrorInfo::instance()->errorMsg(ERR_PARTITION_ALREADY_ENABLED);
5100 throw IDBExcept(emsg, ERR_PARTITION_ALREADY_ENABLED);
5101 }
5102 }
5103
5104 //------------------------------------------------------------------------------
5105 // Return all the out-of-service partitions for the specified OID.
5106 //------------------------------------------------------------------------------
getOutOfServicePartitions(OID_t oid,set<LogicalPartition> & partitionNums)5107 void ExtentMap::getOutOfServicePartitions(OID_t oid,
5108 set<LogicalPartition>& partitionNums)
5109 {
5110 #ifdef BRM_INFO
5111
5112 if (fDebug)
5113 {
5114 TRACER_WRITELATER("getExtents");
5115 TRACER_ADDINPUT(oid);
5116 TRACER_WRITE;
5117 }
5118
5119 #endif
5120
5121 partitionNums.clear();
5122
5123 if (oid < 0)
5124 {
5125 ostringstream oss;
5126 oss << "ExtentMap::getOutOfServicePartitions(): "
5127 "invalid OID requested: " << oid;
5128 log(oss.str(), logging::LOG_TYPE_CRITICAL);
5129 throw invalid_argument(oss.str());
5130 }
5131
5132 grabEMEntryTable(READ);
5133 int emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
5134
5135 for (int i = 0; i < emEntries; i++)
5136 {
5137 if ((fExtentMap[i].range.size != 0 ) &&
5138 (fExtentMap[i].fileID == oid) &&
5139 (fExtentMap[i].status == EXTENTOUTOFSERVICE))
5140 {
5141
5142 // need to be logical partition number
5143 LogicalPartition lp(fExtentMap[i].dbRoot,
5144 fExtentMap[i].partitionNum,
5145 fExtentMap[i].segmentNum);
5146 partitionNums.insert(lp);
5147 }
5148 }
5149
5150 releaseEMEntryTable(READ);
5151 }
5152
5153 //------------------------------------------------------------------------------
5154 // Delete all extents for the specified dbroot
5155 //------------------------------------------------------------------------------
deleteDBRoot(uint16_t dbroot)5156 void ExtentMap::deleteDBRoot(uint16_t dbroot)
5157 {
5158 #ifdef BRM_INFO
5159
5160 if (fDebug)
5161 {
5162 TRACER_WRITENOW("deleteDBRoot");
5163 ostringstream oss;
5164 oss << "dbroot: " << dbroot;
5165 TRACER_WRITEDIRECT(oss.str());
5166 }
5167
5168 #endif
5169
5170 grabEMEntryTable(WRITE);
5171 grabFreeList(WRITE);
5172
5173 for (unsigned i = 0; i < fEMShminfo->allocdSize / sizeof(struct EMEntry); i++)
5174 if (fExtentMap[i].range.size != 0 && fExtentMap[i].dbRoot == dbroot)
5175 deleteExtent(i);
5176 }
5177
5178 //------------------------------------------------------------------------------
5179 // Does the specified DBRoot have any extents.
5180 // Throws exception if extentmap shared memory is not loaded.
5181 //------------------------------------------------------------------------------
isDBRootEmpty(uint16_t dbroot)5182 bool ExtentMap::isDBRootEmpty(uint16_t dbroot)
5183 {
5184 #ifdef BRM_INFO
5185
5186 if (fDebug)
5187 {
5188 TRACER_WRITELATER("isDBRootEmpty");
5189 TRACER_ADDINPUT(dbroot);
5190 TRACER_WRITE;
5191 }
5192
5193 #endif
5194
5195 bool bEmpty = true;
5196 int i, emEntries;
5197 grabEMEntryTable(READ);
5198 emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
5199
5200 if (fEMShminfo->currentSize == 0)
5201 {
5202 throw runtime_error(
5203 "ExtentMap::isDBRootEmpty() shared memory not loaded");
5204 }
5205
5206 for (i = 0; i < emEntries; i++)
5207 {
5208 if ((fExtentMap[i].range.size != 0) &&
5209 (fExtentMap[i].dbRoot == dbroot))
5210 {
5211 bEmpty = false;
5212 break;
5213 }
5214 }
5215
5216 releaseEMEntryTable(READ);
5217
5218 return bEmpty;
5219 }
5220
lookup(OID_t OID,LBIDRange_v & ranges)5221 void ExtentMap::lookup(OID_t OID, LBIDRange_v& ranges)
5222 {
5223 #ifdef BRM_INFO
5224
5225 if (fDebug)
5226 {
5227 TRACER_WRITELATER("lookup");
5228 TRACER_ADDINPUT(OID);
5229 TRACER_WRITE;
5230 }
5231
5232 #endif
5233
5234 #ifdef EM_AS_A_TABLE_POC__
5235
5236 if (OID == 1084)
5237 {
5238 EMEntry fakeEntry;
5239 fakeEntry.range.start = (1LL << 54);
5240 fakeEntry.range.size = 4;
5241 #if 0
5242 fakeEntry.fileID = 1084;
5243 fakeEntry.blockOffset = 0;
5244 fakeEntry.HWM = 1;
5245 fakeEntry.partitionNum = 0;
5246 fakeEntry.segmentNum = 0;
5247 fakeEntry.dbRoot = 1;
5248 fakeEntry.colWid = 4;
5249 fakeEntry.status = EXTENTAVAILABLE;
5250 fakeEntry.partition.cprange.hi_val = numeric_limits<int64_t>::min() + 2;
5251 fakeEntry.partition.cprange.lo_val = numeric_limits<int64_t>::max();
5252 fakeEntry.partition.cprange.sequenceNum = 0;
5253 fakeEntry.partition.cprange.isValid = CP_INVALID;
5254 #endif
5255 ranges.push_back(fakeEntry.range);
5256 return;
5257 }
5258
5259 #endif
5260
5261 int i, emEntries;
5262 LBIDRange tmp;
5263
5264 ranges.clear();
5265
5266 if (OID < 0)
5267 {
5268 ostringstream oss;
5269 oss << "ExtentMap::lookup(): invalid OID requested: " << OID;
5270 log(oss.str(), logging::LOG_TYPE_CRITICAL);
5271 throw invalid_argument(oss.str());
5272 }
5273
5274 grabEMEntryTable(READ);
5275 emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
5276
5277 for (i = 0 ; i < emEntries; i++)
5278 if ((fExtentMap[i].fileID == OID) &&
5279 (fExtentMap[i].range.size != 0) &&
5280 (fExtentMap[i].status != EXTENTOUTOFSERVICE))
5281 {
5282 tmp.start = fExtentMap[i].range.start;
5283 tmp.size = fExtentMap[i].range.size * 1024;
5284 ranges.push_back(tmp);
5285 }
5286
5287 releaseEMEntryTable(READ);
5288 }
5289
5290
checkConsistency()5291 int ExtentMap::checkConsistency()
5292 {
5293 #ifdef BRM_INFO
5294
5295 if (fDebug) TRACER_WRITENOW("checkConsistency");
5296
5297 #endif
5298
5299 /*
5300 LBID space consistency checks
5301 1. verify that every LBID is either in the EM xor the freelist
5302 a. for every segment in the EM, make sure there is no overlapping entry in the FL
5303 b. scan both lists to verify that the entire space is represented
5304 2. verify that there are no adjacent entries in the freelist
5305 OID consistency
5306 3. make sure there are no gaps in the file offsets
5307 4. make sure that only the last extent has a non-zero HWM
5308 Struct integrity
5309 5. verify that the number of entries in each table is consistent with
5310 the recorded current size
5311 */
5312
5313 LBID_t emBegin, emEnd, flBegin, flEnd;
5314 int i, j, flEntries, emEntries;
5315 uint32_t usedEntries;
5316
5317 grabEMEntryTable(READ);
5318
5319 try
5320 {
5321 grabFreeList(READ);
5322 }
5323 catch (...)
5324 {
5325 releaseEMEntryTable(READ);
5326 throw;
5327 }
5328
5329 flEntries = fFLShminfo->allocdSize / sizeof(InlineLBIDRange);
5330 emEntries = fEMShminfo->allocdSize / sizeof(EMEntry);
5331
5332 // test 1a - make sure every entry in the EM is not overlapped by an entry in the FL
5333 for (i = 0; i < emEntries; i++)
5334 {
5335 if (fExtentMap[i].range.size != 0)
5336 {
5337 emBegin = fExtentMap[i].range.start;
5338 emEnd = emBegin + (fExtentMap[i].range.size * 1024) - 1;
5339
5340 for (j = 0; j < flEntries; j++)
5341 {
5342 if (fFreeList[j].size != 0)
5343 {
5344 flBegin = fFreeList[j].start;
5345 flEnd = flBegin + (fFreeList[j].size * 1024) - 1;
5346
5347 //em entry overlaps the beginning
5348 //em entry is contained within
5349 //em entry overlaps the end
5350 if ((emBegin <= flBegin && emEnd >= flBegin) ||
5351 (emBegin >= flBegin && emEnd <= flEnd) ||
5352 (emBegin <= flEnd && emEnd >= flEnd))
5353 {
5354 cerr << "EM::checkConsistency(): Improper LBID allocation detected" << endl;
5355 throw logic_error("EM checkConsistency test 1a (data structures are read-locked)");
5356 }
5357 }
5358 }
5359 }
5360 }
5361
5362 cout << "test 1a passed\n";
5363
5364 //test 1b - verify that the entire LBID space is accounted for
5365
5366 int lbid, oldlbid;
5367
5368 lbid = 0;
5369
5370 while (lbid < 67108864) // 2^26 (2^36/1024)
5371 {
5372 oldlbid = lbid;
5373
5374 for (i = 0; i < flEntries; i++)
5375 {
5376 if (fFreeList[i].start % 1024 != 0)
5377 {
5378 cerr << "EM::checkConsistency(): A freelist entry is not 1024-block aligned" << endl;
5379 throw logic_error("EM checkConsistency test 1b (data structures are read-locked)");
5380 }
5381
5382 if (fFreeList[i].start / 1024 == lbid)
5383 lbid += fFreeList[i].size;
5384 }
5385
5386 for (i = 0; i < emEntries; i++)
5387 {
5388 if (fExtentMap[i].range.start % 1024 != 0)
5389 {
5390 cerr << "EM::checkConsistency(): An extent map entry is not 1024-block aligned " << i << " " << fExtentMap[i].range.start << endl;
5391 throw logic_error("EM checkConsistency test 1b (data structures are read-locked)");
5392 }
5393
5394 if (fExtentMap[i].range.start / 1024 == lbid)
5395 lbid += fExtentMap[i].range.size;
5396 }
5397
5398 if (oldlbid == lbid)
5399 {
5400 cerr << "EM::checkConsistency(): There is a gap in the LBID space at block #" <<
5401 static_cast<uint64_t>(lbid * 1024) << endl;
5402 throw logic_error("EM checkConsistency test 1b (data structures are read-locked)");
5403 }
5404 }
5405
5406 cout << "test 1b passed\n";
5407
5408 // test 1c - verify that no dbroot is < 1
5409 bool errorOut = false;
5410
5411 for (i = 0; i < emEntries; i++)
5412 {
5413 if (fExtentMap[i].range.size != 0)
5414 {
5415 //cout << "EM[" << i << "]: dbRoot=" << fExtentMap[i].dbRoot(listMan) << endl;
5416 if (fExtentMap[i].dbRoot == 0)
5417 {
5418 errorOut = true;
5419 cerr << "EM::checkConsistency(): index " << i << " has a 0 dbroot\n";
5420 }
5421 }
5422 }
5423
5424 if (errorOut)
5425 throw logic_error("EM checkConsistency test 1c (data structures are read-locked)");
5426
5427 cout << "test 1c passed\n";
5428
5429 #if 0 // a test ported from the tek2 branch, which requires a RID field to be stored; not relevant here
5430 // test 1d - verify that each <OID, RID> pair is unique
5431 cout << "Running test 1d\n";
5432
5433 set<OIDRID> uniquer;
5434
5435 for (i = 0; i < emEntries; i++)
5436 {
5437 if (fExtentMap[i].size != 0 && !fExtentMap[i].isDict())
5438 {
5439 OIDRID element(fExtentMap[i].fileID, fExtentMap[i].rid);
5440
5441 if (uniquer.insert(element).second == false)
5442 throw logic_error("EM consistency test 1d failed (data structures are read-locked)");
5443 }
5444 }
5445
5446 uniquer.clear();
5447 cout << "Test 1d passed\n";
5448 #endif
5449
5450 // test 2 - verify that the freelist is consolidated
5451 for (i = 0; i < flEntries; i++)
5452 {
5453 if (fFreeList[i].size != 0)
5454 {
5455 flEnd = fFreeList[i].start + (fFreeList[i].size * 1024);
5456
5457 for (j = i + 1; j < flEntries; j++)
5458 if (fFreeList[j].size != 0 && fFreeList[j].start == flEnd)
5459 throw logic_error("EM checkConsistency test 2 (data structures are read-locked)");
5460 }
5461 }
5462
5463 cout << "test 2 passed\n";
5464
5465 // needs to be updated
5466 #if 0
5467 // test 3 - scan the extent map to make sure files have no LBID gaps
5468 vector<OID_t> oids;
5469 vector< vector<uint32_t> > fbos;
5470
5471 for (i = 0; i < emEntries; i++)
5472 {
5473 if (fExtentMap[i].size != 0)
5474 {
5475 for (j = 0; j < (int)oids.size(); j++)
5476 if (oids[j] == fExtentMap[i].fileID)
5477 break;
5478
5479 if (j == (int)oids.size())
5480 {
5481 oids.push_back(fExtentMap[i].fileID);
5482 fbos.push_back(vector<uint32_t>());
5483 }
5484
5485 fbos[j].push_back(fExtentMap[i].blockOffset);
5486 }
5487 }
5488
5489 for (i = 0; i < (int)fbos.size(); i++)
5490 sort<vector<uint32_t>::iterator>(fbos[i].begin(), fbos[i].end());
5491
5492 const unsigned EXTENT_SIZE = getExtentSize();
5493
5494 for (i = 0; i < (int)fbos.size(); i++)
5495 {
5496 for (j = 0; j < (int)fbos[i].size(); j++)
5497 {
5498 if (fbos[i][j] != static_cast<uint32_t>(j * EXTENT_SIZE))
5499 {
5500 cerr << "EM: OID " << oids[i] << " has no extent at FBO " <<
5501 j* EXTENT_SIZE << endl;
5502 throw logic_error("EM checkConsistency test 3 (data structures are read-locked)");
5503 }
5504 }
5505 }
5506
5507 fbos.clear();
5508 oids.clear();
5509 #endif
5510
5511
5512 // test 5a - scan freelist to make sure the current size is accurate
5513
5514 for (i = 0, usedEntries = 0; i < emEntries; i++)
5515 if (fExtentMap[i].range.size != 0)
5516 usedEntries++;
5517
5518 if (usedEntries != fEMShminfo->currentSize / sizeof(EMEntry))
5519 {
5520 cerr << "checkConsistency: used extent map entries = " << usedEntries
5521 << " metadata says " << fEMShminfo->currentSize / sizeof(EMEntry)
5522 << endl;
5523 throw logic_error("EM checkConsistency test 5a (data structures are read-locked)");
5524 }
5525
5526 for (i = 0, usedEntries = 0; i < flEntries; i++)
5527 if (fFreeList[i].size != 0)
5528 usedEntries++;
5529
5530 if (usedEntries != fFLShminfo->currentSize / sizeof(InlineLBIDRange))
5531 {
5532 cerr << "checkConsistency: used freelist entries = " << usedEntries
5533 << " metadata says " << fFLShminfo->currentSize / sizeof(InlineLBIDRange)
5534 << endl;
5535 throw logic_error("EM checkConsistency test 5a (data structures are read-locked)");
5536 }
5537
5538 cout << "test 5a passed\n";
5539
5540 releaseFreeList(READ);
5541 releaseEMEntryTable(READ);
5542 return 0;
5543 }
5544
5545
setReadOnly()5546 void ExtentMap::setReadOnly()
5547 {
5548 r_only = true;
5549 }
5550
undoChanges()5551 void ExtentMap::undoChanges()
5552 {
5553 #ifdef BRM_INFO
5554
5555 if (fDebug) TRACER_WRITENOW("undoChanges");
5556
5557 #endif
5558 Undoable::undoChanges();
5559 finishChanges();
5560 }
5561
confirmChanges()5562 void ExtentMap::confirmChanges()
5563 {
5564 #ifdef BRM_INFO
5565
5566 if (fDebug) TRACER_WRITENOW("confirmChanges");
5567
5568 #endif
5569 Undoable::confirmChanges();
5570 finishChanges();
5571 }
5572
finishChanges()5573 void ExtentMap::finishChanges()
5574 {
5575 if (flLocked)
5576 releaseFreeList(WRITE);
5577
5578 if (emLocked)
5579 releaseEMEntryTable(WRITE);
5580 }
5581
getEMFLLockStatus()5582 const bool* ExtentMap::getEMFLLockStatus()
5583 {
5584 return &flLocked;
5585 }
5586
getEMLockStatus()5587 const bool* ExtentMap::getEMLockStatus()
5588 {
5589 return &emLocked;
5590 }
5591
5592 //------------------------------------------------------------------------------
5593 // Reload Config cache if config file time stamp has changed
5594 //------------------------------------------------------------------------------
checkReloadConfig()5595 void ExtentMap::checkReloadConfig()
5596 {
5597 config::Config* cf = config::Config::makeConfig();
5598
5599 // Immediately return if Columnstore.xml timestamp has not changed
5600 if (cf->getCurrentMTime() == fCacheTime)
5601 return;
5602
5603 //--------------------------------------------------------------------------
5604 // Initialize outdated attribute still used by primitiveserver.
5605 // Hardcode to 8K for now, since that's all we support.
5606 //--------------------------------------------------------------------------
5607 ExtentSize = 0x2000;
5608
5609 // string es = cf->getConfig("ExtentMap", "ExtentSize");
5610 // if (es.length() == 0) es = "8K";
5611 // if (es == "8K" || es == "8k")
5612 // {
5613 // ExtentSize = 0x2000;
5614 // }
5615 // else if (es == "1K" || es == "1k")
5616 // {
5617 // ExtentSize = 0x400;
5618 // }
5619 // else if (es == "64K" || es == "64k")
5620 // {
5621 // ExtentSize = 0x10000;
5622 // }
5623 // else
5624 // {
5625 // throw logic_error("Invalid ExtentSize found in config file!");
5626 // }
5627
5628 //--------------------------------------------------------------------------
5629 // Initialize number of rows per extent
5630 // Hardcode to 8M for now, since that's all we support.
5631 //--------------------------------------------------------------------------
5632 ExtentRows = 0x800000;
5633
5634 // string er = cf->getConfig("ExtentMap", "ExtentRows");
5635 // if (er.length() == 0) er = "8M";
5636 // if (er == "8M" || er == "8m")
5637 // {
5638 // ExtentRows = 0x800000;
5639 // }
5640 // else if (er == "1M" || er == "1m")
5641 // {
5642 // ExtentRows = 0x100000;
5643 // }
5644 // else if (er == "64M" || er == "64m")
5645 // {
5646 // ExtentRows = 0x4000000;
5647 // }
5648 // else
5649 // {
5650 // throw logic_error("Invalid ExtentRows found in config file!");
5651 // }
5652
5653 //--------------------------------------------------------------------------
5654 // Initialize segment files per physical partition
5655 //--------------------------------------------------------------------------
5656 string fpc = cf->getConfig("ExtentMap", "FilesPerColumnPartition");
5657 filesPerColumnPartition = cf->uFromText(fpc);
5658
5659 if (filesPerColumnPartition == 0)
5660 filesPerColumnPartition = 4;
5661
5662 // Get latest Columnstore.xml timestamp after first access forced a reload
5663 fCacheTime = cf ->getLastMTime();
5664
5665 //--------------------------------------------------------------------------
5666 // Initialize extents per segment file
5667 //--------------------------------------------------------------------------
5668 string epsf = cf->getConfig("ExtentMap", "ExtentsPerSegmentFile");
5669 extentsPerSegmentFile = cf->uFromText(epsf);
5670
5671 if (extentsPerSegmentFile == 0)
5672 extentsPerSegmentFile = 2;
5673 }
5674
5675 //------------------------------------------------------------------------------
5676 // Returns the number of extents in a segment file.
5677 // Mutex lock and call to checkReloadConfig() not currently necessary since,
5678 // going with hardcoded value. See checkReloadConfig().
5679 //------------------------------------------------------------------------------
getExtentSize()5680 unsigned ExtentMap::getExtentSize() // dmc-should deprecate
5681 {
5682 // boost::mutex::scoped_lock lk(fConfigCacheMutex);
5683 // checkReloadConfig( );
5684
5685 ExtentSize = 0x2000;
5686 return ExtentSize;
5687 }
5688
5689 //------------------------------------------------------------------------------
5690 // Returns the number or rows per extent. Only supported values are 1m, 8m,
5691 // and 64m.
5692 // Mutex lock and call to checkReloadConfig() not currently necessary since,
5693 // going with hardcoded value. See checkReloadConfig().
5694 //------------------------------------------------------------------------------
getExtentRows()5695 unsigned ExtentMap::getExtentRows()
5696 {
5697 // boost::mutex::scoped_lock lk(fConfigCacheMutex);
5698 // checkReloadConfig( );
5699
5700 ExtentRows = 0x800000;
5701 return ExtentRows;
5702 }
5703
5704 //------------------------------------------------------------------------------
5705 // Returns the number of column segment files for an OID, that make up a
5706 // partition.
5707 //------------------------------------------------------------------------------
getFilesPerColumnPartition()5708 unsigned ExtentMap::getFilesPerColumnPartition()
5709 {
5710 boost::mutex::scoped_lock lk(fConfigCacheMutex);
5711 checkReloadConfig( );
5712
5713 return filesPerColumnPartition;
5714 }
5715
5716 //------------------------------------------------------------------------------
5717 // Returns the number of extents in a segment file.
5718 //------------------------------------------------------------------------------
getExtentsPerSegmentFile()5719 unsigned ExtentMap::getExtentsPerSegmentFile()
5720 {
5721 boost::mutex::scoped_lock lk(fConfigCacheMutex);
5722 checkReloadConfig( );
5723
5724 return extentsPerSegmentFile;
5725 }
5726
5727 //------------------------------------------------------------------------------
5728 // Returns the number of DBRoots to be used in storing db column files.
5729 //------------------------------------------------------------------------------
getDbRootCount()5730 unsigned ExtentMap::getDbRootCount()
5731 {
5732 oam::OamCache* oamcache = oam::OamCache::makeOamCache();
5733 unsigned int rootCnt = oamcache->getDBRootCount();
5734
5735 return rootCnt;
5736 }
5737
5738 //------------------------------------------------------------------------------
5739 // Get list of DBRoots that map to the specified PM. DBRoot list is cached
5740 // internally in fPmDbRootMap after getting from Columnstore.xml via OAM.
5741 //------------------------------------------------------------------------------
getPmDbRoots(int pm,vector<int> & dbRootList)5742 void ExtentMap::getPmDbRoots( int pm, vector<int>& dbRootList )
5743 {
5744 oam::OamCache* oamcache = oam::OamCache::makeOamCache();
5745 oam::OamCache::PMDbrootsMap_t pmDbroots = oamcache->getPMToDbrootsMap();
5746
5747 dbRootList.clear();
5748 dbRootList = (*pmDbroots)[pm];
5749 }
5750
getFreeListEntries()5751 vector<InlineLBIDRange> ExtentMap::getFreeListEntries()
5752 {
5753 vector<InlineLBIDRange> v;
5754 grabEMEntryTable(READ);
5755 grabFreeList(READ);
5756
5757 int allocdSize = fFLShminfo->allocdSize / sizeof(InlineLBIDRange);
5758
5759 for (int i = 0; i < allocdSize; i++)
5760 v.push_back(fFreeList[i]);
5761
5762 releaseFreeList(READ);
5763 releaseEMEntryTable(READ);
5764 return v;
5765 }
5766
dumpTo(ostream & os)5767 void ExtentMap::dumpTo(ostream& os)
5768 {
5769 grabEMEntryTable(READ);
5770 unsigned emEntries = fEMShminfo->allocdSize / sizeof(struct EMEntry);
5771
5772 for (unsigned i = 0; i < emEntries; i++)
5773 {
5774 if (fExtentMap[i].range.size != 0)
5775 {
5776 os << fExtentMap[i].range.start << '|'
5777 << fExtentMap[i].range.size << '|'
5778 << fExtentMap[i].fileID << '|'
5779 << fExtentMap[i].blockOffset << '|'
5780 << fExtentMap[i].HWM << '|'
5781 << fExtentMap[i].partitionNum << '|'
5782 << fExtentMap[i].segmentNum << '|'
5783 << fExtentMap[i].dbRoot << '|'
5784 << fExtentMap[i].colWid << '|'
5785 << fExtentMap[i].status << '|'
5786 << fExtentMap[i].partition.cprange.hi_val << '|'
5787 << fExtentMap[i].partition.cprange.lo_val << '|'
5788 << fExtentMap[i].partition.cprange.sequenceNum << '|'
5789 << (int)fExtentMap[i].partition.cprange.isValid << '|'
5790 << endl;
5791 }
5792 }
5793
5794 releaseEMEntryTable(READ);
5795 }
5796
5797 /*int ExtentMap::physicalPartitionNum(const set<OID_t>& oids,
5798 const set<uint32_t>& partitionNums,
5799 vector<PartitionInfo>& partitionInfos)
5800 {
5801 #ifdef BRM_INFO
5802 if (fDebug)
5803 {
5804 TRACER_WRITENOW("physicalPartitionNum");
5805 ostringstream oss;
5806 set<uint32_t>::const_iterator partIt;
5807 oss << "partitionNums: "
5808 for (partIt=partitionNums.begin(); it!=partitionNums.end(); ++it)
5809 oss << (*it) << " ";
5810 oss << endl;
5811 TRACER_WRITEDIRECT(oss.str());
5812 }
5813 #endif
5814
5815 set<OID_t>::const_iterator it;
5816 grabEMEntryTable(READ);
5817
5818 int emEntries = fEMShminfo->allocdSize/sizeof(struct EMEntry);
5819 PartitionInfo partInfo;
5820 vector<uint32_t> extents;
5821 set<uint32_t> foundPartitions;
5822 for (int i = 0; i < emEntries; i++)
5823 {
5824 if ((fExtentMap[i].range.size != 0 ) &&
5825 partitionNums.find(logicalPartitionNum(fExtentMap[i])) != partitionNums.end())
5826 {
5827 it = oids.find( fExtentMap[i].fileID );
5828 if (it != oids.end())
5829 {
5830 partInfo.oid = fExtentMap[i].fileID;
5831 partInfo.lp.dbroot = fExtentMap[i].dbRoot;
5832 partInfo.lp.pp = fExtentMap[i].partitionNum;
5833 partInfo.lp.seg = fExtentMap[i].segmentNum;
5834 partitionInfos.push_back(partInfo);
5835 }
5836 }
5837 }
5838 releaseEMEntryTable(READ);
5839 return 0;
5840 }
5841 */
5842
5843 } //namespace
5844 // vim:ts=4 sw=4:
5845
5846