1 /*
2 Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #define DBTUP_C
26 #define DBTUP_INDEX_CPP
27 #include <dblqh/Dblqh.hpp>
28 #include "Dbtup.hpp"
29 #include <RefConvert.hpp>
30 #include <ndb_limits.h>
31 #include <pc.hpp>
32 #include <AttributeDescriptor.hpp>
33 #include "AttributeOffset.hpp"
34 #include <AttributeHeader.hpp>
35 #include <signaldata/TuxMaint.hpp>
36 #include <signaldata/AlterIndxImpl.hpp>
37
38 // methods used by ordered index
39
40 void
tuxGetTupAddr(Uint32 fragPtrI,Uint32 pageId,Uint32 pageIndex,Uint32 & lkey1,Uint32 & lkey2)41 Dbtup::tuxGetTupAddr(Uint32 fragPtrI,
42 Uint32 pageId,
43 Uint32 pageIndex,
44 Uint32& lkey1,
45 Uint32& lkey2)
46 {
47 jamEntry();
48 PagePtr pagePtr;
49 c_page_pool.getPtr(pagePtr, pageId);
50 lkey1 = pagePtr.p->frag_page_id;
51 lkey2 = pageIndex;
52 }
53
/*
 * Allocate a fixed-size record in the fragment to hold a TUX index
 * node.  On success returns 0 and sets pageId/pageOffset to the new
 * record's local key and node to the first attribute data word.
 * On failure returns the allocator's error code.
 */
int
Dbtup::tuxAllocNode(EmulatedJamBuffer * jamBuf,
                    Uint32 fragPtrI,
                    Uint32& pageId,
                    Uint32& pageOffset,
                    Uint32*& node)
{
  thrjamEntry(jamBuf);
  FragrecordPtr fragPtr;
  fragPtr.i= fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
  TablerecPtr tablePtr;
  tablePtr.i= fragPtr.p->fragTableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);

  Local_key key;
  Uint32* ptr, frag_page_id, err;
  // Allocate a fixed-size record; err is set by alloc_fix_rec on failure.
  if ((ptr= alloc_fix_rec(&err,fragPtr.p,tablePtr.p, &key, &frag_page_id)) == 0)
  {
    thrjam(jamBuf);
    return err;
  }
  pageId= key.m_page_no;
  pageOffset= key.m_page_idx;
  // Offset of the first attribute's data within the record; the node
  // content starts there (same computation as in tuxGetNode/tuxFreeNode).
  Uint32 attrDescIndex= tablePtr.p->tabDescriptor + (0 << ZAD_LOG_SIZE);
  Uint32 attrDataOffset= AttributeOffset::getOffset(
                            tableDescriptor[attrDescIndex + 1].tabDescr);
  node= ptr + attrDataOffset;
  return 0;
}
84
/*
 * Free a TUX index node previously allocated with tuxAllocNode.
 * pageId/pageOffset form the record's local key; node is only used
 * for a consistency check against the recomputed data offset.
 */
void
Dbtup::tuxFreeNode(Uint32 fragPtrI,
                   Uint32 pageId,
                   Uint32 pageOffset,
                   Uint32* node)
{
  jamEntry();
  FragrecordPtr fragPtr;
  fragPtr.i= fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
  TablerecPtr tablePtr;
  tablePtr.i= fragPtr.p->fragTableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);

  // Rebuild the local key of the record holding the node.
  Local_key key;
  key.m_page_no = pageId;
  key.m_page_idx = pageOffset;
  PagePtr pagePtr;
  Tuple_header* ptr = (Tuple_header*)get_ptr(&pagePtr, &key, tablePtr.p);

  // The node pointer must equal record start + first-attribute offset,
  // i.e. exactly what tuxAllocNode handed out.
  Uint32 attrDescIndex= tablePtr.p->tabDescriptor + (0 << ZAD_LOG_SIZE);
  Uint32 attrDataOffset= AttributeOffset::getOffset(tableDescriptor[attrDescIndex + 1].tabDescr);
  ndbrequire(node == (Uint32*)ptr + attrDataOffset);

  free_fix_rec(fragPtr.p, tablePtr.p, &key, (Fix_page*)pagePtr.p);
}
111
112 void
tuxGetNode(Uint32 fragPtrI,Uint32 pageId,Uint32 pageOffset,Uint32 * & node)113 Dbtup::tuxGetNode(Uint32 fragPtrI,
114 Uint32 pageId,
115 Uint32 pageOffset,
116 Uint32*& node)
117 {
118 FragrecordPtr fragPtr;
119 fragPtr.i= fragPtrI;
120 ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
121 TablerecPtr tablePtr;
122 tablePtr.i= fragPtr.p->fragTableId;
123 ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
124 PagePtr pagePtr;
125 c_page_pool.getPtr(pagePtr, pageId);
126 Uint32 attrDescIndex= tablePtr.p->tabDescriptor + (0 << ZAD_LOG_SIZE);
127 Uint32 attrDataOffset= AttributeOffset::getOffset(
128 tableDescriptor[attrDescIndex + 1].tabDescr);
129 node= ((Fix_page*)pagePtr.p)->
130 get_ptr(pageOffset, tablePtr.p->m_offsets[MM].m_fix_header_size) +
131 attrDataOffset;
132 }
133 int
tuxReadAttrs(EmulatedJamBuffer * jamBuf,Uint32 fragPtrI,Uint32 pageId,Uint32 pageIndex,Uint32 tupVersion,const Uint32 * attrIds,Uint32 numAttrs,Uint32 * dataOut,bool xfrmFlag)134 Dbtup::tuxReadAttrs(EmulatedJamBuffer * jamBuf,
135 Uint32 fragPtrI,
136 Uint32 pageId,
137 Uint32 pageIndex,
138 Uint32 tupVersion,
139 const Uint32* attrIds,
140 Uint32 numAttrs,
141 Uint32* dataOut,
142 bool xfrmFlag)
143 {
144 thrjamEntry(jamBuf);
145 // use own variables instead of globals
146 FragrecordPtr fragPtr;
147 fragPtr.i= fragPtrI;
148 ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
149 TablerecPtr tablePtr;
150 tablePtr.i= fragPtr.p->fragTableId;
151 ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
152
153 // search for tuple version if not original
154
155 Operationrec tmpOp;
156 KeyReqStruct req_struct(jamBuf);
157 req_struct.tablePtrP = tablePtr.p;
158 req_struct.fragPtrP = fragPtr.p;
159
160 tmpOp.m_tuple_location.m_page_no= pageId;
161 tmpOp.m_tuple_location.m_page_idx= pageIndex;
162 tmpOp.op_struct.op_type = ZREAD; // valgrind
163 setup_fixed_part(&req_struct, &tmpOp, tablePtr.p);
164 Tuple_header *tuple_ptr= req_struct.m_tuple_ptr;
165 if (tuple_ptr->get_tuple_version() != tupVersion)
166 {
167 jam();
168 OperationrecPtr opPtr;
169 opPtr.i= tuple_ptr->m_operation_ptr_i;
170 Uint32 loopGuard= 0;
171 while (opPtr.i != RNIL) {
172 c_operation_pool.getPtr(opPtr);
173 if (opPtr.p->tupVersion == tupVersion) {
174 jam();
175 if (!opPtr.p->m_copy_tuple_location.isNull()) {
176 req_struct.m_tuple_ptr=
177 get_copy_tuple(&opPtr.p->m_copy_tuple_location);
178 }
179 break;
180 }
181 jam();
182 opPtr.i= opPtr.p->prevActiveOp;
183 ndbrequire(++loopGuard < (1 << ZTUP_VERSION_BITS));
184 }
185 }
186 // read key attributes from found tuple version
187 // save globals
188 prepare_read(&req_struct, tablePtr.p, false);
189
190 // do it
191 int ret = readAttributes(&req_struct,
192 attrIds,
193 numAttrs,
194 dataOut,
195 ZNIL,
196 xfrmFlag);
197
198 // done
199 return ret;
200 }
/*
 * Read primary key attributes of the tuple at (pageId, pageIndex) into
 * dataOut, with attribute headers stripped so dataOut holds only raw
 * key data.  Returns the number of key data words (>= 0) or an error
 * (< 0).  One extra word is appended after the key data: the row GCI
 * if the table stores it, otherwise 0 (not included in the returned
 * count).
 */
int
Dbtup::tuxReadPk(Uint32 fragPtrI, Uint32 pageId, Uint32 pageIndex, Uint32* dataOut, bool xfrmFlag)
{
  jamEntry();
  // use own variables instead of globals
  FragrecordPtr fragPtr;
  fragPtr.i= fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
  TablerecPtr tablePtr;
  tablePtr.i= fragPtr.p->fragTableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);

  Operationrec tmpOp;
  tmpOp.m_tuple_location.m_page_no= pageId;
  tmpOp.m_tuple_location.m_page_idx= pageIndex;

  KeyReqStruct req_struct(this);
  req_struct.tablePtrP = tablePtr.p;
  req_struct.fragPtrP = fragPtr.p;

  PagePtr page_ptr;
  Uint32* ptr= get_ptr(&page_ptr, &tmpOp.m_tuple_location, tablePtr.p);
  req_struct.m_page_ptr = page_ptr;
  req_struct.m_tuple_ptr = (Tuple_header*)ptr;

  int ret = 0;
  // NOTE(review): if the tuple is FREE the whole read is skipped and
  // ret stays 0; only the trailing GCI/0 word is written below.
  if (! (req_struct.m_tuple_ptr->m_header_bits & Tuple_header::FREE))
  {
    req_struct.check_offset[MM]= tablePtr.p->get_check_offset(MM);
    req_struct.check_offset[DD]= tablePtr.p->get_check_offset(DD);

    Uint32 num_attr= tablePtr.p->m_no_of_attributes;
    Uint32 descr_start= tablePtr.p->tabDescriptor;
    TableDescriptor *tab_descr= &tableDescriptor[descr_start];
    ndbrequire(descr_start + (num_attr << ZAD_LOG_SIZE) <= cnoOfTabDescrRec);
    req_struct.attr_descr= tab_descr;

    // An uncommitted insert keeps its data in a copy tuple; read that.
    if(req_struct.m_tuple_ptr->m_header_bits & Tuple_header::ALLOC)
    {
      Uint32 opPtrI= req_struct.m_tuple_ptr->m_operation_ptr_i;
      Operationrec* opPtrP= c_operation_pool.getPtr(opPtrI);
      ndbassert(!opPtrP->m_copy_tuple_location.isNull());
      req_struct.m_tuple_ptr=
        get_copy_tuple(&opPtrP->m_copy_tuple_location);
    }
    prepare_read(&req_struct, tablePtr.p, false);

    // key attribute ids are stored consecutively in the table descriptor
    const Uint32* attrIds= &tableDescriptor[tablePtr.p->readKeyArray].tabDescr;
    const Uint32 numAttrs= tablePtr.p->noOfKeyAttr;
    // read pk attributes from original tuple

    // do it
    ret = readAttributes(&req_struct,
                         attrIds,
                         numAttrs,
                         dataOut,
                         ZNIL,
                         xfrmFlag);
    // done
    if (ret >= 0) {
      // remove headers
      // Compact dataOut in place: each attribute is written as
      // [AttributeHeader][data...]; shift the data left over the
      // headers.  n counts headers removed so far, i scans the raw
      // output.
      Uint32 n= 0;
      Uint32 i= 0;
      while (n < numAttrs) {
        const AttributeHeader ah(dataOut[i]);
        Uint32 size= ah.getDataSize();
        ndbrequire(size != 0);
        for (Uint32 j= 0; j < size; j++) {
          dataOut[i + j - n]= dataOut[i + j + 1];
        }
        n+= 1;
        i+= 1 + size;
      }
      ndbrequire((int)i == ret);
      // returned length no longer includes the removed header words
      ret -= numAttrs;
    } else {
      return ret;
    }
  }
  // Append row GCI (or 0) after the key data; not counted in ret.
  if (tablePtr.p->m_bits & Tablerec::TR_RowGCI)
  {
    dataOut[ret] = *req_struct.m_tuple_ptr->get_mm_gci(tablePtr.p);
  }
  else
  {
    dataOut[ret] = 0;
  }
  return ret;
}
290
291 int
accReadPk(Uint32 tableId,Uint32 fragId,Uint32 fragPageId,Uint32 pageIndex,Uint32 * dataOut,bool xfrmFlag)292 Dbtup::accReadPk(Uint32 tableId, Uint32 fragId, Uint32 fragPageId, Uint32 pageIndex, Uint32* dataOut, bool xfrmFlag)
293 {
294 jamEntry();
295 // get table
296 TablerecPtr tablePtr;
297 tablePtr.i = tableId;
298 ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
299 // get fragment
300 FragrecordPtr fragPtr;
301 getFragmentrec(fragPtr, fragId, tablePtr.p);
302 // get real page id and tuple offset
303
304 Uint32 pageId = getRealpid(fragPtr.p, fragPageId);
305 // use TUX routine - optimize later
306 int ret = tuxReadPk(fragPtr.i, pageId, pageIndex, dataOut, xfrmFlag);
307 return ret;
308 }
309
310 /*
311 * TUX index contains all tuple versions. A scan in TUX has scanned
312 * one of them and asks if it can be returned as scan result. This
313 * depends on trans id, dirty read flag, and savepoint within trans.
314 *
315 * Previously this faked a ZREAD operation and used getPage().
316 * In TUP getPage() is run after ACC locking, but TUX comes here
317 * before ACC access. Instead of modifying getPage() it is more
318 * clear to do the full check here.
319 */
/*
 * Decide whether the scanned tuple version is visible to the scanning
 * transaction.  Visibility depends on trans id, the dirty-read flag,
 * and the savepoint within the transaction (see block comment above).
 * Returns true if the version may be returned as a scan result.
 */
bool
Dbtup::tuxQueryTh(Uint32 fragPtrI,
                  Uint32 pageId,
                  Uint32 pageIndex,
                  Uint32 tupVersion,
                  Uint32 transId1,
                  Uint32 transId2,
                  bool dirty,
                  Uint32 savepointId)
{
  jamEntry();
  FragrecordPtr fragPtr;
  fragPtr.i= fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
  TablerecPtr tablePtr;
  tablePtr.i= fragPtr.p->fragTableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
  PagePtr pagePtr;
  pagePtr.i = pageId;
  c_page_pool.getPtr(pagePtr);

  KeyReqStruct req_struct(this);

  {
    // fake a read operation just to locate the tuple header
    Operationrec tmpOp;
    tmpOp.m_tuple_location.m_page_no = pageId;
    tmpOp.m_tuple_location.m_page_idx = pageIndex;
    tmpOp.op_struct.op_type = ZREAD; // valgrind
    setup_fixed_part(&req_struct, &tmpOp, tablePtr.p);
  }

  Tuple_header* tuple_ptr = req_struct.m_tuple_ptr;

  OperationrecPtr currOpPtr;
  currOpPtr.i = tuple_ptr->m_operation_ptr_i;
  if (currOpPtr.i == RNIL) {
    jam();
    // tuple has no operation, any scan can see it
    return true;
  }
  c_operation_pool.getPtr(currOpPtr);

  // does the pending operation belong to the scanning transaction?
  const bool sameTrans =
    c_lqh->is_same_trans(currOpPtr.p->userpointer, transId1, transId2);

  bool res = false;
  OperationrecPtr loopOpPtr = currOpPtr;

  if (!sameTrans) {
    jam();
    if (!dirty) {
      jam();
      if (currOpPtr.p->nextActiveOp == RNIL) {
        jam();
        // last op - TUX makes ACC lock request in same timeslice
        res = true;
      }
    }
    else {
      // dirty read: only the committed version is visible
      // loop to first op (returns false)
      find_savepoint(loopOpPtr, 0);
      const Uint32 op_type = loopOpPtr.p->op_struct.op_type;

      if (op_type != ZINSERT) {
        jam();
        // read committed version
        const Uint32 origVersion = tuple_ptr->get_tuple_version();
        if (origVersion == tupVersion) {
          jam();
          res = true;
        }
      }
    }
  }
  else {
    jam();
    // for own trans, ignore dirty flag

    if (find_savepoint(loopOpPtr, savepointId)) {
      jam();
      const Uint32 op_type = loopOpPtr.p->op_struct.op_type;

      if (op_type != ZDELETE) {
        jam();
        // check if this op has produced the scanned version
        Uint32 loopVersion = loopOpPtr.p->tupVersion;
        if (loopVersion == tupVersion) {
          jam();
          res = true;
        }
      }
    }
  }

  return res;
}
416
417 // ordered index build
418
419 //#define TIME_MEASUREMENT
420 #ifdef TIME_MEASUREMENT
421 static Uint32 time_events;
422 NDB_TICKS tot_time_passed;
423 Uint32 number_events;
424 #endif
/*
 * Start building an index.  Validates the request, locates the TUX
 * maintenance trigger for an ordered index (or prepares an ACC
 * rebuild), then runs the build either online (tuple by tuple via
 * buildIndex/CONTINUEB) or offline multi-threaded
 * (buildIndexOffline).  On any validation failure a reply is sent
 * immediately and the build record is released.
 */
void
Dbtup::execBUILD_INDX_IMPL_REQ(Signal* signal)
{
  jamEntry();
#ifdef TIME_MEASUREMENT
  time_events= 0;
  tot_time_passed= 0;
  number_events= 1;
#endif
  const BuildIndxImplReq* const req =
    (const BuildIndxImplReq*)signal->getDataPtr();
  // get new operation
  BuildIndexPtr buildPtr;
  // ERROR_INSERT 4031 simulates seize failure for testing
  if (ERROR_INSERTED(4031) || ! c_buildIndexList.seize(buildPtr)) {
    jam();
    BuildIndexRec buildRec;
    buildRec.m_request = *req;
    buildRec.m_errorCode = BuildIndxImplRef::Busy;
    if (ERROR_INSERTED(4031))
    {
      CLEAR_ERROR_INSERT_VALUE;
    }
    buildIndexReply(signal, &buildRec);
    return;
  }
  buildPtr.p->m_request = *req;
  const BuildIndxImplReq* buildReq = &buildPtr.p->m_request;
  // check
  buildPtr.p->m_errorCode= BuildIndxImplRef::NoError;
  buildPtr.p->m_outstanding = 0;
  do {
    if (buildReq->tableId >= cnoOfTablerec) {
      jam();
      buildPtr.p->m_errorCode= BuildIndxImplRef::InvalidPrimaryTable;
      break;
    }
    TablerecPtr tablePtr;
    tablePtr.i= buildReq->tableId;
    ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
    if (tablePtr.p->tableStatus != DEFINED) {
      jam();
      buildPtr.p->m_errorCode= BuildIndxImplRef::InvalidPrimaryTable;
      break;
    }
    // memory page format
    // true when the table has varsize or dynamic attributes
    buildPtr.p->m_build_vs =
      (tablePtr.p->m_attributes[MM].m_no_of_varsize +
       tablePtr.p->m_attributes[MM].m_no_of_dynamic) > 0;
    if (DictTabInfo::isOrderedIndex(buildReq->indexType)) {
      jam();
      // find the TUX maintenance trigger created for this index
      const DLList<TupTriggerData>& triggerList =
        tablePtr.p->tuxCustomTriggers;

      TriggerPtr triggerPtr;
      triggerList.first(triggerPtr);
      while (triggerPtr.i != RNIL) {
        if (triggerPtr.p->indexId == buildReq->indexId) {
          jam();
          break;
        }
        triggerList.next(triggerPtr);
      }
      if (triggerPtr.i == RNIL) {
        jam();
        // trigger was not created
        ndbassert(false);
        buildPtr.p->m_errorCode = BuildIndxImplRef::InternalError;
        break;
      }
      buildPtr.p->m_indexId = buildReq->indexId;
      buildPtr.p->m_buildRef = DBTUX;
      // tell TUX the index is now in the "building" state
      // NOTE: this req shadows the incoming request pointer above
      AlterIndxImplReq* req = (AlterIndxImplReq*)signal->getDataPtrSend();
      req->indexId = buildReq->indexId;
      req->senderRef = 0;
      req->requestType = AlterIndxImplReq::AlterIndexBuilding;
      EXECUTE_DIRECT(DBTUX, GSN_ALTER_INDX_IMPL_REQ, signal,
                     AlterIndxImplReq::SignalLength);
    } else if(buildReq->indexId == RNIL) {
      jam();
      // REBUILD of acc
      buildPtr.p->m_indexId = RNIL;
      buildPtr.p->m_buildRef = DBACC;
    } else {
      jam();
      buildPtr.p->m_errorCode = BuildIndxImplRef::InvalidIndexType;
      break;
    }

    // set to first tuple position
    const Uint32 firstTupleNo = 0;
    buildPtr.p->m_fragNo= 0;
    buildPtr.p->m_pageId= 0;
    buildPtr.p->m_tupleNo= firstTupleNo;
    // start build

    bool offline = !!(buildReq->requestType&BuildIndxImplReq::RF_BUILD_OFFLINE);
    if (offline && m_max_parallel_index_build > 1)
    {
      jam();
      buildIndexOffline(signal, buildPtr.i);
    }
    else
    {
      jam();
      buildIndex(signal, buildPtr.i);
    }
    return;
  } while (0);
  // check failed
  buildIndexReply(signal, buildPtr.p);
  c_buildIndexList.release(buildPtr);
}
537
/*
 * Online index build: process (at most) one tuple per invocation and
 * reschedule via CONTINUEB until all fragments, pages, and tuples of
 * the table have been visited.  The current position is kept in the
 * build record (m_fragNo, m_pageId, m_tupleNo).  The do/while(0) plus
 * break pattern implements "advance position and reschedule"; the
 * early returns terminate the build (done or error).
 */
void
Dbtup::buildIndex(Signal* signal, Uint32 buildPtrI)
{
  // get build record
  BuildIndexPtr buildPtr;
  buildPtr.i= buildPtrI;
  c_buildIndexList.getPtr(buildPtr);
  const BuildIndxImplReq* buildReq= &buildPtr.p->m_request;
  // get table
  TablerecPtr tablePtr;
  tablePtr.i= buildReq->tableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);

  const Uint32 firstTupleNo = 0;
  const Uint32 tupheadsize = tablePtr.p->m_offsets[MM].m_fix_header_size;

#ifdef TIME_MEASUREMENT
  MicroSecondTimer start;
  MicroSecondTimer stop;
  NDB_TICKS time_passed;
#endif
  do {
    // get fragment
    FragrecordPtr fragPtr;
    if (buildPtr.p->m_fragNo == MAX_FRAG_PER_NODE) {
      jam();
      // build ready
      buildIndexReply(signal, buildPtr.p);
      c_buildIndexList.release(buildPtr);
      return;
    }
    ndbrequire(buildPtr.p->m_fragNo < MAX_FRAG_PER_NODE);
    fragPtr.i= tablePtr.p->fragrec[buildPtr.p->m_fragNo];
    if (fragPtr.i == RNIL) {
      jam();
      // empty fragment slot: advance to next fragment
      buildPtr.p->m_fragNo++;
      buildPtr.p->m_pageId= 0;
      buildPtr.p->m_tupleNo= firstTupleNo;
      break;
    }
    ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
    // get page
    PagePtr pagePtr;
    if (buildPtr.p->m_pageId >= fragPtr.p->m_max_page_no) {
      jam();
      // past last page: advance to next fragment
      buildPtr.p->m_fragNo++;
      buildPtr.p->m_pageId= 0;
      buildPtr.p->m_tupleNo= firstTupleNo;
      break;
    }
    Uint32 realPageId= getRealpidCheck(fragPtr.p, buildPtr.p->m_pageId);
    // skip empty page
    if (realPageId == RNIL)
    {
      jam();
      // page not mapped; the RNIL check below m_tupleNo advances past it
      goto next_tuple;
    }

    c_page_pool.getPtr(pagePtr, realPageId);

  next_tuple:
    // get tuple
    Uint32 pageIndex = ~0;
    const Tuple_header* tuple_ptr = 0;
    pageIndex = buildPtr.p->m_tupleNo * tupheadsize;
    if (pageIndex + tupheadsize > Fix_page::DATA_WORDS) {
      jam();
      // past last tuple on page: advance to next page
      buildPtr.p->m_pageId++;
      buildPtr.p->m_tupleNo= firstTupleNo;
      break;
    }

    if (realPageId == RNIL)
    {
      jam();
      // unmapped page: step the tuple counter and reschedule
      buildPtr.p->m_tupleNo++;
      break;
    }

    tuple_ptr = (Tuple_header*)&pagePtr.p->m_data[pageIndex];
    // skip over free tuple
    if (tuple_ptr->m_header_bits & Tuple_header::FREE) {
      jam();
      buildPtr.p->m_tupleNo++;
      break;
    }
    Uint32 tupVersion= tuple_ptr->get_tuple_version();
    OperationrecPtr pageOperPtr;
    pageOperPtr.i= tuple_ptr->m_operation_ptr_i;
#ifdef TIME_MEASUREMENT
    NdbTick_getMicroTimer(&start);
#endif
    // add to index
    TuxMaintReq* const req = (TuxMaintReq*)signal->getDataPtrSend();
    req->errorCode = RNIL;
    req->tableId = tablePtr.i;
    req->indexId = buildPtr.p->m_indexId;
    req->fragId = tablePtr.p->fragid[buildPtr.p->m_fragNo];
    req->pageId = realPageId;
    req->tupVersion = tupVersion;
    req->opInfo = TuxMaintReq::OpAdd;
    req->tupFragPtrI = fragPtr.i;
    req->fragPageId = buildPtr.p->m_pageId;
    req->pageIndex = pageIndex;

    if (pageOperPtr.i == RNIL)
    {
      // no pending operation: add the stored version directly
      EXECUTE_DIRECT(buildPtr.p->m_buildRef, GSN_TUX_MAINT_REQ,
                     signal, TuxMaintReq::SignalLength+2);
    }
    else
    {
      /*
      If there is an ongoing operation on the tuple then it is either a
      copy tuple or an original tuple with an ongoing transaction. In
      both cases realPageId and pageOffset refer to the original tuple.
      The tuple address stored in TUX will always be the original tuple
      but with the tuple version of the tuple we found.

      This is necessary to avoid having to update TUX at abort of
      update. If an update aborts then the copy tuple is copied to
      the original tuple. The build will however have found that
      tuple as a copy tuple. The original tuple is stable and is thus
      preferable to store in TUX.
      */
      jam();

      /**
       * Since copy tuples now can't be found on real pages.
       * we will here build all copies of the tuple
       *
       * Note only "real" tupVersion's should be added
       * i.e delete's shouldnt be added
       * (unless it's the first op, when "original" should be added)
       */

      /*
       * Start from first operation. This is only to make things more
       * clear. It is not required by ordered index implementation.
       */
      c_operation_pool.getPtr(pageOperPtr);
      while (pageOperPtr.p->prevActiveOp != RNIL)
      {
        jam();
        pageOperPtr.i = pageOperPtr.p->prevActiveOp;
        c_operation_pool.getPtr(pageOperPtr);
      }
      /*
       * Do not use req->errorCode as global control.
       */
      bool ok = true;
      /*
       * If first operation is an update, add previous version.
       * This version does not appear as the version of any operation.
       * At commit this version is removed by executeTuxCommitTriggers.
       * At abort it is preserved by executeTuxAbortTriggers.
       */
      if (pageOperPtr.p->op_struct.op_type == ZUPDATE)
      {
        jam();
        req->errorCode = RNIL;
        req->tupVersion = decr_tup_version(pageOperPtr.p->tupVersion);
        EXECUTE_DIRECT(buildPtr.p->m_buildRef, GSN_TUX_MAINT_REQ,
                       signal, TuxMaintReq::SignalLength+2);
        ok = (req->errorCode == 0);
      }
      /*
       * Add versions from all operations.
       *
       * Each operation has a tuple version. For insert and update it
       * is the newly created version. For delete it is the version
       * deleted. The existence of operation tuple version implies that
       * a corresponding tuple version exists for TUX to read.
       *
       * We could be in the middle of a commit. The process here makes
       * no assumptions about operation commit order. (It should be
       * first to last but this is not the place to assert it).
       *
       * Duplicate versions are possible e.g. a delete in the middle
       * may have same version as the previous operation. TUX ignores
       * duplicate version errors during index build.
       */
      while (pageOperPtr.i != RNIL && ok)
      {
        jam();
        c_operation_pool.getPtr(pageOperPtr);
        req->errorCode = RNIL;
        req->tupVersion = pageOperPtr.p->tupVersion;
        EXECUTE_DIRECT(buildPtr.p->m_buildRef, GSN_TUX_MAINT_REQ,
                       signal, TuxMaintReq::SignalLength+2);
        pageOperPtr.i = pageOperPtr.p->nextActiveOp;
        ok = (req->errorCode == 0);
      }
    }

    jamEntry();
    // req->errorCode was set by the last TUX_MAINT_REQ executed above
    if (req->errorCode != 0) {
      switch (req->errorCode) {
      case TuxMaintReq::NoMemError:
        jam();
        buildPtr.p->m_errorCode= BuildIndxImplRef::AllocationFailure;
        break;
      default:
        ndbrequire(false);
        break;
      }
      buildIndexReply(signal, buildPtr.p);
      c_buildIndexList.release(buildPtr);
      return;
    }
#ifdef TIME_MEASUREMENT
    NdbTick_getMicroTimer(&stop);
    time_passed= NdbTick_getMicrosPassed(start, stop);
    if (time_passed < 1000) {
      time_events++;
      tot_time_passed += time_passed;
      if (time_events == number_events) {
        NDB_TICKS mean_time_passed= tot_time_passed /
                                     (NDB_TICKS)number_events;
        ndbout << "Number of events= " << number_events;
        ndbout << " Mean time passed= " << mean_time_passed << endl;
        number_events <<= 1;
        tot_time_passed= (NDB_TICKS)0;
        time_events= 0;
      }
    }
#endif
    // next tuple
    buildPtr.p->m_tupleNo++;
    break;
  } while (0);
  // reschedule ourselves for the next step
  signal->theData[0]= ZBUILD_INDEX;
  signal->theData[1]= buildPtr.i;
  sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
}
773
774 Uint32 Dbtux_mt_buildIndexFragment_wrapper_C(void*);
775
776 void
buildIndexOffline(Signal * signal,Uint32 buildPtrI)777 Dbtup::buildIndexOffline(Signal* signal, Uint32 buildPtrI)
778 {
779 jam();
780 /**
781 * We need to make table read-only...as mtoib does not work otherwise
782 */
783 BuildIndexPtr buildPtr;
784 buildPtr.i= buildPtrI;
785 c_buildIndexList.getPtr(buildPtr);
786 const BuildIndxImplReq* buildReq =
787 (const BuildIndxImplReq*)&buildPtr.p->m_request;
788
789 AlterTabReq* req = (AlterTabReq*)signal->getDataPtrSend();
790 bzero(req, sizeof(req));
791 req->senderRef = reference();
792 req->senderData = buildPtrI;
793 req->tableId = buildReq->tableId;
794 req->requestType = AlterTabReq::AlterTableReadOnly;
795 sendSignal(calcInstanceBlockRef(DBLQH), GSN_ALTER_TAB_REQ, signal,
796 AlterTabReq::SignalLength, JBB);
797 }
798
/*
 * Confirmation from LQH for an ALTER_TAB_REQ issued by the offline
 * index build.  m_fragNo == 0 means the read-only alter (build start)
 * just completed, so kick off the per-fragment builds; otherwise the
 * read-write alter (build end) completed and the final reply is sent.
 */
void
Dbtup::execALTER_TAB_CONF(Signal* signal)
{
  jamEntry();
  AlterTabConf* conf = (AlterTabConf*)signal->getDataPtr();

  BuildIndexPtr buildPtr;
  buildPtr.i = conf->senderData;
  c_buildIndexList.getPtr(buildPtr);


  if (buildPtr.p->m_fragNo == 0)
  {
    jam();
    // table is now read-only: start the multi-threaded fragment builds
    buildIndexOffline_table_readonly(signal, conf->senderData);
    return;
  }
  else
  {
    jam();
    // build finished (all fragments processed, table read-write again)
    ndbrequire(buildPtr.p->m_fragNo >= MAX_FRAG_PER_NODE);
    buildIndexReply(signal, buildPtr.p);
    c_buildIndexList.release(buildPtr);
    return;
  }
}
825
826 void
buildIndexOffline_table_readonly(Signal * signal,Uint32 buildPtrI)827 Dbtup::buildIndexOffline_table_readonly(Signal* signal, Uint32 buildPtrI)
828 {
829 // get build record
830 BuildIndexPtr buildPtr;
831 buildPtr.i= buildPtrI;
832 c_buildIndexList.getPtr(buildPtr);
833 const BuildIndxImplReq* buildReq =
834 (const BuildIndxImplReq*)&buildPtr.p->m_request;
835 // get table
836 TablerecPtr tablePtr;
837 tablePtr.i= buildReq->tableId;
838 ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
839
840 for (;buildPtr.p->m_fragNo < MAX_FRAG_PER_NODE;
841 buildPtr.p->m_fragNo++)
842 {
843 jam();
844 FragrecordPtr fragPtr;
845 fragPtr.i = tablePtr.p->fragrec[buildPtr.p->m_fragNo];
846 if (fragPtr.i == RNIL)
847 {
848 jam();
849 continue;
850 }
851 ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
852 mt_BuildIndxReq req;
853 bzero(&req, sizeof(req));
854 req.senderRef = reference();
855 req.senderData = buildPtr.i;
856 req.tableId = buildReq->tableId;
857 req.indexId = buildPtr.p->m_indexId;
858 req.fragId = tablePtr.p->fragid[buildPtr.p->m_fragNo];
859
860 SimulatedBlock * tux = globalData.getBlock(DBTUX);
861 if (instance() != 0)
862 {
863 tux = tux->getInstance(instance());
864 ndbrequire(tux != 0);
865 }
866 req.tux_ptr = tux;
867 req.tup_ptr = this;
868 req.func_ptr = Dbtux_mt_buildIndexFragment_wrapper_C;
869 req.buffer_size = 16*32768; // thread-local-buffer
870
871 Uint32 * req_ptr = signal->getDataPtrSend();
872 memcpy(req_ptr, &req, sizeof(req));
873
874 sendSignal(NDBFS_REF, GSN_BUILD_INDX_IMPL_REQ, signal,
875 (sizeof(req) + 15) / 4, JBB);
876
877 buildPtr.p->m_outstanding++;
878 if (buildPtr.p->m_outstanding >= m_max_parallel_index_build)
879 {
880 jam();
881 return;
882 }
883 }
884
885 if (buildPtr.p->m_outstanding == 0)
886 {
887 jam();
888 AlterTabReq* req = (AlterTabReq*)signal->getDataPtrSend();
889 bzero(req, sizeof(req));
890 req->senderRef = reference();
891 req->senderData = buildPtrI;
892 req->tableId = buildReq->tableId;
893 req->requestType = AlterTabReq::AlterTableReadWrite;
894 sendSignal(calcInstanceBlockRef(DBLQH), GSN_ALTER_TAB_REQ, signal,
895 AlterTabReq::SignalLength, JBB);
896 return;
897 }
898 else
899 {
900 jam();
901 // wait for replies
902 return;
903 }
904 }
905
906 int
mt_scan_init(Uint32 tableId,Uint32 fragId,Local_key * pos,Uint32 * fragPtrI)907 Dbtup::mt_scan_init(Uint32 tableId, Uint32 fragId,
908 Local_key* pos, Uint32 * fragPtrI)
909 {
910 TablerecPtr tablePtr;
911 tablePtr.i = tableId;
912 ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
913
914 FragrecordPtr fragPtr;
915 fragPtr.i = RNIL;
916 for (Uint32 i = 0; i<MAX_FRAG_PER_NODE; i++)
917 {
918 if (tablePtr.p->fragid[i] == fragId)
919 {
920 fragPtr.i = tablePtr.p->fragrec[i];
921 break;
922 }
923 }
924
925 if (fragPtr.i == RNIL)
926 return -1;
927
928 ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
929
930 Uint32 fragPageId = 0;
931 while (fragPageId < fragPtr.p->m_max_page_no)
932 {
933 Uint32 realPageId= getRealpidCheck(fragPtr.p, fragPageId);
934 if (realPageId != RNIL)
935 {
936 * fragPtrI = fragPtr.i;
937 pos->m_page_no = realPageId;
938 pos->m_page_idx = 0;
939 pos->m_file_no = 0;
940 return 0;
941 }
942 fragPageId++;
943 }
944
945 return 1;
946 }
947
/*
 * Advance a multi-threaded build scan to the next in-use tuple.
 * pos->m_page_no is a real page id, pos->m_page_idx the word offset on
 * the page.  Returns 0 when a tuple is found (pos updated; the tuple
 * version is stashed in pos->m_file_no) and 1 at end of fragment.
 */
int
Dbtup::mt_scan_next(Uint32 tableId, Uint32 fragPtrI,
                    Local_key* pos, bool moveNext)
{
  TablerecPtr tablePtr;
  tablePtr.i = tableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);

  FragrecordPtr fragPtr;
  fragPtr.i = fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);

  Uint32 tupheadsize = tablePtr.p->m_offsets[MM].m_fix_header_size;
  if (moveNext)
  {
    // step past the tuple found by the previous call
    pos->m_page_idx += tupheadsize;
  }

  PagePtr pagePtr;
  c_page_pool.getPtr(pagePtr, pos->m_page_no);

  while (1)
  {
    Tuple_header* tuple_ptr;
    // scan the remainder of the current page
    while (pos->m_page_idx + tupheadsize <= Fix_page::DATA_WORDS)
    {
      tuple_ptr = (Tuple_header*)(pagePtr.p->m_data + pos->m_page_idx);
      // skip over free tuple
      if (tuple_ptr->m_header_bits & Tuple_header::FREE)
      {
        pos->m_page_idx += tupheadsize;
        continue;
      }
      // m_file_no is reused to carry the tuple version to the caller
      pos->m_file_no = tuple_ptr->get_tuple_version();
      return 0; // Found
    }

    // End of page...move to next mapped page
    Uint32 fragPageId = pagePtr.p->frag_page_id + 1;
    while (fragPageId < fragPtr.p->m_max_page_no)
    {
      Uint32 realPageId = getRealpidCheck(fragPtr.p, fragPageId);
      if (realPageId != RNIL)
      {
        pos->m_page_no = realPageId;
        break;
      }
      fragPageId++;
    }

    // no further mapped pages: scan is done
    if (fragPageId == fragPtr.p->m_max_page_no)
      break;

    pos->m_page_idx = 0;
    c_page_pool.getPtr(pagePtr, pos->m_page_no);
  }

  return 1;
}
1007
/*
 * A multi-threaded fragment build failed.  Record the error, stop
 * scheduling further fragments (m_fragNo forced past the end), and
 * re-enter the dispatch loop so the build can wind down.
 */
void
Dbtup::execBUILD_INDX_IMPL_REF(Signal* signal)
{
  jamEntry();
  BuildIndxImplRef* ref = (BuildIndxImplRef*)signal->getDataPtrSend();
  Uint32 ptr = ref->senderData;
  Uint32 err = ref->errorCode;

  BuildIndexPtr buildPtr;
  c_buildIndexList.getPtr(buildPtr, ptr);
  ndbrequire(buildPtr.p->m_outstanding);
  buildPtr.p->m_outstanding--;

  buildPtr.p->m_errorCode = (BuildIndxImplRef::ErrorCode)err;
  buildPtr.p->m_fragNo = MAX_FRAG_PER_NODE; // No point in starting any more
  buildIndexOffline_table_readonly(signal, ptr);
}
1025
/*
 * A multi-threaded fragment build completed.  Advance to the next
 * fragment and re-enter the dispatch loop, which either schedules more
 * work or finishes the build.
 */
void
Dbtup::execBUILD_INDX_IMPL_CONF(Signal* signal)
{
  jamEntry();
  BuildIndxImplConf* conf = (BuildIndxImplConf*)signal->getDataPtrSend();
  Uint32 ptr = conf->senderData;

  BuildIndexPtr buildPtr;
  c_buildIndexList.getPtr(buildPtr, ptr);
  ndbrequire(buildPtr.p->m_outstanding);
  buildPtr.p->m_outstanding--;
  buildPtr.p->m_fragNo++;

  buildIndexOffline_table_readonly(signal, ptr);
}
1041
/*
 * Send the final reply for an index build.  First tells TUX (direct
 * signal) whether the index goes online or stays offline, then sends
 * BUILD_INDX_IMPL_CONF or _REF to the original requester depending on
 * the recorded error code.
 */
void
Dbtup::buildIndexReply(Signal* signal, const BuildIndexRec* buildPtrP)
{
  const BuildIndxImplReq* buildReq = &buildPtrP->m_request;

  AlterIndxImplReq* req = (AlterIndxImplReq*)signal->getDataPtrSend();
  req->indexId = buildReq->indexId;
  req->senderRef = 0; //
  if (buildPtrP->m_errorCode == BuildIndxImplRef::NoError)
  {
    jam();
    req->requestType = AlterIndxImplReq::AlterIndexOnline;
  }
  else
  {
    jam();
    req->requestType = AlterIndxImplReq::AlterIndexOffline;
  }
  EXECUTE_DIRECT(DBTUX, GSN_ALTER_INDX_IMPL_REQ, signal,
                 AlterIndxImplReq::SignalLength);

  if (buildPtrP->m_errorCode == BuildIndxImplRef::NoError) {
    jam();
    BuildIndxImplConf* conf =
      (BuildIndxImplConf*)signal->getDataPtrSend();
    conf->senderRef = reference();
    conf->senderData = buildReq->senderData;

    sendSignal(buildReq->senderRef, GSN_BUILD_INDX_IMPL_CONF,
               signal, BuildIndxImplConf::SignalLength, JBB);
  } else {
    jam();
    BuildIndxImplRef* ref =
      (BuildIndxImplRef*)signal->getDataPtrSend();
    ref->senderRef = reference();
    ref->senderData = buildReq->senderData;
    ref->errorCode = buildPtrP->m_errorCode;

    sendSignal(buildReq->senderRef, GSN_BUILD_INDX_IMPL_REF,
               signal, BuildIndxImplRef::SignalLength, JBB);
  }
}
1084