1 /*
2 Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #define DBTUP_C
26 #define DBTUP_INDEX_CPP
27 #include <dblqh/Dblqh.hpp>
28 #include "Dbtup.hpp"
29 #include <RefConvert.hpp>
30 #include <ndb_limits.h>
31 #include <pc.hpp>
32 #include <AttributeDescriptor.hpp>
33 #include "AttributeOffset.hpp"
34 #include <AttributeHeader.hpp>
35 #include <signaldata/TuxMaint.hpp>
36 #include <signaldata/AlterIndxImpl.hpp>
37
38 #define JAM_FILE_ID 418
39
40
41 // methods used by ordered index
42
43 void
tuxGetTupAddr(Uint32 fragPtrI,Uint32 pageId,Uint32 pageIndex,Uint32 & lkey1,Uint32 & lkey2)44 Dbtup::tuxGetTupAddr(Uint32 fragPtrI,
45 Uint32 pageId,
46 Uint32 pageIndex,
47 Uint32& lkey1,
48 Uint32& lkey2)
49 {
50 jamEntryDebug();
51 PagePtr pagePtr;
52 c_page_pool.getPtr(pagePtr, pageId);
53 lkey1 = pagePtr.p->frag_page_id;
54 lkey2 = pageIndex;
55 }
56
57 /**
58 * Can be called from MT-build of ordered indexes.
59 */
60 int
tuxAllocNode(EmulatedJamBuffer * jamBuf,Uint32 * fragPtrP_input,Uint32 * tablePtrP_input,Uint32 & pageId,Uint32 & pageOffset,Uint32 * & node)61 Dbtup::tuxAllocNode(EmulatedJamBuffer * jamBuf,
62 Uint32 *fragPtrP_input,
63 Uint32 *tablePtrP_input,
64 Uint32& pageId,
65 Uint32& pageOffset,
66 Uint32*& node)
67 {
68 thrjamEntry(jamBuf);
69 Tablerec* tablePtrP = (Tablerec*)tablePtrP_input;
70 Fragrecord* fragPtrP = (Fragrecord*)fragPtrP_input;
71
72 Local_key key;
73 Uint32* ptr, frag_page_id, err;
74 c_allow_alloc_spare_page=true;
75 if ((ptr = alloc_fix_rec(jamBuf,
76 &err,
77 fragPtrP,
78 tablePtrP,
79 &key,
80 &frag_page_id)) == 0)
81 {
82 c_allow_alloc_spare_page=false;
83 thrjam(jamBuf);
84 return err;
85 }
86 c_allow_alloc_spare_page=false;
87 pageId= key.m_page_no;
88 pageOffset= key.m_page_idx;
89 Uint32 attrDescIndex= tablePtrP->tabDescriptor + (0 << ZAD_LOG_SIZE);
90 Uint32 attrDataOffset= AttributeOffset::getOffset(
91 tableDescriptor[attrDescIndex + 1].tabDescr);
92 node= ptr + attrDataOffset;
93 return 0;
94 }
95
96 void
tuxFreeNode(Uint32 * fragPtrP_input,Uint32 * tablePtrP_input,Uint32 pageId,Uint32 pageOffset,Uint32 * node)97 Dbtup::tuxFreeNode(Uint32* fragPtrP_input,
98 Uint32* tablePtrP_input,
99 Uint32 pageId,
100 Uint32 pageOffset,
101 Uint32* node)
102 {
103 jamEntry();
104 Tablerec* tablePtrP = (Tablerec*)tablePtrP_input;
105 Fragrecord* fragPtrP = (Fragrecord*)fragPtrP_input;
106
107 Local_key key;
108 key.m_page_no = pageId;
109 key.m_page_idx = pageOffset;
110 PagePtr pagePtr;
111 Tuple_header* ptr = (Tuple_header*)get_ptr(&pagePtr, &key, tablePtrP);
112
113 Uint32 attrDescIndex= tablePtrP->tabDescriptor + (0 << ZAD_LOG_SIZE);
114 Uint32 attrDataOffset= AttributeOffset::getOffset(tableDescriptor[attrDescIndex + 1].tabDescr);
115 ndbrequire(node == (Uint32*)ptr + attrDataOffset);
116
117 free_fix_rec(fragPtrP, tablePtrP, &key, (Fix_page*)pagePtr.p);
118 }
119
120 int
tuxReadAttrsCurr(EmulatedJamBuffer * jamBuf,const Uint32 * attrIds,Uint32 numAttrs,Uint32 * dataOut,bool xfrmFlag,Uint32 tupVersion)121 Dbtup::tuxReadAttrsCurr(EmulatedJamBuffer *jamBuf,
122 const Uint32* attrIds,
123 Uint32 numAttrs,
124 Uint32* dataOut,
125 bool xfrmFlag,
126 Uint32 tupVersion)
127 {
128 thrjamEntryDebug(jamBuf);
129 // use own variables instead of globals
130 Fragrecord *fragPtrP = prepare_fragptr.p;
131 Tablerec *tablePtrP = prepare_tabptr.p;
132
133 // search for tuple version if not original
134 Operationrec tmpOp;
135 KeyReqStruct req_struct(jamBuf);
136 req_struct.tablePtrP = tablePtrP;
137 req_struct.fragPtrP = fragPtrP;
138
139 tmpOp.op_type = ZREAD; // valgrind
140 setup_fixed_tuple_ref_opt(&req_struct);
141 setup_fixed_part(&req_struct, &tmpOp, tablePtrP);
142
143 return tuxReadAttrsCommon(req_struct,
144 attrIds,
145 numAttrs,
146 dataOut,
147 xfrmFlag,
148 tupVersion);
149 }
150
151 /**
152 * This method can be called from MT-build of
153 * ordered indexes.
154 */
155 int
tuxReadAttrsOpt(EmulatedJamBuffer * jamBuf,Uint32 * fragPtrP,Uint32 * tablePtrP,Uint32 pageId,Uint32 pageIndex,Uint32 tupVersion,const Uint32 * attrIds,Uint32 numAttrs,Uint32 * dataOut,bool xfrmFlag)156 Dbtup::tuxReadAttrsOpt(EmulatedJamBuffer * jamBuf,
157 Uint32* fragPtrP,
158 Uint32* tablePtrP,
159 Uint32 pageId,
160 Uint32 pageIndex,
161 Uint32 tupVersion,
162 const Uint32* attrIds,
163 Uint32 numAttrs,
164 Uint32* dataOut,
165 bool xfrmFlag)
166 {
167 thrjamEntryDebug(jamBuf);
168 // search for tuple version if not original
169
170 Operationrec tmpOp;
171 KeyReqStruct req_struct(jamBuf);
172 req_struct.tablePtrP = (Tablerec*)tablePtrP;
173 req_struct.fragPtrP = (Fragrecord*)fragPtrP;
174
175 tmpOp.m_tuple_location.m_page_no= pageId;
176 tmpOp.m_tuple_location.m_page_idx= pageIndex;
177 tmpOp.op_type = ZREAD; // valgrind
178 setup_fixed_tuple_ref(&req_struct,
179 &tmpOp,
180 (Tablerec*)tablePtrP);
181 setup_fixed_part(&req_struct,
182 &tmpOp,
183 (Tablerec*)tablePtrP);
184 return tuxReadAttrsCommon(req_struct,
185 attrIds,
186 numAttrs,
187 dataOut,
188 xfrmFlag,
189 tupVersion);
190 }
191
192 int
tuxReadAttrsCommon(KeyReqStruct & req_struct,const Uint32 * attrIds,Uint32 numAttrs,Uint32 * dataOut,bool xfrmFlag,Uint32 tupVersion)193 Dbtup::tuxReadAttrsCommon(KeyReqStruct &req_struct,
194 const Uint32* attrIds,
195 Uint32 numAttrs,
196 Uint32* dataOut,
197 bool xfrmFlag,
198 Uint32 tupVersion)
199 {
200 Tuple_header *tuple_ptr = req_struct.m_tuple_ptr;
201 if (tuple_ptr->get_tuple_version() != tupVersion)
202 {
203 thrjamDebug(req_struct.jamBuffer);
204 OperationrecPtr opPtr;
205 opPtr.i= tuple_ptr->m_operation_ptr_i;
206 Uint32 loopGuard= 0;
207 while (opPtr.i != RNIL) {
208 ndbrequire(c_operation_pool.getValidPtr(opPtr));
209 if (opPtr.p->op_struct.bit_field.tupVersion == tupVersion) {
210 thrjamDebug(req_struct.jamBuffer);
211 if (!opPtr.p->m_copy_tuple_location.isNull()) {
212 req_struct.m_tuple_ptr=
213 get_copy_tuple(&opPtr.p->m_copy_tuple_location);
214 }
215 break;
216 }
217 thrjamDebug(req_struct.jamBuffer);
218 opPtr.i= opPtr.p->prevActiveOp;
219 ndbrequire(++loopGuard < (1 << ZTUP_VERSION_BITS));
220 }
221 }
222 // read key attributes from found tuple version
223 // save globals
224 prepare_read(&req_struct, req_struct.tablePtrP, false);
225
226 // do it
227 int ret = readAttributes(&req_struct,
228 attrIds,
229 numAttrs,
230 dataOut,
231 ZNIL,
232 xfrmFlag);
233 // done
234 return ret;
235 }
236
237 int
tuxReadPk(Uint32 * fragPtrP_input,Uint32 * tablePtrP_input,Uint32 pageId,Uint32 pageIndex,Uint32 * dataOut,bool xfrmFlag)238 Dbtup::tuxReadPk(Uint32* fragPtrP_input,
239 Uint32* tablePtrP_input,
240 Uint32 pageId,
241 Uint32 pageIndex,
242 Uint32* dataOut,
243 bool xfrmFlag)
244 {
245 jamEntryDebug();
246 Fragrecord* fragPtrP = (Fragrecord*)fragPtrP_input;
247 Tablerec* tablePtrP = (Tablerec*)tablePtrP_input;
248
249 Operationrec tmpOp;
250 tmpOp.m_tuple_location.m_page_no= pageId;
251 tmpOp.m_tuple_location.m_page_idx= pageIndex;
252
253 KeyReqStruct req_struct(this);
254 req_struct.tablePtrP = tablePtrP;
255 req_struct.fragPtrP = fragPtrP;
256
257 PagePtr page_ptr;
258 Uint32* ptr= get_ptr(&page_ptr, &tmpOp.m_tuple_location, tablePtrP);
259 req_struct.m_page_ptr = page_ptr;
260 req_struct.m_tuple_ptr = (Tuple_header*)ptr;
261
262 int ret = 0;
263 if (likely(! (req_struct.m_tuple_ptr->m_header_bits & Tuple_header::FREE)))
264 {
265 req_struct.check_offset[MM]= tablePtrP->get_check_offset(MM);
266 req_struct.check_offset[DD]= tablePtrP->get_check_offset(DD);
267
268 Uint32 num_attr= tablePtrP->m_no_of_attributes;
269 Uint32 descr_start= tablePtrP->tabDescriptor;
270 TableDescriptor *tab_descr= &tableDescriptor[descr_start];
271 ndbrequire(descr_start + (num_attr << ZAD_LOG_SIZE) <= cnoOfTabDescrRec);
272 req_struct.attr_descr= tab_descr;
273
274 if (unlikely(req_struct.m_tuple_ptr->m_header_bits & Tuple_header::ALLOC))
275 {
276 OperationrecPtr opPtr;
277 opPtr.i = req_struct.m_tuple_ptr->m_operation_ptr_i;
278 ndbrequire(c_operation_pool.getValidPtr(opPtr));
279 ndbassert(!opPtr.p->m_copy_tuple_location.isNull());
280 req_struct.m_tuple_ptr=
281 get_copy_tuple(&opPtr.p->m_copy_tuple_location);
282 }
283 prepare_read(&req_struct, tablePtrP, false);
284
285 const Uint32* attrIds= &tableDescriptor[tablePtrP->readKeyArray].tabDescr;
286 const Uint32 numAttrs= tablePtrP->noOfKeyAttr;
287 // read pk attributes from original tuple
288
289 // do it
290 ret = readAttributes(&req_struct,
291 attrIds,
292 numAttrs,
293 dataOut,
294 ZNIL,
295 xfrmFlag);
296 // done
297 if (ret >= 0) {
298 // remove headers
299 Uint32 n= 0;
300 Uint32 i= 0;
301 while (n < numAttrs) {
302 const AttributeHeader ah(dataOut[i]);
303 Uint32 size= ah.getDataSize();
304 ndbrequire(size != 0);
305 for (Uint32 j= 0; j < size; j++) {
306 dataOut[i + j - n]= dataOut[i + j + 1];
307 }
308 n+= 1;
309 i+= 1 + size;
310 }
311 ndbrequire((int)i == ret);
312 ret -= numAttrs;
313 }
314 else
315 {
316 jam();
317 return ret;
318 }
319 }
320 else
321 {
322 jam();
323 }
324 if (likely(tablePtrP->m_bits & Tablerec::TR_RowGCI))
325 {
326 dataOut[ret] = *req_struct.m_tuple_ptr->get_mm_gci(tablePtrP);
327 }
328 else
329 {
330 dataOut[ret] = 0;
331 }
332 return ret;
333 }
334
335 int
accReadPk(Uint32 tableId,Uint32 fragId,Uint32 fragPageId,Uint32 pageIndex,Uint32 * dataOut,bool xfrmFlag)336 Dbtup::accReadPk(Uint32 tableId, Uint32 fragId, Uint32 fragPageId, Uint32 pageIndex, Uint32* dataOut, bool xfrmFlag)
337 {
338 jamEntryDebug();
339 // get table
340 TablerecPtr tablePtr;
341 tablePtr.i = tableId;
342 ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
343 // get fragment
344 FragrecordPtr fragPtr;
345 getFragmentrec(fragPtr, fragId, tablePtr.p);
346 // get real page id and tuple offset
347
348 Uint32 pageId = getRealpid(fragPtr.p, fragPageId);
349 // use TUX routine - optimize later
350 int ret = tuxReadPk((Uint32*)fragPtr.p,
351 (Uint32*)tablePtr.p,
352 pageId,
353 pageIndex,
354 dataOut,
355 xfrmFlag);
356 return ret;
357 }
358
359 /*
360 * TUX index contains all tuple versions. A scan in TUX has scanned
361 * one of them and asks if it can be returned as scan result. This
362 * depends on trans id, dirty read flag, and savepoint within trans.
363 *
364 * Previously this faked a ZREAD operation and used getPage().
365 * In TUP getPage() is run after ACC locking, but TUX comes here
366 * before ACC access. Instead of modifying getPage() it is more
367 * clear to do the full check here.
368 */
369 bool
tuxQueryTh(Uint32 opPtrI,Uint32 tupVersion,Uint32 transId1,Uint32 transId2,bool dirty,Uint32 savepointId)370 Dbtup::tuxQueryTh(Uint32 opPtrI,
371 Uint32 tupVersion,
372 Uint32 transId1,
373 Uint32 transId2,
374 bool dirty,
375 Uint32 savepointId)
376 {
377 jamEntryDebug();
378
379 OperationrecPtr currOpPtr;
380 currOpPtr.i = opPtrI;
381 ndbrequire(c_operation_pool.getValidPtr(currOpPtr));
382
383 const bool sameTrans =
384 c_lqh->is_same_trans(currOpPtr.p->userpointer, transId1, transId2);
385
386 bool res = false;
387 OperationrecPtr loopOpPtr = currOpPtr;
388
389 if (!sameTrans)
390 {
391 jamDebug();
392 if (!dirty)
393 {
394 jamDebug();
395 if (currOpPtr.p->nextActiveOp == RNIL)
396 {
397 jamDebug();
398 // last op - TUX makes ACC lock request in same timeslice
399 res = true;
400 }
401 }
402 else
403 {
404 // loop to first op (returns false)
405 find_savepoint(loopOpPtr, 0);
406 const Uint32 op_type = loopOpPtr.p->op_type;
407
408 if (op_type != ZINSERT)
409 {
410 jamDebug();
411 // read committed version
412 Tuple_header *tuple_ptr = (Tuple_header*)prepare_tuple_ptr;
413 const Uint32 origVersion = tuple_ptr->get_tuple_version();
414 if (origVersion == tupVersion)
415 {
416 jamDebug();
417 res = true;
418 }
419 }
420 }
421 }
422 else
423 {
424 jamDebug();
425 // for own trans, ignore dirty flag
426
427 if (find_savepoint(loopOpPtr, savepointId))
428 {
429 jamDebug();
430 const Uint32 op_type = loopOpPtr.p->op_type;
431
432 if (op_type != ZDELETE)
433 {
434 jamDebug();
435 // check if this op has produced the scanned version
436 Uint32 loopVersion = loopOpPtr.p->op_struct.bit_field.tupVersion;
437 if (loopVersion == tupVersion)
438 {
439 jamDebug();
440 res = true;
441 }
442 }
443 }
444 }
445 return res;
446 }
447
448 /**
449 * This method is still used by index statistics and debug code.
450 */
451 int
tuxReadAttrs(EmulatedJamBuffer * jamBuf,Uint32 fragPtrI,Uint32 pageId,Uint32 pageIndex,Uint32 tupVersion,const Uint32 * attrIds,Uint32 numAttrs,Uint32 * dataOut,bool xfrmFlag)452 Dbtup::tuxReadAttrs(EmulatedJamBuffer * jamBuf,
453 Uint32 fragPtrI,
454 Uint32 pageId,
455 Uint32 pageIndex,
456 Uint32 tupVersion,
457 const Uint32* attrIds,
458 Uint32 numAttrs,
459 Uint32* dataOut,
460 bool xfrmFlag)
461 {
462 thrjamEntryDebug(jamBuf);
463 // use own variables instead of globals
464 FragrecordPtr fragPtr;
465 fragPtr.i= fragPtrI;
466 ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
467 TablerecPtr tablePtr;
468 tablePtr.i= fragPtr.p->fragTableId;
469 ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
470
471 // search for tuple version if not original
472
473 Operationrec tmpOp;
474 KeyReqStruct req_struct(jamBuf);
475 req_struct.tablePtrP = tablePtr.p;
476 req_struct.fragPtrP = fragPtr.p;
477
478 tmpOp.m_tuple_location.m_page_no= pageId;
479 tmpOp.m_tuple_location.m_page_idx= pageIndex;
480 tmpOp.op_type = ZREAD; // valgrind
481 setup_fixed_tuple_ref(&req_struct, &tmpOp, tablePtr.p);
482 setup_fixed_part(&req_struct, &tmpOp, tablePtr.p);
483 return tuxReadAttrsCommon(req_struct,
484 attrIds,
485 numAttrs,
486 dataOut,
487 xfrmFlag,
488 tupVersion);
489 }
490
491 // ordered index build
492
493 //#define TIME_MEASUREMENT
494 #ifdef TIME_MEASUREMENT
495 static Uint32 time_events;
496 Uint64 tot_time_passed;
497 Uint32 number_events;
498 #endif
499 void
execBUILD_INDX_IMPL_REQ(Signal * signal)500 Dbtup::execBUILD_INDX_IMPL_REQ(Signal* signal)
501 {
502 jamEntry();
503 #ifdef TIME_MEASUREMENT
504 time_events= 0;
505 tot_time_passed= 0;
506 number_events= 1;
507 #endif
508 const BuildIndxImplReq* const req =
509 (const BuildIndxImplReq*)signal->getDataPtr();
510 // get new operation
511 BuildIndexPtr buildPtr;
512 if (ERROR_INSERTED(4031) || ! c_buildIndexList.seizeFirst(buildPtr)) {
513 jam();
514 BuildIndexRec buildRec;
515 buildRec.m_request = *req;
516 buildRec.m_errorCode = BuildIndxImplRef::Busy;
517 if (ERROR_INSERTED(4031))
518 {
519 CLEAR_ERROR_INSERT_VALUE;
520 }
521 buildIndexReply(signal, &buildRec);
522 return;
523 }
524 buildPtr.p->m_request = *req;
525 const BuildIndxImplReq* buildReq = &buildPtr.p->m_request;
526 // check
527 buildPtr.p->m_errorCode= BuildIndxImplRef::NoError;
528 buildPtr.p->m_outstanding = 0;
529 do {
530 if (buildReq->tableId >= cnoOfTablerec) {
531 jam();
532 buildPtr.p->m_errorCode= BuildIndxImplRef::InvalidPrimaryTable;
533 break;
534 }
535 TablerecPtr tablePtr;
536 tablePtr.i= buildReq->tableId;
537 ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
538 if (tablePtr.p->tableStatus != DEFINED) {
539 jam();
540 buildPtr.p->m_errorCode= BuildIndxImplRef::InvalidPrimaryTable;
541 break;
542 }
543 // memory page format
544 buildPtr.p->m_build_vs =
545 (tablePtr.p->m_attributes[MM].m_no_of_varsize +
546 tablePtr.p->m_attributes[MM].m_no_of_dynamic) > 0;
547 if (DictTabInfo::isOrderedIndex(buildReq->indexType)) {
548 jam();
549 const TupTriggerData_list& triggerList =
550 tablePtr.p->tuxCustomTriggers;
551
552 TriggerPtr triggerPtr;
553 triggerList.first(triggerPtr);
554 while (triggerPtr.i != RNIL) {
555 if (triggerPtr.p->indexId == buildReq->indexId) {
556 jam();
557 break;
558 }
559 triggerList.next(triggerPtr);
560 }
561 if (triggerPtr.i == RNIL) {
562 jam();
563 // trigger was not created
564 ndbassert(false);
565 buildPtr.p->m_errorCode = BuildIndxImplRef::InternalError;
566 break;
567 }
568 buildPtr.p->m_indexId = buildReq->indexId;
569 buildPtr.p->m_buildRef = DBTUX;
570 AlterIndxImplReq* req = (AlterIndxImplReq*)signal->getDataPtrSend();
571 req->indexId = buildReq->indexId;
572 req->senderRef = 0;
573 req->requestType = AlterIndxImplReq::AlterIndexBuilding;
574 EXECUTE_DIRECT(DBTUX, GSN_ALTER_INDX_IMPL_REQ, signal,
575 AlterIndxImplReq::SignalLength);
576 } else if(buildReq->indexId == RNIL) {
577 jam();
578 // REBUILD of acc
579 buildPtr.p->m_indexId = RNIL;
580 buildPtr.p->m_buildRef = DBACC;
581 } else {
582 jam();
583 buildPtr.p->m_errorCode = BuildIndxImplRef::InvalidIndexType;
584 break;
585 }
586
587 // set to first tuple position
588 const Uint32 firstTupleNo = 0;
589 buildPtr.p->m_fragNo= 0;
590 buildPtr.p->m_pageId= 0;
591 buildPtr.p->m_tupleNo= firstTupleNo;
592 // start build
593
594 bool offline = !!(buildReq->requestType&BuildIndxImplReq::RF_BUILD_OFFLINE);
595 if (offline && m_max_parallel_index_build > 1)
596 {
597 jam();
598 buildIndexOffline(signal, buildPtr.i);
599 }
600 else
601 {
602 jam();
603 buildIndex(signal, buildPtr.i);
604 }
605 return;
606 } while (0);
607 // check failed
608 buildIndexReply(signal, buildPtr.p);
609 c_buildIndexList.release(buildPtr);
610 }
611
612 void
buildIndex(Signal * signal,Uint32 buildPtrI)613 Dbtup::buildIndex(Signal* signal, Uint32 buildPtrI)
614 {
615 // get build record
616 BuildIndexPtr buildPtr;
617 buildPtr.i= buildPtrI;
618 c_buildIndexList.getPtr(buildPtr);
619 const BuildIndxImplReq* buildReq= &buildPtr.p->m_request;
620 // get table
621 TablerecPtr tablePtr;
622 tablePtr.i= buildReq->tableId;
623 ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
624
625 const Uint32 firstTupleNo = 0;
626 const Uint32 tupheadsize = tablePtr.p->m_offsets[MM].m_fix_header_size;
627
628 #ifdef TIME_MEASUREMENT
629 NDB_TICKS start;
630 NDB_TICKS stop;
631 Uint64 time_passed;
632 #endif
633 do {
634 // get fragment
635 FragrecordPtr fragPtr;
636 if (buildPtr.p->m_fragNo == NDB_ARRAY_SIZE(tablePtr.p->fragrec)) {
637 jam();
638 // build ready
639 buildIndexReply(signal, buildPtr.p);
640 c_buildIndexList.release(buildPtr);
641 return;
642 }
643 ndbrequire(buildPtr.p->m_fragNo < NDB_ARRAY_SIZE(tablePtr.p->fragrec));
644 fragPtr.i= tablePtr.p->fragrec[buildPtr.p->m_fragNo];
645 if (fragPtr.i == RNIL) {
646 jam();
647 buildPtr.p->m_fragNo++;
648 buildPtr.p->m_pageId= 0;
649 buildPtr.p->m_tupleNo= firstTupleNo;
650 break;
651 }
652 ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
653 // get page
654 PagePtr pagePtr;
655 if (buildPtr.p->m_pageId >= fragPtr.p->m_max_page_cnt)
656 {
657 jam();
658 buildPtr.p->m_fragNo++;
659 buildPtr.p->m_pageId= 0;
660 buildPtr.p->m_tupleNo= firstTupleNo;
661 break;
662 }
663 Uint32 realPageId= getRealpidCheck(fragPtr.p, buildPtr.p->m_pageId);
664 // skip empty page
665 if (realPageId == RNIL)
666 {
667 jam();
668 goto next_tuple;
669 }
670
671 c_page_pool.getPtr(pagePtr, realPageId);
672
673 next_tuple:
674 // get tuple
675 Uint32 pageIndex = ~0;
676 const Tuple_header* tuple_ptr = 0;
677 pageIndex = buildPtr.p->m_tupleNo * tupheadsize;
678 if (pageIndex + tupheadsize > Fix_page::DATA_WORDS) {
679 jam();
680 buildPtr.p->m_pageId++;
681 buildPtr.p->m_tupleNo= firstTupleNo;
682 break;
683 }
684
685 if (realPageId == RNIL)
686 {
687 jam();
688 buildPtr.p->m_tupleNo++;
689 break;
690 }
691
692 tuple_ptr = (Tuple_header*)&pagePtr.p->m_data[pageIndex];
693 // skip over free tuple
694 if (tuple_ptr->m_header_bits & Tuple_header::FREE) {
695 jam();
696 buildPtr.p->m_tupleNo++;
697 break;
698 }
699 Uint32 tupVersion= tuple_ptr->get_tuple_version();
700 OperationrecPtr pageOperPtr;
701 pageOperPtr.i= tuple_ptr->m_operation_ptr_i;
702 #ifdef TIME_MEASUREMENT
703 start = NdbTick_getCurrentTicks();
704 #endif
705 // add to index
706 TuxMaintReq* const req = (TuxMaintReq*)signal->getDataPtrSend();
707 req->errorCode = RNIL;
708 req->tableId = tablePtr.i;
709 req->indexId = buildPtr.p->m_indexId;
710 req->fragId = tablePtr.p->fragid[buildPtr.p->m_fragNo];
711 req->pageId = realPageId;
712 req->tupVersion = tupVersion;
713 req->opInfo = TuxMaintReq::OpAdd;
714 req->tupFragPtrI = fragPtr.i;
715 req->fragPageId = buildPtr.p->m_pageId;
716 req->pageIndex = pageIndex;
717
718 if (pageOperPtr.i == RNIL)
719 {
720 EXECUTE_DIRECT(buildPtr.p->m_buildRef, GSN_TUX_MAINT_REQ,
721 signal, TuxMaintReq::SignalLength+2);
722 }
723 else
724 {
725 /*
726 If there is an ongoing operation on the tuple then it is either a
727 copy tuple or an original tuple with an ongoing transaction. In
728 both cases realPageId and pageOffset refer to the original tuple.
729 The tuple address stored in TUX will always be the original tuple
730 but with the tuple version of the tuple we found.
731
732 This is necessary to avoid having to update TUX at abort of
733 update. If an update aborts then the copy tuple is copied to
734 the original tuple. The build will however have found that
735 tuple as a copy tuple. The original tuple is stable and is thus
736 preferrable to store in TUX.
737 */
738 jam();
739
740 /**
741 * Since copy tuples now can't be found on real pages.
742 * we will here build all copies of the tuple
743 *
744 * Note only "real" tupVersion's should be added
745 * i.e delete's shouldnt be added
746 * (unless it's the first op, when "original" should be added)
747 */
748
749 /*
750 * Start from first operation. This is only to make things more
751 * clear. It is not required by ordered index implementation.
752 */
753 ndbrequire(c_operation_pool.getValidPtr(pageOperPtr));
754 while (pageOperPtr.p->prevActiveOp != RNIL)
755 {
756 jam();
757 pageOperPtr.i = pageOperPtr.p->prevActiveOp;
758 ndbrequire(c_operation_pool.getValidPtr(pageOperPtr));
759 }
760 /*
761 * Do not use req->errorCode as global control.
762 */
763 bool ok = true;
764 /*
765 * If first operation is an update, add previous version.
766 * This version does not appear as the version of any operation.
767 * At commit this version is removed by executeTuxCommitTriggers.
768 * At abort it is preserved by executeTuxAbortTriggers.
769 */
770 if (pageOperPtr.p->op_type == ZUPDATE)
771 {
772 jam();
773 req->errorCode = RNIL;
774 req->tupVersion =
775 decr_tup_version(pageOperPtr.p->op_struct.bit_field.tupVersion);
776 EXECUTE_DIRECT(buildPtr.p->m_buildRef, GSN_TUX_MAINT_REQ,
777 signal, TuxMaintReq::SignalLength+2);
778 ok = (req->errorCode == 0);
779 }
780 /*
781 * Add versions from all operations.
782 *
783 * Each operation has a tuple version. For insert and update it
784 * is the newly created version. For delete it is the version
785 * deleted. The existence of operation tuple version implies that
786 * a corresponding tuple version exists for TUX to read.
787 *
788 * We could be in the middle of a commit. The process here makes
789 * no assumptions about operation commit order. (It should be
790 * first to last but this is not the place to assert it).
791 *
792 * Duplicate versions are possible e.g. a delete in the middle
793 * may have same version as the previous operation. TUX ignores
794 * duplicate version errors during index build.
795 */
796 while (pageOperPtr.i != RNIL && ok)
797 {
798 jam();
799 ndbrequire(c_operation_pool.getValidPtr(pageOperPtr));
800 req->errorCode = RNIL;
801 req->tupVersion = pageOperPtr.p->op_struct.bit_field.tupVersion;
802 EXECUTE_DIRECT(buildPtr.p->m_buildRef, GSN_TUX_MAINT_REQ,
803 signal, TuxMaintReq::SignalLength+2);
804 pageOperPtr.i = pageOperPtr.p->nextActiveOp;
805 ok = (req->errorCode == 0);
806 }
807 }
808
809 jamEntry();
810 if (req->errorCode != 0) {
811 switch (req->errorCode) {
812 case TuxMaintReq::NoMemError:
813 case TuxMaintReq::NoTransMemError:
814 jam();
815 buildPtr.p->m_errorCode= BuildIndxImplRef::AllocationFailure;
816 break;
817 default:
818 ndbabort();
819 }
820 buildIndexReply(signal, buildPtr.p);
821 c_buildIndexList.release(buildPtr);
822 return;
823 }
824 #ifdef TIME_MEASUREMENT
825 stop = NdbTick_getCurrentTicks();
826 time_passed= NdbTick_Elapsed(start, stop).microSec();
827 if (time_passed < 1000) {
828 time_events++;
829 tot_time_passed += time_passed;
830 if (time_events == number_events) {
831 Uint64 mean_time_passed= tot_time_passed /
832 (Uint64)number_events;
833 ndbout << "Number of events= " << number_events;
834 ndbout << " Mean time passed= " << mean_time_passed << endl;
835 number_events <<= 1;
836 tot_time_passed= 0;
837 time_events= 0;
838 }
839 }
840 #endif
841 // next tuple
842 buildPtr.p->m_tupleNo++;
843 break;
844 } while (0);
845 signal->theData[0]= ZBUILD_INDEX;
846 signal->theData[1]= buildPtr.i;
847 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
848 }
849
850 Uint32 Dbtux_mt_buildIndexFragment_wrapper_C(void*);
851
852 void
buildIndexOffline(Signal * signal,Uint32 buildPtrI)853 Dbtup::buildIndexOffline(Signal* signal, Uint32 buildPtrI)
854 {
855 jam();
856 /**
857 * We need to make table read-only...as mtoib does not work otherwise
858 */
859 BuildIndexPtr buildPtr;
860 buildPtr.i= buildPtrI;
861 c_buildIndexList.getPtr(buildPtr);
862 const BuildIndxImplReq* buildReq =
863 (const BuildIndxImplReq*)&buildPtr.p->m_request;
864
865 AlterTabReq* req = (AlterTabReq*)signal->getDataPtrSend();
866 /**
867 * Note: before 7.3.4, 7.2.15, 7.1.30 fifth word and
868 * up was undefined.
869 */
870 bzero(req, sizeof(*req));
871 req->senderRef = reference();
872 req->senderData = buildPtrI;
873 req->tableId = buildReq->tableId;
874 req->requestType = AlterTabReq::AlterTableReadOnly;
875 sendSignal(calcInstanceBlockRef(DBLQH), GSN_ALTER_TAB_REQ, signal,
876 AlterTabReq::SignalLength, JBB);
877 }
878
879 void
execALTER_TAB_CONF(Signal * signal)880 Dbtup::execALTER_TAB_CONF(Signal* signal)
881 {
882 jamEntry();
883 AlterTabConf* conf = (AlterTabConf*)signal->getDataPtr();
884
885 BuildIndexPtr buildPtr;
886 buildPtr.i = conf->senderData;
887 c_buildIndexList.getPtr(buildPtr);
888
889
890 if (buildPtr.p->m_fragNo == 0)
891 {
892 jam();
893 buildIndexOffline_table_readonly(signal, conf->senderData);
894 return;
895 }
896 else
897 {
898 jam();
899 TablerecPtr tablePtr;
900 (void)tablePtr; // hide unused warning
901 ndbrequire(buildPtr.p->m_fragNo >= NDB_ARRAY_SIZE(tablePtr.p->fragid));
902 buildIndexReply(signal, buildPtr.p);
903 c_buildIndexList.release(buildPtr);
904 return;
905 }
906 }
907
908 void
buildIndexOffline_table_readonly(Signal * signal,Uint32 buildPtrI)909 Dbtup::buildIndexOffline_table_readonly(Signal* signal, Uint32 buildPtrI)
910 {
911 // get build record
912 BuildIndexPtr buildPtr;
913 buildPtr.i= buildPtrI;
914 c_buildIndexList.getPtr(buildPtr);
915 const BuildIndxImplReq* buildReq =
916 (const BuildIndxImplReq*)&buildPtr.p->m_request;
917 // get table
918 TablerecPtr tablePtr;
919 tablePtr.i= buildReq->tableId;
920 ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
921
922 for (;buildPtr.p->m_fragNo < NDB_ARRAY_SIZE(tablePtr.p->fragrec);
923 buildPtr.p->m_fragNo++)
924 {
925 jam();
926 FragrecordPtr fragPtr;
927 fragPtr.i = tablePtr.p->fragrec[buildPtr.p->m_fragNo];
928 if (fragPtr.i == RNIL)
929 {
930 jam();
931 continue;
932 }
933 ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
934 mt_BuildIndxReq req;
935 bzero(&req, sizeof(req));
936 req.senderRef = reference();
937 req.senderData = buildPtr.i;
938 req.tableId = buildReq->tableId;
939 req.indexId = buildPtr.p->m_indexId;
940 req.fragId = tablePtr.p->fragid[buildPtr.p->m_fragNo];
941
942 SimulatedBlock * tux = globalData.getBlock(DBTUX);
943 if (instance() != 0)
944 {
945 tux = tux->getInstance(instance());
946 ndbrequire(tux != 0);
947 }
948 req.tux_ptr = tux;
949 req.tup_ptr = this;
950 req.func_ptr = Dbtux_mt_buildIndexFragment_wrapper_C;
951 req.buffer_size = 32*32768; // thread-local-buffer
952
953 Uint32 * req_ptr = signal->getDataPtrSend();
954 memcpy(req_ptr, &req, sizeof(req));
955
956 sendSignal(NDBFS_REF, GSN_BUILD_INDX_IMPL_REQ, signal,
957 (sizeof(req) + 15) / 4, JBB);
958
959 buildPtr.p->m_outstanding++;
960 if (buildPtr.p->m_outstanding >= m_max_parallel_index_build)
961 {
962 jam();
963 return;
964 }
965 }
966
967 if (buildPtr.p->m_outstanding == 0)
968 {
969 jam();
970 AlterTabReq* req = (AlterTabReq*)signal->getDataPtrSend();
971 /**
972 * Note: before 7.3.4, 7.2.15, 7.1.30 fifth word and
973 * up was undefined.
974 */
975 bzero(req, sizeof(*req));
976 req->senderRef = reference();
977 req->senderData = buildPtrI;
978 req->tableId = buildReq->tableId;
979 req->requestType = AlterTabReq::AlterTableReadWrite;
980 sendSignal(calcInstanceBlockRef(DBLQH), GSN_ALTER_TAB_REQ, signal,
981 AlterTabReq::SignalLength, JBB);
982 return;
983 }
984 else
985 {
986 jam();
987 // wait for replies
988 return;
989 }
990 }
991
992 int
mt_scan_init(Uint32 tableId,Uint32 fragId,Local_key * pos,Uint32 * fragPtrI)993 Dbtup::mt_scan_init(Uint32 tableId, Uint32 fragId,
994 Local_key* pos, Uint32 * fragPtrI)
995 {
996 TablerecPtr tablePtr;
997 tablePtr.i = tableId;
998 ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
999
1000 FragrecordPtr fragPtr;
1001 fragPtr.i = RNIL;
1002 for (Uint32 i = 0; i<NDB_ARRAY_SIZE(tablePtr.p->fragid); i++)
1003 {
1004 if (tablePtr.p->fragid[i] == fragId)
1005 {
1006 fragPtr.i = tablePtr.p->fragrec[i];
1007 break;
1008 }
1009 }
1010
1011 if (fragPtr.i == RNIL)
1012 return -1;
1013
1014 ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
1015
1016 Uint32 fragPageId = 0;
1017 while (fragPageId < fragPtr.p->m_max_page_cnt)
1018 {
1019 Uint32 realPageId= getRealpidCheck(fragPtr.p, fragPageId);
1020 if (realPageId != RNIL)
1021 {
1022 * fragPtrI = fragPtr.i;
1023 pos->m_page_no = realPageId;
1024 pos->m_page_idx = 0;
1025 pos->m_file_no = 0;
1026 return 0;
1027 }
1028 fragPageId++;
1029 }
1030
1031 return 1;
1032 }
1033
1034 int
mt_scan_next(Uint32 tableId,Uint32 fragPtrI,Local_key * pos,bool moveNext)1035 Dbtup::mt_scan_next(Uint32 tableId, Uint32 fragPtrI,
1036 Local_key* pos, bool moveNext)
1037 {
1038 TablerecPtr tablePtr;
1039 tablePtr.i = tableId;
1040 ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
1041
1042 FragrecordPtr fragPtr;
1043 fragPtr.i = fragPtrI;
1044 ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
1045
1046 Uint32 tupheadsize = tablePtr.p->m_offsets[MM].m_fix_header_size;
1047 if (moveNext)
1048 {
1049 pos->m_page_idx += tupheadsize;
1050 }
1051
1052 PagePtr pagePtr;
1053 c_page_pool.getPtr(pagePtr, pos->m_page_no);
1054
1055 while (1)
1056 {
1057 Tuple_header* tuple_ptr;
1058 while (pos->m_page_idx + tupheadsize <= Fix_page::DATA_WORDS)
1059 {
1060 tuple_ptr = (Tuple_header*)(pagePtr.p->m_data + pos->m_page_idx);
1061 // skip over free tuple
1062 if (tuple_ptr->m_header_bits & Tuple_header::FREE)
1063 {
1064 pos->m_page_idx += tupheadsize;
1065 continue;
1066 }
1067 pos->m_file_no = tuple_ptr->get_tuple_version();
1068 return 0; // Found
1069 }
1070
1071 // End of page...move to next
1072 Uint32 fragPageId = pagePtr.p->frag_page_id + 1;
1073 while (fragPageId < fragPtr.p->m_max_page_cnt)
1074 {
1075 Uint32 realPageId = getRealpidCheck(fragPtr.p, fragPageId);
1076 if (realPageId != RNIL)
1077 {
1078 pos->m_page_no = realPageId;
1079 break;
1080 }
1081 fragPageId++;
1082 }
1083
1084 if (fragPageId == fragPtr.p->m_max_page_cnt)
1085 break;
1086
1087 pos->m_page_idx = 0;
1088 c_page_pool.getPtr(pagePtr, pos->m_page_no);
1089 }
1090
1091 return 1;
1092 }
1093
1094 void
execBUILD_INDX_IMPL_REF(Signal * signal)1095 Dbtup::execBUILD_INDX_IMPL_REF(Signal* signal)
1096 {
1097 jamEntry();
1098 BuildIndxImplRef* ref = (BuildIndxImplRef*)signal->getDataPtrSend();
1099 Uint32 ptr = ref->senderData;
1100 Uint32 err = ref->errorCode;
1101
1102 BuildIndexPtr buildPtr;
1103 c_buildIndexList.getPtr(buildPtr, ptr);
1104 ndbrequire(buildPtr.p->m_outstanding);
1105 buildPtr.p->m_outstanding--;
1106
1107 TablerecPtr tablePtr;
1108 (void)tablePtr; // hide unused warning
1109 buildPtr.p->m_errorCode = (BuildIndxImplRef::ErrorCode)err;
1110 // No point in starting any more
1111 buildPtr.p->m_fragNo = NDB_ARRAY_SIZE(tablePtr.p->fragrec);
1112 buildIndexOffline_table_readonly(signal, ptr);
1113 }
1114
1115 void
execBUILD_INDX_IMPL_CONF(Signal * signal)1116 Dbtup::execBUILD_INDX_IMPL_CONF(Signal* signal)
1117 {
1118 jamEntry();
1119 BuildIndxImplConf* conf = (BuildIndxImplConf*)signal->getDataPtrSend();
1120 Uint32 ptr = conf->senderData;
1121
1122 BuildIndexPtr buildPtr;
1123 c_buildIndexList.getPtr(buildPtr, ptr);
1124 ndbrequire(buildPtr.p->m_outstanding);
1125 buildPtr.p->m_outstanding--;
1126 buildPtr.p->m_fragNo++;
1127
1128 buildIndexOffline_table_readonly(signal, ptr);
1129 }
1130
1131 void
buildIndexReply(Signal * signal,const BuildIndexRec * buildPtrP)1132 Dbtup::buildIndexReply(Signal* signal, const BuildIndexRec* buildPtrP)
1133 {
1134 const BuildIndxImplReq* buildReq = &buildPtrP->m_request;
1135
1136 AlterIndxImplReq* req = (AlterIndxImplReq*)signal->getDataPtrSend();
1137 req->indexId = buildReq->indexId;
1138 req->senderRef = 0; //
1139 if (buildPtrP->m_errorCode == BuildIndxImplRef::NoError)
1140 {
1141 jam();
1142 req->requestType = AlterIndxImplReq::AlterIndexOnline;
1143 }
1144 else
1145 {
1146 jam();
1147 req->requestType = AlterIndxImplReq::AlterIndexOffline;
1148 }
1149 EXECUTE_DIRECT(DBTUX, GSN_ALTER_INDX_IMPL_REQ, signal,
1150 AlterIndxImplReq::SignalLength);
1151
1152 if (buildPtrP->m_errorCode == BuildIndxImplRef::NoError) {
1153 jam();
1154 BuildIndxImplConf* conf =
1155 (BuildIndxImplConf*)signal->getDataPtrSend();
1156 conf->senderRef = reference();
1157 conf->senderData = buildReq->senderData;
1158
1159 sendSignal(buildReq->senderRef, GSN_BUILD_INDX_IMPL_CONF,
1160 signal, BuildIndxImplConf::SignalLength, JBB);
1161 } else {
1162 jam();
1163 BuildIndxImplRef* ref =
1164 (BuildIndxImplRef*)signal->getDataPtrSend();
1165 ref->senderRef = reference();
1166 ref->senderData = buildReq->senderData;
1167 ref->errorCode = buildPtrP->m_errorCode;
1168
1169 sendSignal(buildReq->senderRef, GSN_BUILD_INDX_IMPL_REF,
1170 signal, BuildIndxImplRef::SignalLength, JBB);
1171 }
1172 }
1173