1 /*
2    Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 #define DBTUP_C
26 #define DBTUP_INDEX_CPP
27 #include <dblqh/Dblqh.hpp>
28 #include "Dbtup.hpp"
29 #include <RefConvert.hpp>
30 #include <ndb_limits.h>
31 #include <pc.hpp>
32 #include <AttributeDescriptor.hpp>
33 #include "AttributeOffset.hpp"
34 #include <AttributeHeader.hpp>
35 #include <signaldata/TuxMaint.hpp>
36 #include <signaldata/AlterIndxImpl.hpp>
37 
38 #define JAM_FILE_ID 418
39 
40 
41 // methods used by ordered index
42 
/**
 * Translate a tuple's physical position (real page id + page index)
 * into the local key pair stored by the ordered index:
 * lkey1 = logical fragment page id, lkey2 = page index.
 *
 * Note: fragPtrI is unused here; the page record itself carries the
 * fragment page id.
 */
void
Dbtup::tuxGetTupAddr(Uint32 fragPtrI,
                     Uint32 pageId,
                     Uint32 pageIndex,
                     Uint32& lkey1,
                     Uint32& lkey2)
{
  jamEntryDebug();
  PagePtr pagePtr;
  c_page_pool.getPtr(pagePtr, pageId);
  lkey1 = pagePtr.p->frag_page_id;
  lkey2 = pageIndex;
}
56 
/**
 * Allocate a fixed-size record to serve as an ordered-index node.
 * On success returns 0 and sets pageId/pageOffset to the record's
 * position and node to the start of the node data area.  On failure
 * returns the allocation error code from alloc_fix_rec.
 *
 * Can be called from MT-build of ordered indexes.
 */
int
Dbtup::tuxAllocNode(EmulatedJamBuffer * jamBuf,
                    Uint32 *fragPtrP_input,
                    Uint32 *tablePtrP_input,
                    Uint32& pageId,
                    Uint32& pageOffset,
                    Uint32*& node)
{
  thrjamEntry(jamBuf);
  Tablerec* tablePtrP = (Tablerec*)tablePtrP_input;
  Fragrecord* fragPtrP = (Fragrecord*)fragPtrP_input;

  Local_key key;
  Uint32* ptr, frag_page_id, err;
  // Index nodes are allowed to dip into the spare page reserve; the
  // flag is reset on both the failure and success paths below.
  c_allow_alloc_spare_page=true;
  if ((ptr = alloc_fix_rec(jamBuf,
                           &err,
                           fragPtrP,
                           tablePtrP,
                           &key,
                           &frag_page_id)) == 0)
  {
    c_allow_alloc_spare_page=false;
    thrjam(jamBuf);
    return err;
  }
  c_allow_alloc_spare_page=false;
  pageId= key.m_page_no;
  pageOffset= key.m_page_idx;
  // The node data area starts at the offset of attribute 0 within the
  // fixed-size record.
  Uint32 attrDescIndex= tablePtrP->tabDescriptor + (0 << ZAD_LOG_SIZE);
  Uint32 attrDataOffset= AttributeOffset::getOffset(
                              tableDescriptor[attrDescIndex + 1].tabDescr);
  node= ptr + attrDataOffset;
  return 0;
}
95 
/**
 * Free an ordered-index node previously allocated by tuxAllocNode.
 * pageId/pageOffset address the fixed-size record; node must point at
 * the node data area inside that record (checked below).
 */
void
Dbtup::tuxFreeNode(Uint32* fragPtrP_input,
                   Uint32* tablePtrP_input,
                   Uint32 pageId,
                   Uint32 pageOffset,
                   Uint32* node)
{
  jamEntry();
  Tablerec* tablePtrP = (Tablerec*)tablePtrP_input;
  Fragrecord* fragPtrP = (Fragrecord*)fragPtrP_input;

  Local_key key;
  key.m_page_no = pageId;
  key.m_page_idx = pageOffset;
  PagePtr pagePtr;
  Tuple_header* ptr = (Tuple_header*)get_ptr(&pagePtr, &key, tablePtrP);

  // Recompute the node data offset (offset of attribute 0) and verify
  // the caller's node pointer matches the record we resolved.
  Uint32 attrDescIndex= tablePtrP->tabDescriptor + (0 << ZAD_LOG_SIZE);
  Uint32 attrDataOffset= AttributeOffset::getOffset(tableDescriptor[attrDescIndex + 1].tabDescr);
  ndbrequire(node == (Uint32*)ptr + attrDataOffset);

  free_fix_rec(fragPtrP, tablePtrP, &key, (Fix_page*)pagePtr.p);
}
119 
/**
 * Read index attribute values for the tuple currently prepared in the
 * block-level variables (prepare_fragptr/prepare_tabptr).  If
 * tupVersion is not the version stored in the stable tuple, the
 * matching version is located via the tuple's operation chain in
 * tuxReadAttrsCommon.
 *
 * Returns the result of readAttributes (negative on error).
 */
int
Dbtup::tuxReadAttrsCurr(EmulatedJamBuffer *jamBuf,
                        const Uint32* attrIds,
                        Uint32 numAttrs,
                        Uint32* dataOut,
                        bool xfrmFlag,
                        Uint32 tupVersion)
{
  thrjamEntryDebug(jamBuf);
  // use own variables instead of globals
  Fragrecord *fragPtrP = prepare_fragptr.p;
  Tablerec *tablePtrP = prepare_tabptr.p;

  // search for tuple version if not original
  Operationrec tmpOp;
  KeyReqStruct req_struct(jamBuf);
  req_struct.tablePtrP = tablePtrP;
  req_struct.fragPtrP = fragPtrP;

  tmpOp.op_type = ZREAD; // valgrind
  // Use the already-prepared tuple reference (no page lookup needed).
  setup_fixed_tuple_ref_opt(&req_struct);
  setup_fixed_part(&req_struct, &tmpOp, tablePtrP);

  return tuxReadAttrsCommon(req_struct,
                            attrIds,
                            numAttrs,
                            dataOut,
                            xfrmFlag,
                            tupVersion);
}
150 
151 /**
152  * This method can be called from MT-build of
153  * ordered indexes.
154  */
155 int
tuxReadAttrsOpt(EmulatedJamBuffer * jamBuf,Uint32 * fragPtrP,Uint32 * tablePtrP,Uint32 pageId,Uint32 pageIndex,Uint32 tupVersion,const Uint32 * attrIds,Uint32 numAttrs,Uint32 * dataOut,bool xfrmFlag)156 Dbtup::tuxReadAttrsOpt(EmulatedJamBuffer * jamBuf,
157                        Uint32* fragPtrP,
158                        Uint32* tablePtrP,
159                        Uint32 pageId,
160                        Uint32 pageIndex,
161                        Uint32 tupVersion,
162                        const Uint32* attrIds,
163                        Uint32 numAttrs,
164                        Uint32* dataOut,
165                        bool xfrmFlag)
166 {
167   thrjamEntryDebug(jamBuf);
168   // search for tuple version if not original
169 
170   Operationrec tmpOp;
171   KeyReqStruct req_struct(jamBuf);
172   req_struct.tablePtrP = (Tablerec*)tablePtrP;
173   req_struct.fragPtrP = (Fragrecord*)fragPtrP;
174 
175   tmpOp.m_tuple_location.m_page_no= pageId;
176   tmpOp.m_tuple_location.m_page_idx= pageIndex;
177   tmpOp.op_type = ZREAD; // valgrind
178   setup_fixed_tuple_ref(&req_struct,
179                         &tmpOp,
180                         (Tablerec*)tablePtrP);
181   setup_fixed_part(&req_struct,
182                    &tmpOp,
183                    (Tablerec*)tablePtrP);
184   return tuxReadAttrsCommon(req_struct,
185                             attrIds,
186                             numAttrs,
187                             dataOut,
188                             xfrmFlag,
189                             tupVersion);
190 }
191 
/**
 * Common tail for the tuxReadAttrs* variants: given a prepared
 * KeyReqStruct addressing a fixed-part tuple, locate the requested
 * tuple version and read the attributes into dataOut.
 *
 * If the stable tuple's version differs from tupVersion, the chain of
 * active operations on the tuple is walked backwards to find the
 * operation that produced tupVersion; if that operation has a copy
 * tuple, attributes are read from the copy instead.
 *
 * Returns the result of readAttributes (negative on error).
 */
int
Dbtup::tuxReadAttrsCommon(KeyReqStruct &req_struct,
                          const Uint32* attrIds,
                          Uint32 numAttrs,
                          Uint32* dataOut,
                          bool xfrmFlag,
                          Uint32 tupVersion)
{
  Tuple_header *tuple_ptr = req_struct.m_tuple_ptr;
  if (tuple_ptr->get_tuple_version() != tupVersion)
  {
    thrjamDebug(req_struct.jamBuffer);
    OperationrecPtr opPtr;
    opPtr.i= tuple_ptr->m_operation_ptr_i;
    Uint32 loopGuard= 0;
    while (opPtr.i != RNIL) {
      ndbrequire(c_operation_pool.getValidPtr(opPtr));
      if (opPtr.p->op_struct.bit_field.tupVersion == tupVersion) {
        thrjamDebug(req_struct.jamBuffer);
        if (!opPtr.p->m_copy_tuple_location.isNull()) {
          req_struct.m_tuple_ptr=
            get_copy_tuple(&opPtr.p->m_copy_tuple_location);
        }
        break;
      }
      thrjamDebug(req_struct.jamBuffer);
      opPtr.i= opPtr.p->prevActiveOp;
      // Guard against a corrupt operation chain: there cannot be more
      // chained operations than distinct tuple versions.
      ndbrequire(++loopGuard < (1 << ZTUP_VERSION_BITS));
    }
  }
  // read key attributes from found tuple version
  // save globals
  prepare_read(&req_struct, req_struct.tablePtrP, false);

  // do it
  int ret = readAttributes(&req_struct,
                           attrIds,
                           numAttrs,
                           dataOut,
                           ZNIL,
                           xfrmFlag);
  // done
  return ret;
}
236 
/**
 * Read the primary key attributes of the tuple at pageId/pageIndex
 * and strip the attribute headers so dataOut holds the bare key data.
 *
 * Returns the number of key data words written (headers removed),
 * 0 if the tuple slot is FREE, or a negative readAttributes error.
 * On a non-negative return, dataOut[ret] additionally holds the row
 * GCI, or 0 when the table stores no per-row GCI.
 */
int
Dbtup::tuxReadPk(Uint32* fragPtrP_input,
                 Uint32* tablePtrP_input,
                 Uint32 pageId,
                 Uint32 pageIndex,
                 Uint32* dataOut,
                 bool xfrmFlag)
{
  jamEntryDebug();
  Fragrecord* fragPtrP = (Fragrecord*)fragPtrP_input;
  Tablerec* tablePtrP = (Tablerec*)tablePtrP_input;

  // Fake operation used only to carry the tuple address.
  Operationrec tmpOp;
  tmpOp.m_tuple_location.m_page_no= pageId;
  tmpOp.m_tuple_location.m_page_idx= pageIndex;

  KeyReqStruct req_struct(this);
  req_struct.tablePtrP = tablePtrP;
  req_struct.fragPtrP = fragPtrP;

  PagePtr page_ptr;
  Uint32* ptr= get_ptr(&page_ptr, &tmpOp.m_tuple_location, tablePtrP);
  req_struct.m_page_ptr = page_ptr;
  req_struct.m_tuple_ptr = (Tuple_header*)ptr;

  int ret = 0;
  if (likely(! (req_struct.m_tuple_ptr->m_header_bits & Tuple_header::FREE)))
  {
    req_struct.check_offset[MM]= tablePtrP->get_check_offset(MM);
    req_struct.check_offset[DD]= tablePtrP->get_check_offset(DD);

    Uint32 num_attr= tablePtrP->m_no_of_attributes;
    Uint32 descr_start= tablePtrP->tabDescriptor;
    TableDescriptor *tab_descr= &tableDescriptor[descr_start];
    ndbrequire(descr_start + (num_attr << ZAD_LOG_SIZE) <= cnoOfTabDescrRec);
    req_struct.attr_descr= tab_descr;

    // ALLOC set means the tuple is newly inserted and not yet
    // committed; the actual data lives in the operation's copy tuple.
    if (unlikely(req_struct.m_tuple_ptr->m_header_bits & Tuple_header::ALLOC))
    {
      OperationrecPtr opPtr;
      opPtr.i = req_struct.m_tuple_ptr->m_operation_ptr_i;
      ndbrequire(c_operation_pool.getValidPtr(opPtr));
      ndbassert(!opPtr.p->m_copy_tuple_location.isNull());
      req_struct.m_tuple_ptr=
        get_copy_tuple(&opPtr.p->m_copy_tuple_location);
    }
    prepare_read(&req_struct, tablePtrP, false);

    const Uint32* attrIds= &tableDescriptor[tablePtrP->readKeyArray].tabDescr;
    const Uint32 numAttrs= tablePtrP->noOfKeyAttr;
    // read pk attributes from original tuple

    // do it
    ret = readAttributes(&req_struct,
                         attrIds,
                         numAttrs,
                         dataOut,
                         ZNIL,
                         xfrmFlag);
    // done
    if (ret >= 0) {
      // remove headers: compact the buffer in place by shifting each
      // attribute's data words left over its AttributeHeader word
      // (n = headers removed so far, i = read position).
      Uint32 n= 0;
      Uint32 i= 0;
      while (n < numAttrs) {
        const AttributeHeader ah(dataOut[i]);
        Uint32 size= ah.getDataSize();
        ndbrequire(size != 0);
        for (Uint32 j= 0; j < size; j++) {
          dataOut[i + j - n]= dataOut[i + j + 1];
        }
        n+= 1;
        i+= 1 + size;
      }
      // i must have consumed exactly the words readAttributes wrote
      ndbrequire((int)i == ret);
      ret -= numAttrs;
    }
    else
    {
      jam();
      return ret;
    }
  }
  else
  {
    jam();
  }
  // Append the row GCI after the key data (0 when the table does not
  // store a per-row GCI).
  if (likely(tablePtrP->m_bits & Tablerec::TR_RowGCI))
  {
    dataOut[ret] = *req_struct.m_tuple_ptr->get_mm_gci(tablePtrP);
  }
  else
  {
    dataOut[ret] = 0;
  }
  return ret;
}
334 
335 int
accReadPk(Uint32 tableId,Uint32 fragId,Uint32 fragPageId,Uint32 pageIndex,Uint32 * dataOut,bool xfrmFlag)336 Dbtup::accReadPk(Uint32 tableId, Uint32 fragId, Uint32 fragPageId, Uint32 pageIndex, Uint32* dataOut, bool xfrmFlag)
337 {
338   jamEntryDebug();
339   // get table
340   TablerecPtr tablePtr;
341   tablePtr.i = tableId;
342   ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
343   // get fragment
344   FragrecordPtr fragPtr;
345   getFragmentrec(fragPtr, fragId, tablePtr.p);
346   // get real page id and tuple offset
347 
348   Uint32 pageId = getRealpid(fragPtr.p, fragPageId);
349   // use TUX routine - optimize later
350   int ret = tuxReadPk((Uint32*)fragPtr.p,
351                       (Uint32*)tablePtr.p,
352                       pageId,
353                       pageIndex,
354                       dataOut,
355                       xfrmFlag);
356   return ret;
357 }
358 
359 /*
360  * TUX index contains all tuple versions.  A scan in TUX has scanned
361  * one of them and asks if it can be returned as scan result.  This
362  * depends on trans id, dirty read flag, and savepoint within trans.
363  *
364  * Previously this faked a ZREAD operation and used getPage().
365  * In TUP getPage() is run after ACC locking, but TUX comes here
366  * before ACC access.  Instead of modifying getPage() it is more
367  * clear to do the full check here.
368  */
/**
 * Decide whether the tuple version scanned by TUX is visible to the
 * scanning transaction (see the block comment above for background).
 *
 * @param opPtrI      i-value of the last operation on the tuple
 * @param tupVersion  version that TUX scanned
 * @param transId1/2  scanning transaction id
 * @param dirty       true for dirty (committed) read
 * @param savepointId savepoint of the scan within its transaction
 * @return true if the scanned version may be returned
 */
bool
Dbtup::tuxQueryTh(Uint32 opPtrI,
                  Uint32 tupVersion,
                  Uint32 transId1,
                  Uint32 transId2,
                  bool dirty,
                  Uint32 savepointId)
{
  jamEntryDebug();

  OperationrecPtr currOpPtr;
  currOpPtr.i = opPtrI;
  ndbrequire(c_operation_pool.getValidPtr(currOpPtr));

  // Does the scanning transaction own the pending operation chain?
  const bool sameTrans =
    c_lqh->is_same_trans(currOpPtr.p->userpointer, transId1, transId2);

  bool res = false;
  OperationrecPtr loopOpPtr = currOpPtr;

  if (!sameTrans)
  {
    jamDebug();
    if (!dirty)
    {
      jamDebug();
      // Locking read by another transaction: only the newest version
      // qualifies, and the lock will settle visibility.
      if (currOpPtr.p->nextActiveOp == RNIL)
      {
        jamDebug();
        // last op - TUX makes ACC lock request in same timeslice
        res = true;
      }
    }
    else
    {
      // Dirty read by another transaction: only the committed
      // (original) version is visible.
      // loop to first op (returns false)
      find_savepoint(loopOpPtr, 0);
      const Uint32 op_type = loopOpPtr.p->op_type;

      if (op_type != ZINSERT)
      {
        jamDebug();
        // read committed version
        Tuple_header *tuple_ptr = (Tuple_header*)prepare_tuple_ptr;
        const Uint32 origVersion = tuple_ptr->get_tuple_version();
        if (origVersion == tupVersion)
        {
          jamDebug();
          res = true;
        }
      }
    }
  }
  else
  {
    jamDebug();
    // for own trans, ignore dirty flag

    // Find the last own operation at or before the scan's savepoint.
    if (find_savepoint(loopOpPtr, savepointId))
    {
      jamDebug();
      const Uint32 op_type = loopOpPtr.p->op_type;

      if (op_type != ZDELETE)
      {
        jamDebug();
        // check if this op has produced the scanned version
        Uint32 loopVersion = loopOpPtr.p->op_struct.bit_field.tupVersion;
        if (loopVersion == tupVersion)
        {
          jamDebug();
          res = true;
        }
      }
    }
  }
  return res;
}
447 
/**
 * Read index attribute values for the tuple at pageId/pageIndex,
 * resolving fragment and table records from fragPtrI (unlike
 * tuxReadAttrsOpt/Curr which take pointers or use prepared globals).
 *
 * This method is still used by index statistics and debug code.
 */
int
Dbtup::tuxReadAttrs(EmulatedJamBuffer * jamBuf,
                    Uint32 fragPtrI,
                    Uint32 pageId,
                    Uint32 pageIndex,
                    Uint32 tupVersion,
                    const Uint32* attrIds,
                    Uint32 numAttrs,
                    Uint32* dataOut,
                    bool xfrmFlag)
{
  thrjamEntryDebug(jamBuf);
  // use own variables instead of globals
  FragrecordPtr fragPtr;
  fragPtr.i= fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
  TablerecPtr tablePtr;
  tablePtr.i= fragPtr.p->fragTableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);

  // search for tuple version if not original

  // Fake a read operation addressing the tuple for the common routine.
  Operationrec tmpOp;
  KeyReqStruct req_struct(jamBuf);
  req_struct.tablePtrP = tablePtr.p;
  req_struct.fragPtrP = fragPtr.p;

  tmpOp.m_tuple_location.m_page_no= pageId;
  tmpOp.m_tuple_location.m_page_idx= pageIndex;
  tmpOp.op_type = ZREAD; // valgrind
  setup_fixed_tuple_ref(&req_struct, &tmpOp, tablePtr.p);
  setup_fixed_part(&req_struct, &tmpOp, tablePtr.p);
  return tuxReadAttrsCommon(req_struct,
                            attrIds,
                            numAttrs,
                            dataOut,
                            xfrmFlag,
                            tupVersion);
}
490 
491 // ordered index build
492 
493 //#define TIME_MEASUREMENT
494 #ifdef TIME_MEASUREMENT
495   static Uint32 time_events;
496   Uint64 tot_time_passed;
497   Uint32 number_events;
498 #endif
/**
 * BUILD_INDX_IMPL_REQ: start an index build on this TUP instance.
 * Seizes a build record, validates the request, and starts either the
 * online signal-driven build loop (buildIndex) or the offline
 * multi-threaded build (buildIndexOffline).  Failures are reported
 * via buildIndexReply.
 */
void
Dbtup::execBUILD_INDX_IMPL_REQ(Signal* signal)
{
  jamEntry();
#ifdef TIME_MEASUREMENT
  time_events= 0;
  tot_time_passed= 0;
  number_events= 1;
#endif
  const BuildIndxImplReq* const req =
    (const BuildIndxImplReq*)signal->getDataPtr();
  // get new operation
  BuildIndexPtr buildPtr;
  if (ERROR_INSERTED(4031) || ! c_buildIndexList.seizeFirst(buildPtr)) {
    jam();
    // No build record available (or simulated failure, error insert
    // 4031): reply Busy using a temporary record on the stack.
    BuildIndexRec buildRec;
    buildRec.m_request = *req;
    buildRec.m_errorCode = BuildIndxImplRef::Busy;
    if (ERROR_INSERTED(4031))
    {
      CLEAR_ERROR_INSERT_VALUE;
    }
    buildIndexReply(signal, &buildRec);
    return;
  }
  buildPtr.p->m_request = *req;
  const BuildIndxImplReq* buildReq = &buildPtr.p->m_request;
  // check
  buildPtr.p->m_errorCode= BuildIndxImplRef::NoError;
  buildPtr.p->m_outstanding = 0;
  do {
    if (buildReq->tableId >= cnoOfTablerec) {
      jam();
      buildPtr.p->m_errorCode= BuildIndxImplRef::InvalidPrimaryTable;
      break;
    }
    TablerecPtr tablePtr;
    tablePtr.i= buildReq->tableId;
    ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
    if (tablePtr.p->tableStatus != DEFINED) {
      jam();
      buildPtr.p->m_errorCode= BuildIndxImplRef::InvalidPrimaryTable;
      break;
    }
    // memory page format
    buildPtr.p->m_build_vs =
      (tablePtr.p->m_attributes[MM].m_no_of_varsize +
       tablePtr.p->m_attributes[MM].m_no_of_dynamic) > 0;
    if (DictTabInfo::isOrderedIndex(buildReq->indexType)) {
      jam();
      // Ordered index: locate the TUX maintenance trigger created for
      // this index; it must already exist.
      const TupTriggerData_list& triggerList =
        tablePtr.p->tuxCustomTriggers;

      TriggerPtr triggerPtr;
      triggerList.first(triggerPtr);
      while (triggerPtr.i != RNIL) {
        if (triggerPtr.p->indexId == buildReq->indexId) {
          jam();
          break;
        }
        triggerList.next(triggerPtr);
      }
      if (triggerPtr.i == RNIL) {
        jam();
        // trigger was not created
        ndbassert(false);
        buildPtr.p->m_errorCode = BuildIndxImplRef::InternalError;
        break;
      }
      buildPtr.p->m_indexId = buildReq->indexId;
      buildPtr.p->m_buildRef = DBTUX;
      // Tell TUX the index is now in "building" state.
      AlterIndxImplReq* req = (AlterIndxImplReq*)signal->getDataPtrSend();
      req->indexId = buildReq->indexId;
      req->senderRef = 0;
      req->requestType = AlterIndxImplReq::AlterIndexBuilding;
      EXECUTE_DIRECT(DBTUX, GSN_ALTER_INDX_IMPL_REQ, signal,
                     AlterIndxImplReq::SignalLength);
    } else if(buildReq->indexId == RNIL) {
      jam();
      // REBUILD of acc
      buildPtr.p->m_indexId = RNIL;
      buildPtr.p->m_buildRef = DBACC;
    } else {
      jam();
      buildPtr.p->m_errorCode = BuildIndxImplRef::InvalidIndexType;
      break;
    }

    // set to first tuple position
    const Uint32 firstTupleNo = 0;
    buildPtr.p->m_fragNo= 0;
    buildPtr.p->m_pageId= 0;
    buildPtr.p->m_tupleNo= firstTupleNo;
    // start build

    bool offline = !!(buildReq->requestType&BuildIndxImplReq::RF_BUILD_OFFLINE);
    if (offline && m_max_parallel_index_build > 1)
    {
      jam();
      buildIndexOffline(signal, buildPtr.i);
    }
    else
    {
      jam();
      buildIndex(signal, buildPtr.i);
    }
    return;
  } while (0);
  // check failed
  buildIndexReply(signal, buildPtr.p);
  c_buildIndexList.release(buildPtr);
}
611 
/**
 * Online (signal-driven) index build.  Each invocation processes at
 * most one tuple (or advances past one page/fragment boundary) and
 * then reschedules itself via CONTINUEB, so the build does not block
 * the thread.  The do { ... break; } while (0) construct lets every
 * "advance position" case fall through to the CONTINUEB send at the
 * bottom; the only returns are build-complete and build-failed.
 */
void
Dbtup::buildIndex(Signal* signal, Uint32 buildPtrI)
{
  // get build record
  BuildIndexPtr buildPtr;
  buildPtr.i= buildPtrI;
  c_buildIndexList.getPtr(buildPtr);
  const BuildIndxImplReq* buildReq= &buildPtr.p->m_request;
  // get table
  TablerecPtr tablePtr;
  tablePtr.i= buildReq->tableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);

  const Uint32 firstTupleNo = 0;
  const Uint32 tupheadsize = tablePtr.p->m_offsets[MM].m_fix_header_size;

#ifdef TIME_MEASUREMENT
  NDB_TICKS start;
  NDB_TICKS stop;
  Uint64 time_passed;
#endif
  do {
    // get fragment
    FragrecordPtr fragPtr;
    if (buildPtr.p->m_fragNo == NDB_ARRAY_SIZE(tablePtr.p->fragrec)) {
      jam();
      // build ready
      buildIndexReply(signal, buildPtr.p);
      c_buildIndexList.release(buildPtr);
      return;
    }
    ndbrequire(buildPtr.p->m_fragNo < NDB_ARRAY_SIZE(tablePtr.p->fragrec));
    fragPtr.i= tablePtr.p->fragrec[buildPtr.p->m_fragNo];
    if (fragPtr.i == RNIL) {
      jam();
      // unused fragment slot - advance to the next fragment
      buildPtr.p->m_fragNo++;
      buildPtr.p->m_pageId= 0;
      buildPtr.p->m_tupleNo= firstTupleNo;
      break;
    }
    ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
    // get page
    PagePtr pagePtr;
    if (buildPtr.p->m_pageId >= fragPtr.p->m_max_page_cnt)
    {
      jam();
      // past the last page - advance to the next fragment
      buildPtr.p->m_fragNo++;
      buildPtr.p->m_pageId= 0;
      buildPtr.p->m_tupleNo= firstTupleNo;
      break;
    }
    Uint32 realPageId= getRealpidCheck(fragPtr.p, buildPtr.p->m_pageId);
    // skip empty page
    if (realPageId == RNIL)
    {
      jam();
      // no physical page mapped; still step the position below
      goto next_tuple;
    }

    c_page_pool.getPtr(pagePtr, realPageId);

next_tuple:
    // get tuple
    Uint32 pageIndex = ~0;
    const Tuple_header* tuple_ptr = 0;
    pageIndex = buildPtr.p->m_tupleNo * tupheadsize;
    if (pageIndex + tupheadsize > Fix_page::DATA_WORDS) {
      jam();
      // past the last tuple slot on this page - advance to next page
      buildPtr.p->m_pageId++;
      buildPtr.p->m_tupleNo= firstTupleNo;
      break;
    }

    if (realPageId == RNIL)
    {
      jam();
      // empty page: just step to the next tuple position
      buildPtr.p->m_tupleNo++;
      break;
    }

    tuple_ptr = (Tuple_header*)&pagePtr.p->m_data[pageIndex];
    // skip over free tuple
    if (tuple_ptr->m_header_bits & Tuple_header::FREE) {
      jam();
      buildPtr.p->m_tupleNo++;
      break;
    }
    Uint32 tupVersion= tuple_ptr->get_tuple_version();
    OperationrecPtr pageOperPtr;
    pageOperPtr.i= tuple_ptr->m_operation_ptr_i;
#ifdef TIME_MEASUREMENT
    start = NdbTick_getCurrentTicks();
#endif
    // add to index
    TuxMaintReq* const req = (TuxMaintReq*)signal->getDataPtrSend();
    req->errorCode = RNIL;
    req->tableId = tablePtr.i;
    req->indexId = buildPtr.p->m_indexId;
    req->fragId = tablePtr.p->fragid[buildPtr.p->m_fragNo];
    req->pageId = realPageId;
    req->tupVersion = tupVersion;
    req->opInfo = TuxMaintReq::OpAdd;
    req->tupFragPtrI = fragPtr.i;
    req->fragPageId = buildPtr.p->m_pageId;
    req->pageIndex = pageIndex;

    if (pageOperPtr.i == RNIL)
    {
      // no pending operation: add the stable tuple version only
      EXECUTE_DIRECT(buildPtr.p->m_buildRef, GSN_TUX_MAINT_REQ,
                     signal, TuxMaintReq::SignalLength+2);
    }
    else
    {
      /*
      If there is an ongoing operation on the tuple then it is either a
      copy tuple or an original tuple with an ongoing transaction. In
      both cases realPageId and pageOffset refer to the original tuple.
      The tuple address stored in TUX will always be the original tuple
      but with the tuple version of the tuple we found.

      This is necessary to avoid having to update TUX at abort of
      update. If an update aborts then the copy tuple is copied to
      the original tuple. The build will however have found that
      tuple as a copy tuple. The original tuple is stable and is thus
      preferrable to store in TUX.
      */
      jam();

      /**
       * Since copy tuples now can't be found on real pages.
       *   we will here build all copies of the tuple
       *
       * Note only "real" tupVersion's should be added
       *      i.e delete's shouldnt be added
       *      (unless it's the first op, when "original" should be added)
       */

      /*
       * Start from first operation.  This is only to make things more
       * clear.  It is not required by ordered index implementation.
       */
      ndbrequire(c_operation_pool.getValidPtr(pageOperPtr));
      while (pageOperPtr.p->prevActiveOp != RNIL)
      {
        jam();
        pageOperPtr.i = pageOperPtr.p->prevActiveOp;
        ndbrequire(c_operation_pool.getValidPtr(pageOperPtr));
      }
      /*
       * Do not use req->errorCode as global control.
       */
      bool ok = true;
      /*
       * If first operation is an update, add previous version.
       * This version does not appear as the version of any operation.
       * At commit this version is removed by executeTuxCommitTriggers.
       * At abort it is preserved by executeTuxAbortTriggers.
       */
      if (pageOperPtr.p->op_type == ZUPDATE)
      {
        jam();
        req->errorCode = RNIL;
        req->tupVersion =
          decr_tup_version(pageOperPtr.p->op_struct.bit_field.tupVersion);
        EXECUTE_DIRECT(buildPtr.p->m_buildRef, GSN_TUX_MAINT_REQ,
                       signal, TuxMaintReq::SignalLength+2);
        ok = (req->errorCode == 0);
      }
      /*
       * Add versions from all operations.
       *
       * Each operation has a tuple version.  For insert and update it
       * is the newly created version.  For delete it is the version
       * deleted.  The existence of operation tuple version implies that
       * a corresponding tuple version exists for TUX to read.
       *
       * We could be in the middle of a commit.  The process here makes
       * no assumptions about operation commit order.  (It should be
       * first to last but this is not the place to assert it).
       *
       * Duplicate versions are possible e.g. a delete in the middle
       * may have same version as the previous operation.  TUX ignores
       * duplicate version errors during index build.
       */
      while (pageOperPtr.i != RNIL && ok)
      {
        jam();
        ndbrequire(c_operation_pool.getValidPtr(pageOperPtr));
        req->errorCode = RNIL;
        req->tupVersion = pageOperPtr.p->op_struct.bit_field.tupVersion;
        EXECUTE_DIRECT(buildPtr.p->m_buildRef, GSN_TUX_MAINT_REQ,
                       signal, TuxMaintReq::SignalLength+2);
        pageOperPtr.i = pageOperPtr.p->nextActiveOp;
        ok = (req->errorCode == 0);
      }
    }

    jamEntry();
    if (req->errorCode != 0) {
      // only allocation failures are tolerated; anything else is a bug
      switch (req->errorCode) {
      case TuxMaintReq::NoMemError:
      case TuxMaintReq::NoTransMemError:
        jam();
        buildPtr.p->m_errorCode= BuildIndxImplRef::AllocationFailure;
        break;
      default:
        ndbabort();
      }
      // abort the whole build
      buildIndexReply(signal, buildPtr.p);
      c_buildIndexList.release(buildPtr);
      return;
    }
#ifdef TIME_MEASUREMENT
    stop = NdbTick_getCurrentTicks();
    time_passed= NdbTick_Elapsed(start, stop).microSec();
    if (time_passed < 1000) {
      time_events++;
      tot_time_passed += time_passed;
      if (time_events == number_events) {
        Uint64 mean_time_passed= tot_time_passed /
                                     (Uint64)number_events;
        ndbout << "Number of events= " << number_events;
        ndbout << " Mean time passed= " << mean_time_passed << endl;
        number_events <<= 1;
        tot_time_passed= 0;
        time_events= 0;
      }
    }
#endif
    // next tuple
    buildPtr.p->m_tupleNo++;
    break;
  } while (0);
  // reschedule ourselves to handle the next position
  signal->theData[0]= ZBUILD_INDEX;
  signal->theData[1]= buildPtr.i;
  sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
}
849 
850 Uint32 Dbtux_mt_buildIndexFragment_wrapper_C(void*);
851 
/**
 * Start the offline (multi-threaded) index build: ask LQH to make the
 * table read-only first.  The build continues from execALTER_TAB_CONF
 * (via buildIndexOffline_table_readonly) once the table is read-only.
 */
void
Dbtup::buildIndexOffline(Signal* signal, Uint32 buildPtrI)
{
  jam();
  /**
   * We need to make table read-only...as mtoib does not work otherwise
   */
  BuildIndexPtr buildPtr;
  buildPtr.i= buildPtrI;
  c_buildIndexList.getPtr(buildPtr);
  const BuildIndxImplReq* buildReq =
    (const BuildIndxImplReq*)&buildPtr.p->m_request;

  AlterTabReq* req = (AlterTabReq*)signal->getDataPtrSend();
  /**
   * Note: before 7.3.4, 7.2.15, 7.1.30 fifth word and
   * up was undefined.
   */
  bzero(req, sizeof(*req));
  req->senderRef = reference();
  req->senderData = buildPtrI;
  req->tableId = buildReq->tableId;
  req->requestType = AlterTabReq::AlterTableReadOnly;
  sendSignal(calcInstanceBlockRef(DBLQH), GSN_ALTER_TAB_REQ, signal,
             AlterTabReq::SignalLength, JBB);
}
878 
/**
 * ALTER_TAB_CONF from LQH during an offline index build.
 * m_fragNo == 0: the table has just been made read-only, so start
 * building the fragments.  Otherwise this confirms the table was made
 * read-write again after all fragments were built; report completion.
 */
void
Dbtup::execALTER_TAB_CONF(Signal* signal)
{
  jamEntry();
  AlterTabConf* conf = (AlterTabConf*)signal->getDataPtr();

  BuildIndexPtr buildPtr;
  buildPtr.i = conf->senderData;
  c_buildIndexList.getPtr(buildPtr);


  if (buildPtr.p->m_fragNo == 0)
  {
    jam();
    buildIndexOffline_table_readonly(signal, conf->senderData);
    return;
  }
  else
  {
    jam();
    // tablePtr is used only inside NDB_ARRAY_SIZE (presumably a
    // sizeof-based compile-time expression - TODO confirm), so the
    // uninitialized pointer is never dereferenced at runtime.
    TablerecPtr tablePtr;
    (void)tablePtr; // hide unused warning
    ndbrequire(buildPtr.p->m_fragNo >= NDB_ARRAY_SIZE(tablePtr.p->fragid));
    buildIndexReply(signal, buildPtr.p);
    c_buildIndexList.release(buildPtr);
    return;
  }
}
907 
/**
 * Driver loop for the offline (multi-threaded) index build.
 *
 * Entered once ALTER_TAB_CONF confirms the table is read-only, and
 * re-entered from execBUILD_INDX_IMPL_CONF / _REF as fragment builds
 * complete.  buildPtr.p->m_fragNo is the persistent cursor into the
 * fragment array and m_outstanding counts requests in flight, capped at
 * m_max_parallel_index_build.  When all fragments are done and nothing
 * is outstanding, the table is switched back to read-write.
 */
void
Dbtup::buildIndexOffline_table_readonly(Signal* signal, Uint32 buildPtrI)
{
  // get build record
  BuildIndexPtr buildPtr;
  buildPtr.i= buildPtrI;
  c_buildIndexList.getPtr(buildPtr);
  const BuildIndxImplReq* buildReq =
    (const BuildIndxImplReq*)&buildPtr.p->m_request;
  // get table
  TablerecPtr tablePtr;
  tablePtr.i= buildReq->tableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);

  // Resume scanning fragments from where the previous call stopped.
  for (;buildPtr.p->m_fragNo < NDB_ARRAY_SIZE(tablePtr.p->fragrec);
       buildPtr.p->m_fragNo++)
  {
    jam();
    FragrecordPtr fragPtr;
    fragPtr.i = tablePtr.p->fragrec[buildPtr.p->m_fragNo];
    if (fragPtr.i == RNIL)
    {
      jam();
      // unused fragment slot
      continue;
    }
    ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
    // Describe one fragment-build job; the worker calls back through
    // func_ptr with the tux/tup block pointers below.
    mt_BuildIndxReq req;
    bzero(&req, sizeof(req));
    req.senderRef = reference();
    req.senderData = buildPtr.i;
    req.tableId = buildReq->tableId;
    req.indexId = buildPtr.p->m_indexId;
    req.fragId = tablePtr.p->fragid[buildPtr.p->m_fragNo];

    // Resolve the DBTUX instance paired with this DBTUP instance
    // (instance() == 0 means the single/proxy block itself).
    SimulatedBlock * tux = globalData.getBlock(DBTUX);
    if (instance() != 0)
    {
      tux = tux->getInstance(instance());
      ndbrequire(tux != 0);
    }
    req.tux_ptr = tux;
    req.tup_ptr = this;
    req.func_ptr = Dbtux_mt_buildIndexFragment_wrapper_C;
    req.buffer_size = 32*32768; // thread-local-buffer

    Uint32 * req_ptr = signal->getDataPtrSend();
    memcpy(req_ptr, &req, sizeof(req));

    // Hand the job to NDBFS (presumably run on one of its worker
    // threads via req.func_ptr — confirm against the Ndbfs block).
    // Length is sizeof(req) in bytes rounded up to 32-bit words; the
    // +15 overshoots by up to three words, which is harmless.
    sendSignal(NDBFS_REF, GSN_BUILD_INDX_IMPL_REQ, signal,
               (sizeof(req) + 15) / 4, JBB);

    buildPtr.p->m_outstanding++;
    if (buildPtr.p->m_outstanding >= m_max_parallel_index_build)
    {
      jam();
      // Throttle: wait for a CONF/REF before launching more fragments.
      return;
    }
  }

  if (buildPtr.p->m_outstanding == 0)
  {
    jam();
    // Every fragment is done: ask LQH to make the table read-write
    // again.  The resulting ALTER_TAB_CONF (with m_fragNo past the
    // end of the array) triggers the final reply in execALTER_TAB_CONF.
    AlterTabReq* req = (AlterTabReq*)signal->getDataPtrSend();
    /**
     * Note: before 7.3.4, 7.2.15, 7.1.30 fifth word and
     * up was undefined.
     */
    bzero(req, sizeof(*req));
    req->senderRef = reference();
    req->senderData = buildPtrI;
    req->tableId = buildReq->tableId;
    req->requestType = AlterTabReq::AlterTableReadWrite;
    sendSignal(calcInstanceBlockRef(DBLQH), GSN_ALTER_TAB_REQ, signal,
               AlterTabReq::SignalLength, JBB);
    return;
  }
  else
  {
    jam();
    // wait for replies
    return;
  }
}
991 
992 int
mt_scan_init(Uint32 tableId,Uint32 fragId,Local_key * pos,Uint32 * fragPtrI)993 Dbtup::mt_scan_init(Uint32 tableId, Uint32 fragId,
994                     Local_key* pos, Uint32 * fragPtrI)
995 {
996   TablerecPtr tablePtr;
997   tablePtr.i = tableId;
998   ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
999 
1000   FragrecordPtr fragPtr;
1001   fragPtr.i = RNIL;
1002   for (Uint32 i = 0; i<NDB_ARRAY_SIZE(tablePtr.p->fragid); i++)
1003   {
1004     if (tablePtr.p->fragid[i] == fragId)
1005     {
1006       fragPtr.i = tablePtr.p->fragrec[i];
1007       break;
1008     }
1009   }
1010 
1011   if (fragPtr.i == RNIL)
1012     return -1;
1013 
1014   ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
1015 
1016   Uint32 fragPageId = 0;
1017   while (fragPageId < fragPtr.p->m_max_page_cnt)
1018   {
1019     Uint32 realPageId= getRealpidCheck(fragPtr.p, fragPageId);
1020     if (realPageId != RNIL)
1021     {
1022       * fragPtrI = fragPtr.i;
1023       pos->m_page_no = realPageId;
1024       pos->m_page_idx = 0;
1025       pos->m_file_no = 0;
1026       return 0;
1027     }
1028     fragPageId++;
1029   }
1030 
1031   return 1;
1032 }
1033 
/**
 * Multi-threaded index build: step to the next in-use tuple in the
 * fragment's fixed-size page area.
 *
 * pos->m_page_no / m_page_idx hold the current position (real page id
 * plus word offset).  If moveNext is set the current tuple is skipped
 * first.  On success pos->m_file_no is overloaded to carry the found
 * tuple's version (see get_tuple_version() use below).
 *
 * Returns 0 when positioned on a live tuple, 1 at end of fragment.
 */
int
Dbtup::mt_scan_next(Uint32 tableId, Uint32 fragPtrI,
                    Local_key* pos, bool moveNext)
{
  TablerecPtr tablePtr;
  tablePtr.i = tableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);

  FragrecordPtr fragPtr;
  fragPtr.i = fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);

  // Fixed-part tuple size in words; tuples are laid out back to back,
  // so stepping is a constant-stride advance of m_page_idx.
  Uint32 tupheadsize = tablePtr.p->m_offsets[MM].m_fix_header_size;
  if (moveNext)
  {
    pos->m_page_idx += tupheadsize;
  }

  PagePtr pagePtr;
  c_page_pool.getPtr(pagePtr, pos->m_page_no);

  while (1)
  {
    Tuple_header* tuple_ptr;
    // Scan the remainder of the current page.
    while (pos->m_page_idx + tupheadsize <= Fix_page::DATA_WORDS)
    {
      tuple_ptr = (Tuple_header*)(pagePtr.p->m_data + pos->m_page_idx);
      // skip over free tuple
      if (tuple_ptr->m_header_bits & Tuple_header::FREE)
      {
        pos->m_page_idx += tupheadsize;
        continue;
      }
      pos->m_file_no = tuple_ptr->get_tuple_version();
      return 0; // Found
    }

    // End of page...move to next
    Uint32 fragPageId = pagePtr.p->frag_page_id + 1;
    // Skip holes in the logical->real page map (unmapped pages are RNIL).
    while (fragPageId < fragPtr.p->m_max_page_cnt)
    {
      Uint32 realPageId = getRealpidCheck(fragPtr.p, fragPageId);
      if (realPageId != RNIL)
      {
        pos->m_page_no = realPageId;
        break;
      }
      fragPageId++;
    }

    if (fragPageId == fragPtr.p->m_max_page_cnt)
      break; // no more pages in this fragment

    pos->m_page_idx = 0;
    c_page_pool.getPtr(pagePtr, pos->m_page_no);
  }

  return 1;
}
1093 
/**
 * A fragment build failed.  Record the error and push m_fragNo past the
 * end of the fragment array so no further fragments are started; the
 * driver loop then just drains the remaining outstanding requests and
 * finally reports the failure via buildIndexReply().
 */
void
Dbtup::execBUILD_INDX_IMPL_REF(Signal* signal)
{
  jamEntry();
  BuildIndxImplRef* ref = (BuildIndxImplRef*)signal->getDataPtrSend();
  Uint32 ptr = ref->senderData;
  Uint32 err = ref->errorCode;

  BuildIndexPtr buildPtr;
  c_buildIndexList.getPtr(buildPtr, ptr);
  ndbrequire(buildPtr.p->m_outstanding);
  buildPtr.p->m_outstanding--;

  // tablePtr exists only so NDB_ARRAY_SIZE (a sizeof expression) can be
  // applied to its fragrec member; it is never dereferenced at runtime.
  TablerecPtr tablePtr;
  (void)tablePtr; // hide unused warning
  buildPtr.p->m_errorCode = (BuildIndxImplRef::ErrorCode)err;
  // No point in starting any more
  buildPtr.p->m_fragNo = NDB_ARRAY_SIZE(tablePtr.p->fragrec);
  buildIndexOffline_table_readonly(signal, ptr);
}
1114 
1115 void
execBUILD_INDX_IMPL_CONF(Signal * signal)1116 Dbtup::execBUILD_INDX_IMPL_CONF(Signal* signal)
1117 {
1118   jamEntry();
1119   BuildIndxImplConf* conf = (BuildIndxImplConf*)signal->getDataPtrSend();
1120   Uint32 ptr = conf->senderData;
1121 
1122   BuildIndexPtr buildPtr;
1123   c_buildIndexList.getPtr(buildPtr, ptr);
1124   ndbrequire(buildPtr.p->m_outstanding);
1125   buildPtr.p->m_outstanding--;
1126   buildPtr.p->m_fragNo++;
1127 
1128   buildIndexOffline_table_readonly(signal, ptr);
1129 }
1130 
1131 void
buildIndexReply(Signal * signal,const BuildIndexRec * buildPtrP)1132 Dbtup::buildIndexReply(Signal* signal, const BuildIndexRec* buildPtrP)
1133 {
1134   const BuildIndxImplReq* buildReq = &buildPtrP->m_request;
1135 
1136   AlterIndxImplReq* req = (AlterIndxImplReq*)signal->getDataPtrSend();
1137   req->indexId = buildReq->indexId;
1138   req->senderRef = 0; //
1139   if (buildPtrP->m_errorCode == BuildIndxImplRef::NoError)
1140   {
1141     jam();
1142     req->requestType = AlterIndxImplReq::AlterIndexOnline;
1143   }
1144   else
1145   {
1146     jam();
1147     req->requestType = AlterIndxImplReq::AlterIndexOffline;
1148   }
1149   EXECUTE_DIRECT(DBTUX, GSN_ALTER_INDX_IMPL_REQ, signal,
1150                  AlterIndxImplReq::SignalLength);
1151 
1152   if (buildPtrP->m_errorCode == BuildIndxImplRef::NoError) {
1153     jam();
1154     BuildIndxImplConf* conf =
1155       (BuildIndxImplConf*)signal->getDataPtrSend();
1156     conf->senderRef = reference();
1157     conf->senderData = buildReq->senderData;
1158 
1159     sendSignal(buildReq->senderRef, GSN_BUILD_INDX_IMPL_CONF,
1160                signal, BuildIndxImplConf::SignalLength, JBB);
1161   } else {
1162     jam();
1163     BuildIndxImplRef* ref =
1164       (BuildIndxImplRef*)signal->getDataPtrSend();
1165     ref->senderRef = reference();
1166     ref->senderData = buildReq->senderData;
1167     ref->errorCode = buildPtrP->m_errorCode;
1168 
1169     sendSignal(buildReq->senderRef, GSN_BUILD_INDX_IMPL_REF,
1170                signal, BuildIndxImplRef::SignalLength, JBB);
1171   }
1172 }
1173