1 /*
2    Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 #define DBTUP_C
26 #define DBTUP_INDEX_CPP
27 #include <dblqh/Dblqh.hpp>
28 #include "Dbtup.hpp"
29 #include <RefConvert.hpp>
30 #include <ndb_limits.h>
31 #include <pc.hpp>
32 #include <AttributeDescriptor.hpp>
33 #include "AttributeOffset.hpp"
34 #include <AttributeHeader.hpp>
35 #include <signaldata/TuxMaint.hpp>
36 #include <signaldata/AlterIndxImpl.hpp>
37 
38 // methods used by ordered index
39 
40 void
tuxGetTupAddr(Uint32 fragPtrI,Uint32 pageId,Uint32 pageIndex,Uint32 & lkey1,Uint32 & lkey2)41 Dbtup::tuxGetTupAddr(Uint32 fragPtrI,
42                      Uint32 pageId,
43                      Uint32 pageIndex,
44                      Uint32& lkey1,
45                      Uint32& lkey2)
46 {
47   jamEntry();
48   PagePtr pagePtr;
49   c_page_pool.getPtr(pagePtr, pageId);
50   lkey1 = pagePtr.p->frag_page_id;
51   lkey2 = pageIndex;
52 }
53 
/*
 * Allocate a fixed-size tuple slot in the given fragment for use as a
 * TUX index node.  On success returns 0 and sets pageId/pageOffset to
 * the slot's local key and node to the start of the node data (the
 * tuple area at the offset of attribute 0).  On failure returns the
 * allocation error code.
 */
int
Dbtup::tuxAllocNode(EmulatedJamBuffer * jamBuf,
                    Uint32 fragPtrI,
                    Uint32& pageId,
                    Uint32& pageOffset,
                    Uint32*& node)
{
  thrjamEntry(jamBuf);
  FragrecordPtr fragPtr;
  fragPtr.i= fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
  TablerecPtr tablePtr;
  tablePtr.i= fragPtr.p->fragTableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);

  Local_key key;
  Uint32* ptr, frag_page_id, err;
  // Allocate a fixed-size record; on failure propagate the error code.
  if ((ptr= alloc_fix_rec(&err,fragPtr.p,tablePtr.p, &key, &frag_page_id)) == 0)
  {
    thrjam(jamBuf);
    return err;
  }
  pageId= key.m_page_no;
  pageOffset= key.m_page_idx;
  // Look up attribute 0's descriptor to find where node data starts
  // within the allocated tuple.
  Uint32 attrDescIndex= tablePtr.p->tabDescriptor + (0 << ZAD_LOG_SIZE);
  Uint32 attrDataOffset= AttributeOffset::getOffset(
                              tableDescriptor[attrDescIndex + 1].tabDescr);
  node= ptr + attrDataOffset;
  return 0;
}
84 
/*
 * Free a TUX index node previously allocated by tuxAllocNode.  The
 * (pageId, pageOffset) pair is the node's local key; node is passed
 * only to cross-check that it matches the stored tuple location.
 */
void
Dbtup::tuxFreeNode(Uint32 fragPtrI,
                   Uint32 pageId,
                   Uint32 pageOffset,
                   Uint32* node)
{
  jamEntry();
  FragrecordPtr fragPtr;
  fragPtr.i= fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
  TablerecPtr tablePtr;
  tablePtr.i= fragPtr.p->fragTableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);

  Local_key key;
  key.m_page_no = pageId;
  key.m_page_idx = pageOffset;
  PagePtr pagePtr;
  Tuple_header* ptr = (Tuple_header*)get_ptr(&pagePtr, &key, tablePtr.p);

  // Recompute where the node data starts (offset of attribute 0) and
  // verify the caller's node pointer is consistent with the key.
  Uint32 attrDescIndex= tablePtr.p->tabDescriptor + (0 << ZAD_LOG_SIZE);
  Uint32 attrDataOffset= AttributeOffset::getOffset(tableDescriptor[attrDescIndex + 1].tabDescr);
  ndbrequire(node == (Uint32*)ptr + attrDataOffset);

  free_fix_rec(fragPtr.p, tablePtr.p, &key, (Fix_page*)pagePtr.p);
}
111 
/*
 * Resolve a TUX index node address: given its (pageId, pageOffset)
 * local key, set node to the start of the node data (tuple area at
 * the offset of attribute 0).  NOTE(review): no jamEntry() here,
 * unlike the sibling tux* methods — presumably intentional for this
 * path, confirm before adding one.
 */
void
Dbtup::tuxGetNode(Uint32 fragPtrI,
                  Uint32 pageId,
                  Uint32 pageOffset,
                  Uint32*& node)
{
  FragrecordPtr fragPtr;
  fragPtr.i= fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
  TablerecPtr tablePtr;
  tablePtr.i= fragPtr.p->fragTableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
  PagePtr pagePtr;
  c_page_pool.getPtr(pagePtr, pageId);
  // Offset of attribute 0 marks where node data starts inside the tuple.
  Uint32 attrDescIndex= tablePtr.p->tabDescriptor + (0 << ZAD_LOG_SIZE);
  Uint32 attrDataOffset= AttributeOffset::getOffset(
                            tableDescriptor[attrDescIndex + 1].tabDescr);
  node= ((Fix_page*)pagePtr.p)->
    get_ptr(pageOffset, tablePtr.p->m_offsets[MM].m_fix_header_size) +
    attrDataOffset;
}
/*
 * Read index key attributes from the tuple version that TUX scanned.
 * If the stored tuple's current version differs from tupVersion, walk
 * the tuple's operation chain backwards to find the operation that
 * produced tupVersion and read from its copy tuple instead.  Returns
 * the readAttributes result (number of words read, or negative error).
 */
int
Dbtup::tuxReadAttrs(EmulatedJamBuffer * jamBuf,
                    Uint32 fragPtrI,
                    Uint32 pageId,
                    Uint32 pageIndex,
                    Uint32 tupVersion,
                    const Uint32* attrIds,
                    Uint32 numAttrs,
                    Uint32* dataOut,
                    bool xfrmFlag)
{
  thrjamEntry(jamBuf);
  // use own variables instead of globals
  FragrecordPtr fragPtr;
  fragPtr.i= fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
  TablerecPtr tablePtr;
  tablePtr.i= fragPtr.p->fragTableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);

  // search for tuple version if not original

  Operationrec tmpOp;
  KeyReqStruct req_struct(jamBuf);
  req_struct.tablePtrP = tablePtr.p;
  req_struct.fragPtrP = fragPtr.p;

  tmpOp.m_tuple_location.m_page_no= pageId;
  tmpOp.m_tuple_location.m_page_idx= pageIndex;
  tmpOp.op_struct.op_type = ZREAD; // valgrind
  setup_fixed_part(&req_struct, &tmpOp, tablePtr.p);
  Tuple_header *tuple_ptr= req_struct.m_tuple_ptr;
  if (tuple_ptr->get_tuple_version() != tupVersion)
  {
    jam();
    // Walk the active-operation chain (newest to oldest) looking for
    // the operation whose tupVersion matches the scanned one.
    OperationrecPtr opPtr;
    opPtr.i= tuple_ptr->m_operation_ptr_i;
    Uint32 loopGuard= 0;
    while (opPtr.i != RNIL) {
      c_operation_pool.getPtr(opPtr);
      if (opPtr.p->tupVersion == tupVersion) {
        jam();
        if (!opPtr.p->m_copy_tuple_location.isNull()) {
          // Read from that operation's copy tuple.
          req_struct.m_tuple_ptr=
            get_copy_tuple(&opPtr.p->m_copy_tuple_location);
        }
        break;
      }
      jam();
      opPtr.i= opPtr.p->prevActiveOp;
      // The chain cannot be longer than the tuple-version space.
      ndbrequire(++loopGuard < (1 << ZTUP_VERSION_BITS));
    }
  }
  // read key attributes from found tuple version
  // save globals
  prepare_read(&req_struct, tablePtr.p, false);

  // do it
  int ret = readAttributes(&req_struct,
                           attrIds,
                           numAttrs,
                           dataOut,
                           ZNIL,
                           xfrmFlag);

  // done
  return ret;
}
/*
 * Read the primary-key attributes of the (committed) tuple at
 * (pageId, pageIndex) into dataOut, without attribute headers, and
 * append the row GCI (or 0 if the table has none) at dataOut[ret].
 * Returns the number of PK data words (headers excluded; the GCI word
 * is not counted), or a negative error code from readAttributes.
 * A FREE tuple is skipped: ret stays 0 and only the GCI word is set.
 */
int
Dbtup::tuxReadPk(Uint32 fragPtrI, Uint32 pageId, Uint32 pageIndex, Uint32* dataOut, bool xfrmFlag)
{
  jamEntry();
  // use own variables instead of globals
  FragrecordPtr fragPtr;
  fragPtr.i= fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
  TablerecPtr tablePtr;
  tablePtr.i= fragPtr.p->fragTableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);

  Operationrec tmpOp;
  tmpOp.m_tuple_location.m_page_no= pageId;
  tmpOp.m_tuple_location.m_page_idx= pageIndex;

  KeyReqStruct req_struct(this);
  req_struct.tablePtrP = tablePtr.p;
  req_struct.fragPtrP = fragPtr.p;

  PagePtr page_ptr;
  Uint32* ptr= get_ptr(&page_ptr, &tmpOp.m_tuple_location, tablePtr.p);
  req_struct.m_page_ptr = page_ptr;
  req_struct.m_tuple_ptr = (Tuple_header*)ptr;

  int ret = 0;
  if (! (req_struct.m_tuple_ptr->m_header_bits & Tuple_header::FREE))
  {
    req_struct.check_offset[MM]= tablePtr.p->get_check_offset(MM);
    req_struct.check_offset[DD]= tablePtr.p->get_check_offset(DD);

    Uint32 num_attr= tablePtr.p->m_no_of_attributes;
    Uint32 descr_start= tablePtr.p->tabDescriptor;
    TableDescriptor *tab_descr= &tableDescriptor[descr_start];
    ndbrequire(descr_start + (num_attr << ZAD_LOG_SIZE) <= cnoOfTabDescrRec);
    req_struct.attr_descr= tab_descr;

    // ALLOC means the row is being inserted: the visible data lives in
    // the operation's copy tuple, so read from there.
    if(req_struct.m_tuple_ptr->m_header_bits & Tuple_header::ALLOC)
    {
      Uint32 opPtrI= req_struct.m_tuple_ptr->m_operation_ptr_i;
      Operationrec* opPtrP= c_operation_pool.getPtr(opPtrI);
      ndbassert(!opPtrP->m_copy_tuple_location.isNull());
      req_struct.m_tuple_ptr=
        get_copy_tuple(&opPtrP->m_copy_tuple_location);
    }
    prepare_read(&req_struct, tablePtr.p, false);

    const Uint32* attrIds= &tableDescriptor[tablePtr.p->readKeyArray].tabDescr;
    const Uint32 numAttrs= tablePtr.p->noOfKeyAttr;
    // read pk attributes from original tuple

    // do it
    ret = readAttributes(&req_struct,
                         attrIds,
                         numAttrs,
                         dataOut,
                         ZNIL,
                         xfrmFlag);
    // done
    if (ret >= 0) {
      // remove headers
      // readAttributes wrote [header, data...] per attribute; compact
      // in place so dataOut holds data words only.  After removing n
      // headers every data word shifts left by n positions.
      Uint32 n= 0;
      Uint32 i= 0;
      while (n < numAttrs) {
        const AttributeHeader ah(dataOut[i]);
        Uint32 size= ah.getDataSize();
        ndbrequire(size != 0);
        for (Uint32 j= 0; j < size; j++) {
          dataOut[i + j - n]= dataOut[i + j + 1];
        }
        n+= 1;
        i+= 1 + size;
      }
      // i must have consumed exactly the words readAttributes produced.
      ndbrequire((int)i == ret);
      ret -= numAttrs;
    } else {
      return ret;
    }
  }
  // Append the row GCI (0 when the table does not store per-row GCI).
  if (tablePtr.p->m_bits & Tablerec::TR_RowGCI)
  {
    dataOut[ret] = *req_struct.m_tuple_ptr->get_mm_gci(tablePtr.p);
  }
  else
  {
    dataOut[ret] = 0;
  }
  return ret;
}
290 
291 int
accReadPk(Uint32 tableId,Uint32 fragId,Uint32 fragPageId,Uint32 pageIndex,Uint32 * dataOut,bool xfrmFlag)292 Dbtup::accReadPk(Uint32 tableId, Uint32 fragId, Uint32 fragPageId, Uint32 pageIndex, Uint32* dataOut, bool xfrmFlag)
293 {
294   jamEntry();
295   // get table
296   TablerecPtr tablePtr;
297   tablePtr.i = tableId;
298   ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
299   // get fragment
300   FragrecordPtr fragPtr;
301   getFragmentrec(fragPtr, fragId, tablePtr.p);
302   // get real page id and tuple offset
303 
304   Uint32 pageId = getRealpid(fragPtr.p, fragPageId);
305   // use TUX routine - optimize later
306   int ret = tuxReadPk(fragPtr.i, pageId, pageIndex, dataOut, xfrmFlag);
307   return ret;
308 }
309 
310 /*
311  * TUX index contains all tuple versions.  A scan in TUX has scanned
312  * one of them and asks if it can be returned as scan result.  This
313  * depends on trans id, dirty read flag, and savepoint within trans.
314  *
315  * Previously this faked a ZREAD operation and used getPage().
316  * In TUP getPage() is run after ACC locking, but TUX comes here
317  * before ACC access.  Instead of modifying getPage() it is more
318  * clear to do the full check here.
319  */
/*
 * Decide whether the tuple version a TUX scan found may be returned as
 * a scan result.  Visibility depends on the scanning transaction's id,
 * the dirty-read flag and the savepoint.  Returns true when the
 * version is visible to this scan.  (See the block comment above for
 * why this is done here rather than via a faked ZREAD/getPage.)
 */
bool
Dbtup::tuxQueryTh(Uint32 fragPtrI,
                  Uint32 pageId,
                  Uint32 pageIndex,
                  Uint32 tupVersion,
                  Uint32 transId1,
                  Uint32 transId2,
                  bool dirty,
                  Uint32 savepointId)
{
  jamEntry();
  FragrecordPtr fragPtr;
  fragPtr.i= fragPtrI;
  ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
  TablerecPtr tablePtr;
  tablePtr.i= fragPtr.p->fragTableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
  PagePtr pagePtr;
  pagePtr.i = pageId;
  c_page_pool.getPtr(pagePtr);

  KeyReqStruct req_struct(this);

  {
    Operationrec tmpOp;
    tmpOp.m_tuple_location.m_page_no = pageId;
    tmpOp.m_tuple_location.m_page_idx = pageIndex;
    tmpOp.op_struct.op_type = ZREAD; // valgrind
    setup_fixed_part(&req_struct, &tmpOp, tablePtr.p);
  }

  Tuple_header* tuple_ptr = req_struct.m_tuple_ptr;

  OperationrecPtr currOpPtr;
  currOpPtr.i = tuple_ptr->m_operation_ptr_i;
  if (currOpPtr.i == RNIL) {
    jam();
    // tuple has no operation, any scan can see it
    return true;
  }
  c_operation_pool.getPtr(currOpPtr);

  // Is the operation holding the tuple part of the scanning transaction?
  const bool sameTrans =
    c_lqh->is_same_trans(currOpPtr.p->userpointer, transId1, transId2);

  bool res = false;
  OperationrecPtr loopOpPtr = currOpPtr;

  if (!sameTrans) {
    jam();
    if (!dirty) {
      jam();
      if (currOpPtr.p->nextActiveOp == RNIL) {
        jam();
        // last op - TUX makes ACC lock request in same timeslice
        res = true;
      }
    }
    else {
      // Dirty read from another transaction: only the committed
      // version is visible.
      // loop to first op (returns false)
      find_savepoint(loopOpPtr, 0);
      const Uint32 op_type = loopOpPtr.p->op_struct.op_type;

      if (op_type != ZINSERT) {
        jam();
        // read committed version
        const Uint32 origVersion = tuple_ptr->get_tuple_version();
        if (origVersion == tupVersion) {
          jam();
          res = true;
        }
      }
    }
  }
  else {
    jam();
    // for own trans, ignore dirty flag

    // Find the latest op at or before our savepoint; its version is
    // what this transaction should see.
    if (find_savepoint(loopOpPtr, savepointId)) {
      jam();
      const Uint32 op_type = loopOpPtr.p->op_struct.op_type;

      if (op_type != ZDELETE) {
        jam();
        // check if this op has produced the scanned version
        Uint32 loopVersion = loopOpPtr.p->tupVersion;
        if (loopVersion == tupVersion) {
          jam();
          res = true;
        }
      }
    }
  }

  return res;
}
416 
417 // ordered index build
418 
419 //#define TIME_MEASUREMENT
420 #ifdef TIME_MEASUREMENT
421   static Uint32 time_events;
422   NDB_TICKS tot_time_passed;
423   Uint32 number_events;
424 #endif
/*
 * Handle BUILD_INDX_IMPL_REQ: start building an ordered index (or a
 * REBUILD of ACC when indexId == RNIL).  Seizes a build record,
 * validates the request, notifies TUX that the index is building, and
 * then kicks off either the offline (parallel) or the signal-driven
 * build.  On any validation failure a reply is sent and the build
 * record is released.
 */
void
Dbtup::execBUILD_INDX_IMPL_REQ(Signal* signal)
{
  jamEntry();
#ifdef TIME_MEASUREMENT
  time_events= 0;
  tot_time_passed= 0;
  number_events= 1;
#endif
  const BuildIndxImplReq* const req =
    (const BuildIndxImplReq*)signal->getDataPtr();
  // get new operation
  BuildIndexPtr buildPtr;
  // ERROR_INSERT 4031 simulates seize failure for testing.
  if (ERROR_INSERTED(4031) || ! c_buildIndexList.seize(buildPtr)) {
    jam();
    BuildIndexRec buildRec;
    buildRec.m_request = *req;
    buildRec.m_errorCode = BuildIndxImplRef::Busy;
    if (ERROR_INSERTED(4031))
    {
      CLEAR_ERROR_INSERT_VALUE;
    }
    buildIndexReply(signal, &buildRec);
    return;
  }
  buildPtr.p->m_request = *req;
  const BuildIndxImplReq* buildReq = &buildPtr.p->m_request;
  // check
  buildPtr.p->m_errorCode= BuildIndxImplRef::NoError;
  buildPtr.p->m_outstanding = 0;
  do {
    if (buildReq->tableId >= cnoOfTablerec) {
      jam();
      buildPtr.p->m_errorCode= BuildIndxImplRef::InvalidPrimaryTable;
      break;
    }
    TablerecPtr tablePtr;
    tablePtr.i= buildReq->tableId;
    ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
    if (tablePtr.p->tableStatus != DEFINED) {
      jam();
      buildPtr.p->m_errorCode= BuildIndxImplRef::InvalidPrimaryTable;
      break;
    }
    // memory page format
    buildPtr.p->m_build_vs =
      (tablePtr.p->m_attributes[MM].m_no_of_varsize +
       tablePtr.p->m_attributes[MM].m_no_of_dynamic) > 0;
    if (DictTabInfo::isOrderedIndex(buildReq->indexType)) {
      jam();
      // The index's TUX maintenance trigger must already exist.
      const DLList<TupTriggerData>& triggerList =
        tablePtr.p->tuxCustomTriggers;

      TriggerPtr triggerPtr;
      triggerList.first(triggerPtr);
      while (triggerPtr.i != RNIL) {
        if (triggerPtr.p->indexId == buildReq->indexId) {
          jam();
          break;
        }
        triggerList.next(triggerPtr);
      }
      if (triggerPtr.i == RNIL) {
        jam();
        // trigger was not created
        ndbassert(false);
        buildPtr.p->m_errorCode = BuildIndxImplRef::InternalError;
        break;
      }
      buildPtr.p->m_indexId = buildReq->indexId;
      buildPtr.p->m_buildRef = DBTUX;
      // Tell TUX the index is now in "building" state.
      AlterIndxImplReq* req = (AlterIndxImplReq*)signal->getDataPtrSend();
      req->indexId = buildReq->indexId;
      req->senderRef = 0;
      req->requestType = AlterIndxImplReq::AlterIndexBuilding;
      EXECUTE_DIRECT(DBTUX, GSN_ALTER_INDX_IMPL_REQ, signal,
                     AlterIndxImplReq::SignalLength);
    } else if(buildReq->indexId == RNIL) {
      jam();
      // REBUILD of acc
      buildPtr.p->m_indexId = RNIL;
      buildPtr.p->m_buildRef = DBACC;
    } else {
      jam();
      buildPtr.p->m_errorCode = BuildIndxImplRef::InvalidIndexType;
      break;
    }

    // set to first tuple position
    const Uint32 firstTupleNo = 0;
    buildPtr.p->m_fragNo= 0;
    buildPtr.p->m_pageId= 0;
    buildPtr.p->m_tupleNo= firstTupleNo;
    // start build

    bool offline = !!(buildReq->requestType&BuildIndxImplReq::RF_BUILD_OFFLINE);
    if (offline && m_max_parallel_index_build > 1)
    {
      jam();
      buildIndexOffline(signal, buildPtr.i);
    }
    else
    {
      jam();
      buildIndex(signal, buildPtr.i);
    }
    return;
  } while (0);
  // check failed
  buildIndexReply(signal, buildPtr.p);
  c_buildIndexList.release(buildPtr);
}
537 
/*
 * One CONTINUEB step of the signal-driven index build.  Processes (at
 * most) one tuple of the current fragment/page and re-schedules itself
 * via GSN_CONTINUEB, advancing (m_fragNo, m_pageId, m_tupleNo) as a
 * cursor over the table.  When all fragments are done, replies and
 * releases the build record.
 */
void
Dbtup::buildIndex(Signal* signal, Uint32 buildPtrI)
{
  // get build record
  BuildIndexPtr buildPtr;
  buildPtr.i= buildPtrI;
  c_buildIndexList.getPtr(buildPtr);
  const BuildIndxImplReq* buildReq= &buildPtr.p->m_request;
  // get table
  TablerecPtr tablePtr;
  tablePtr.i= buildReq->tableId;
  ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);

  const Uint32 firstTupleNo = 0;
  const Uint32 tupheadsize = tablePtr.p->m_offsets[MM].m_fix_header_size;

#ifdef TIME_MEASUREMENT
  MicroSecondTimer start;
  MicroSecondTimer stop;
  NDB_TICKS time_passed;
#endif
  do {
    // get fragment
    FragrecordPtr fragPtr;
    if (buildPtr.p->m_fragNo == MAX_FRAG_PER_NODE) {
      jam();
      // build ready
      buildIndexReply(signal, buildPtr.p);
      c_buildIndexList.release(buildPtr);
      return;
    }
    ndbrequire(buildPtr.p->m_fragNo < MAX_FRAG_PER_NODE);
    fragPtr.i= tablePtr.p->fragrec[buildPtr.p->m_fragNo];
    if (fragPtr.i == RNIL) {
      jam();
      // Empty fragment slot: advance cursor to next fragment.
      buildPtr.p->m_fragNo++;
      buildPtr.p->m_pageId= 0;
      buildPtr.p->m_tupleNo= firstTupleNo;
      break;
    }
    ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
    // get page
    PagePtr pagePtr;
    if (buildPtr.p->m_pageId >= fragPtr.p->m_max_page_no) {
      jam();
      // Past last page: advance cursor to next fragment.
      buildPtr.p->m_fragNo++;
      buildPtr.p->m_pageId= 0;
      buildPtr.p->m_tupleNo= firstTupleNo;
      break;
    }
    Uint32 realPageId= getRealpidCheck(fragPtr.p, buildPtr.p->m_pageId);
    // skip empty page
    if (realPageId == RNIL)
    {
      jam();
      // Unmapped page: still run the cursor-advancing logic below,
      // which handles realPageId == RNIL explicitly.
      goto next_tuple;
    }

    c_page_pool.getPtr(pagePtr, realPageId);

next_tuple:
    // get tuple
    Uint32 pageIndex = ~0;
    const Tuple_header* tuple_ptr = 0;
    pageIndex = buildPtr.p->m_tupleNo * tupheadsize;
    if (pageIndex + tupheadsize > Fix_page::DATA_WORDS) {
      jam();
      // Past last tuple slot on this page: advance to next page.
      buildPtr.p->m_pageId++;
      buildPtr.p->m_tupleNo= firstTupleNo;
      break;
    }

    if (realPageId == RNIL)
    {
      jam();
      buildPtr.p->m_tupleNo++;
      break;
    }

    tuple_ptr = (Tuple_header*)&pagePtr.p->m_data[pageIndex];
    // skip over free tuple
    if (tuple_ptr->m_header_bits & Tuple_header::FREE) {
      jam();
      buildPtr.p->m_tupleNo++;
      break;
    }
    Uint32 tupVersion= tuple_ptr->get_tuple_version();
    OperationrecPtr pageOperPtr;
    pageOperPtr.i= tuple_ptr->m_operation_ptr_i;
#ifdef TIME_MEASUREMENT
    NdbTick_getMicroTimer(&start);
#endif
    // add to index
    TuxMaintReq* const req = (TuxMaintReq*)signal->getDataPtrSend();
    req->errorCode = RNIL;
    req->tableId = tablePtr.i;
    req->indexId = buildPtr.p->m_indexId;
    req->fragId = tablePtr.p->fragid[buildPtr.p->m_fragNo];
    req->pageId = realPageId;
    req->tupVersion = tupVersion;
    req->opInfo = TuxMaintReq::OpAdd;
    req->tupFragPtrI = fragPtr.i;
    req->fragPageId = buildPtr.p->m_pageId;
    req->pageIndex = pageIndex;

    if (pageOperPtr.i == RNIL)
    {
      // No ongoing operation: add this single committed version.
      EXECUTE_DIRECT(buildPtr.p->m_buildRef, GSN_TUX_MAINT_REQ,
                     signal, TuxMaintReq::SignalLength+2);
    }
    else
    {
      /*
      If there is an ongoing operation on the tuple then it is either a
      copy tuple or an original tuple with an ongoing transaction. In
      both cases realPageId and pageOffset refer to the original tuple.
      The tuple address stored in TUX will always be the original tuple
      but with the tuple version of the tuple we found.

      This is necessary to avoid having to update TUX at abort of
      update. If an update aborts then the copy tuple is copied to
      the original tuple. The build will however have found that
      tuple as a copy tuple. The original tuple is stable and is thus
      preferrable to store in TUX.
      */
      jam();

      /**
       * Since copy tuples now can't be found on real pages.
       *   we will here build all copies of the tuple
       *
       * Note only "real" tupVersion's should be added
       *      i.e delete's shouldnt be added
       *      (unless it's the first op, when "original" should be added)
       */

      /*
       * Start from first operation.  This is only to make things more
       * clear.  It is not required by ordered index implementation.
       */
      c_operation_pool.getPtr(pageOperPtr);
      while (pageOperPtr.p->prevActiveOp != RNIL)
      {
        jam();
        pageOperPtr.i = pageOperPtr.p->prevActiveOp;
        c_operation_pool.getPtr(pageOperPtr);
      }
      /*
       * Do not use req->errorCode as global control.
       */
      bool ok = true;
      /*
       * If first operation is an update, add previous version.
       * This version does not appear as the version of any operation.
       * At commit this version is removed by executeTuxCommitTriggers.
       * At abort it is preserved by executeTuxAbortTriggers.
       */
      if (pageOperPtr.p->op_struct.op_type == ZUPDATE)
      {
        jam();
        req->errorCode = RNIL;
        req->tupVersion = decr_tup_version(pageOperPtr.p->tupVersion);
        EXECUTE_DIRECT(buildPtr.p->m_buildRef, GSN_TUX_MAINT_REQ,
                       signal, TuxMaintReq::SignalLength+2);
        ok = (req->errorCode == 0);
      }
      /*
       * Add versions from all operations.
       *
       * Each operation has a tuple version.  For insert and update it
       * is the newly created version.  For delete it is the version
       * deleted.  The existence of operation tuple version implies that
       * a corresponding tuple version exists for TUX to read.
       *
       * We could be in the middle of a commit.  The process here makes
       * no assumptions about operation commit order.  (It should be
       * first to last but this is not the place to assert it).
       *
       * Duplicate versions are possible e.g. a delete in the middle
       * may have same version as the previous operation.  TUX ignores
       * duplicate version errors during index build.
       */
      while (pageOperPtr.i != RNIL && ok)
      {
        jam();
        c_operation_pool.getPtr(pageOperPtr);
        req->errorCode = RNIL;
        req->tupVersion = pageOperPtr.p->tupVersion;
        EXECUTE_DIRECT(buildPtr.p->m_buildRef, GSN_TUX_MAINT_REQ,
                       signal, TuxMaintReq::SignalLength+2);
        pageOperPtr.i = pageOperPtr.p->nextActiveOp;
        ok = (req->errorCode == 0);
      }
    }

    jamEntry();
    if (req->errorCode != 0) {
      switch (req->errorCode) {
      case TuxMaintReq::NoMemError:
        jam();
        buildPtr.p->m_errorCode= BuildIndxImplRef::AllocationFailure;
        break;
      default:
        ndbrequire(false);
        break;
      }
      buildIndexReply(signal, buildPtr.p);
      c_buildIndexList.release(buildPtr);
      return;
    }
#ifdef TIME_MEASUREMENT
    NdbTick_getMicroTimer(&stop);
    time_passed= NdbTick_getMicrosPassed(start, stop);
    if (time_passed < 1000) {
      time_events++;
      tot_time_passed += time_passed;
      if (time_events == number_events) {
        NDB_TICKS mean_time_passed= tot_time_passed /
                                     (NDB_TICKS)number_events;
        ndbout << "Number of events= " << number_events;
        ndbout << " Mean time passed= " << mean_time_passed << endl;
        number_events <<= 1;
        tot_time_passed= (NDB_TICKS)0;
        time_events= 0;
      }
    }
#endif
    // next tuple
    buildPtr.p->m_tupleNo++;
    break;
  } while (0);
  // Re-schedule ourselves for the next step.
  signal->theData[0]= ZBUILD_INDEX;
  signal->theData[1]= buildPtr.i;
  sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
}
773 
774 Uint32 Dbtux_mt_buildIndexFragment_wrapper_C(void*);
775 
776 void
buildIndexOffline(Signal * signal,Uint32 buildPtrI)777 Dbtup::buildIndexOffline(Signal* signal, Uint32 buildPtrI)
778 {
779   jam();
780   /**
781    * We need to make table read-only...as mtoib does not work otherwise
782    */
783   BuildIndexPtr buildPtr;
784   buildPtr.i= buildPtrI;
785   c_buildIndexList.getPtr(buildPtr);
786   const BuildIndxImplReq* buildReq =
787     (const BuildIndxImplReq*)&buildPtr.p->m_request;
788 
789   AlterTabReq* req = (AlterTabReq*)signal->getDataPtrSend();
790   bzero(req, sizeof(req));
791   req->senderRef = reference();
792   req->senderData = buildPtrI;
793   req->tableId = buildReq->tableId;
794   req->requestType = AlterTabReq::AlterTableReadOnly;
795   sendSignal(calcInstanceBlockRef(DBLQH), GSN_ALTER_TAB_REQ, signal,
796              AlterTabReq::SignalLength, JBB);
797 }
798 
799 void
execALTER_TAB_CONF(Signal * signal)800 Dbtup::execALTER_TAB_CONF(Signal* signal)
801 {
802   jamEntry();
803   AlterTabConf* conf = (AlterTabConf*)signal->getDataPtr();
804 
805   BuildIndexPtr buildPtr;
806   buildPtr.i = conf->senderData;
807   c_buildIndexList.getPtr(buildPtr);
808 
809 
810   if (buildPtr.p->m_fragNo == 0)
811   {
812     jam();
813     buildIndexOffline_table_readonly(signal, conf->senderData);
814     return;
815   }
816   else
817   {
818     jam();
819     ndbrequire(buildPtr.p->m_fragNo >= MAX_FRAG_PER_NODE);
820     buildIndexReply(signal, buildPtr.p);
821     c_buildIndexList.release(buildPtr);
822     return;
823   }
824 }
825 
826 void
buildIndexOffline_table_readonly(Signal * signal,Uint32 buildPtrI)827 Dbtup::buildIndexOffline_table_readonly(Signal* signal, Uint32 buildPtrI)
828 {
829   // get build record
830   BuildIndexPtr buildPtr;
831   buildPtr.i= buildPtrI;
832   c_buildIndexList.getPtr(buildPtr);
833   const BuildIndxImplReq* buildReq =
834     (const BuildIndxImplReq*)&buildPtr.p->m_request;
835   // get table
836   TablerecPtr tablePtr;
837   tablePtr.i= buildReq->tableId;
838   ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
839 
840   for (;buildPtr.p->m_fragNo < MAX_FRAG_PER_NODE;
841        buildPtr.p->m_fragNo++)
842   {
843     jam();
844     FragrecordPtr fragPtr;
845     fragPtr.i = tablePtr.p->fragrec[buildPtr.p->m_fragNo];
846     if (fragPtr.i == RNIL)
847     {
848       jam();
849       continue;
850     }
851     ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
852     mt_BuildIndxReq req;
853     bzero(&req, sizeof(req));
854     req.senderRef = reference();
855     req.senderData = buildPtr.i;
856     req.tableId = buildReq->tableId;
857     req.indexId = buildPtr.p->m_indexId;
858     req.fragId = tablePtr.p->fragid[buildPtr.p->m_fragNo];
859 
860     SimulatedBlock * tux = globalData.getBlock(DBTUX);
861     if (instance() != 0)
862     {
863       tux = tux->getInstance(instance());
864       ndbrequire(tux != 0);
865     }
866     req.tux_ptr = tux;
867     req.tup_ptr = this;
868     req.func_ptr = Dbtux_mt_buildIndexFragment_wrapper_C;
869     req.buffer_size = 16*32768; // thread-local-buffer
870 
871     Uint32 * req_ptr = signal->getDataPtrSend();
872     memcpy(req_ptr, &req, sizeof(req));
873 
874     sendSignal(NDBFS_REF, GSN_BUILD_INDX_IMPL_REQ, signal,
875                (sizeof(req) + 15) / 4, JBB);
876 
877     buildPtr.p->m_outstanding++;
878     if (buildPtr.p->m_outstanding >= m_max_parallel_index_build)
879     {
880       jam();
881       return;
882     }
883   }
884 
885   if (buildPtr.p->m_outstanding == 0)
886   {
887     jam();
888     AlterTabReq* req = (AlterTabReq*)signal->getDataPtrSend();
889     bzero(req, sizeof(req));
890     req->senderRef = reference();
891     req->senderData = buildPtrI;
892     req->tableId = buildReq->tableId;
893     req->requestType = AlterTabReq::AlterTableReadWrite;
894     sendSignal(calcInstanceBlockRef(DBLQH), GSN_ALTER_TAB_REQ, signal,
895                AlterTabReq::SignalLength, JBB);
896     return;
897   }
898   else
899   {
900     jam();
901     // wait for replies
902     return;
903   }
904 }
905 
906 int
mt_scan_init(Uint32 tableId,Uint32 fragId,Local_key * pos,Uint32 * fragPtrI)907 Dbtup::mt_scan_init(Uint32 tableId, Uint32 fragId,
908                     Local_key* pos, Uint32 * fragPtrI)
909 {
910   TablerecPtr tablePtr;
911   tablePtr.i = tableId;
912   ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
913 
914   FragrecordPtr fragPtr;
915   fragPtr.i = RNIL;
916   for (Uint32 i = 0; i<MAX_FRAG_PER_NODE; i++)
917   {
918     if (tablePtr.p->fragid[i] == fragId)
919     {
920       fragPtr.i = tablePtr.p->fragrec[i];
921       break;
922     }
923   }
924 
925   if (fragPtr.i == RNIL)
926     return -1;
927 
928   ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
929 
930   Uint32 fragPageId = 0;
931   while (fragPageId < fragPtr.p->m_max_page_no)
932   {
933     Uint32 realPageId= getRealpidCheck(fragPtr.p, fragPageId);
934     if (realPageId != RNIL)
935     {
936       * fragPtrI = fragPtr.i;
937       pos->m_page_no = realPageId;
938       pos->m_page_idx = 0;
939       pos->m_file_no = 0;
940       return 0;
941     }
942     fragPageId++;
943   }
944 
945   return 1;
946 }
947 
948 int
mt_scan_next(Uint32 tableId,Uint32 fragPtrI,Local_key * pos,bool moveNext)949 Dbtup::mt_scan_next(Uint32 tableId, Uint32 fragPtrI,
950                     Local_key* pos, bool moveNext)
951 {
952   TablerecPtr tablePtr;
953   tablePtr.i = tableId;
954   ptrCheckGuard(tablePtr, cnoOfTablerec, tablerec);
955 
956   FragrecordPtr fragPtr;
957   fragPtr.i = fragPtrI;
958   ptrCheckGuard(fragPtr, cnoOfFragrec, fragrecord);
959 
960   Uint32 tupheadsize = tablePtr.p->m_offsets[MM].m_fix_header_size;
961   if (moveNext)
962   {
963     pos->m_page_idx += tupheadsize;
964   }
965 
966   PagePtr pagePtr;
967   c_page_pool.getPtr(pagePtr, pos->m_page_no);
968 
969   while (1)
970   {
971     Tuple_header* tuple_ptr;
972     while (pos->m_page_idx + tupheadsize <= Fix_page::DATA_WORDS)
973     {
974       tuple_ptr = (Tuple_header*)(pagePtr.p->m_data + pos->m_page_idx);
975       // skip over free tuple
976       if (tuple_ptr->m_header_bits & Tuple_header::FREE)
977       {
978         pos->m_page_idx += tupheadsize;
979         continue;
980       }
981       pos->m_file_no = tuple_ptr->get_tuple_version();
982       return 0; // Found
983     }
984 
985     // End of page...move to next
986     Uint32 fragPageId = pagePtr.p->frag_page_id + 1;
987     while (fragPageId < fragPtr.p->m_max_page_no)
988     {
989       Uint32 realPageId = getRealpidCheck(fragPtr.p, fragPageId);
990       if (realPageId != RNIL)
991       {
992         pos->m_page_no = realPageId;
993         break;
994       }
995       fragPageId++;
996     }
997 
998     if (fragPageId == fragPtr.p->m_max_page_no)
999       break;
1000 
1001     pos->m_page_idx = 0;
1002     c_page_pool.getPtr(pagePtr, pos->m_page_no);
1003   }
1004 
1005   return 1;
1006 }
1007 
1008 void
execBUILD_INDX_IMPL_REF(Signal * signal)1009 Dbtup::execBUILD_INDX_IMPL_REF(Signal* signal)
1010 {
1011   jamEntry();
1012   BuildIndxImplRef* ref = (BuildIndxImplRef*)signal->getDataPtrSend();
1013   Uint32 ptr = ref->senderData;
1014   Uint32 err = ref->errorCode;
1015 
1016   BuildIndexPtr buildPtr;
1017   c_buildIndexList.getPtr(buildPtr, ptr);
1018   ndbrequire(buildPtr.p->m_outstanding);
1019   buildPtr.p->m_outstanding--;
1020 
1021   buildPtr.p->m_errorCode = (BuildIndxImplRef::ErrorCode)err;
1022   buildPtr.p->m_fragNo = MAX_FRAG_PER_NODE; // No point in starting any more
1023   buildIndexOffline_table_readonly(signal, ptr);
1024 }
1025 
1026 void
execBUILD_INDX_IMPL_CONF(Signal * signal)1027 Dbtup::execBUILD_INDX_IMPL_CONF(Signal* signal)
1028 {
1029   jamEntry();
1030   BuildIndxImplConf* conf = (BuildIndxImplConf*)signal->getDataPtrSend();
1031   Uint32 ptr = conf->senderData;
1032 
1033   BuildIndexPtr buildPtr;
1034   c_buildIndexList.getPtr(buildPtr, ptr);
1035   ndbrequire(buildPtr.p->m_outstanding);
1036   buildPtr.p->m_outstanding--;
1037   buildPtr.p->m_fragNo++;
1038 
1039   buildIndexOffline_table_readonly(signal, ptr);
1040 }
1041 
1042 void
buildIndexReply(Signal * signal,const BuildIndexRec * buildPtrP)1043 Dbtup::buildIndexReply(Signal* signal, const BuildIndexRec* buildPtrP)
1044 {
1045   const BuildIndxImplReq* buildReq = &buildPtrP->m_request;
1046 
1047   AlterIndxImplReq* req = (AlterIndxImplReq*)signal->getDataPtrSend();
1048   req->indexId = buildReq->indexId;
1049   req->senderRef = 0; //
1050   if (buildPtrP->m_errorCode == BuildIndxImplRef::NoError)
1051   {
1052     jam();
1053     req->requestType = AlterIndxImplReq::AlterIndexOnline;
1054   }
1055   else
1056   {
1057     jam();
1058     req->requestType = AlterIndxImplReq::AlterIndexOffline;
1059   }
1060   EXECUTE_DIRECT(DBTUX, GSN_ALTER_INDX_IMPL_REQ, signal,
1061                  AlterIndxImplReq::SignalLength);
1062 
1063   if (buildPtrP->m_errorCode == BuildIndxImplRef::NoError) {
1064     jam();
1065     BuildIndxImplConf* conf =
1066       (BuildIndxImplConf*)signal->getDataPtrSend();
1067     conf->senderRef = reference();
1068     conf->senderData = buildReq->senderData;
1069 
1070     sendSignal(buildReq->senderRef, GSN_BUILD_INDX_IMPL_CONF,
1071                signal, BuildIndxImplConf::SignalLength, JBB);
1072   } else {
1073     jam();
1074     BuildIndxImplRef* ref =
1075       (BuildIndxImplRef*)signal->getDataPtrSend();
1076     ref->senderRef = reference();
1077     ref->senderData = buildReq->senderData;
1078     ref->errorCode = buildPtrP->m_errorCode;
1079 
1080     sendSignal(buildReq->senderRef, GSN_BUILD_INDX_IMPL_REF,
1081                signal, BuildIndxImplRef::SignalLength, JBB);
1082   }
1083 }
1084