1 /*
2    Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 #define DBTUX_SCAN_CPP
26 #include "Dbtux.hpp"
27 #include "my_sys.h"
28 
29 #define JAM_FILE_ID 371
30 
31 /**
32  * To speed up query processing we calculate a number of variables
33  * as part of our context while processing scan operations.
34  *
35  * This method is called every time we come back from a real-time
 * break from LQH to set up all needed context to scan a range in
37  * TUX.
38  *
39  * These variables are:
40  * --------------------
41  * c_ctx.scanPtr
42  *   This is the pointer and i-value of the scan record
43  *
44  * c_ctx.fragPtr
 *   This is the pointer and i-value of the table fragment being
 *   scanned, i.e. the fragment record in TUX.
47  *
48  * c_ctx.indexPtr
49  *   This is the pointer and i-value of the index fragment record
50  *   currently being scanned. There can be multiple indexes on one
51  *   fragment.
52  *
 * The following variables are set up using the prepare_scan_bounds method:
54  * .......................................................................
55  * c_ctx.searchScanDataArray
56  *   This is a KeyDataArray object (NdbPack::DataArray) representing
57  *   the right part of the boundary of the range scan.
58  *
59  * c_ctx.searchScanBoundArray
60  *   This is the KeyBoundArray object (NdbPack::BoundArray) also
61  *   representing the right part of the boundary of the range scan.
62  *   It contains the above KeyDataArray and also the scan direction
63  *   (whether we are scanning ascending or descending).
 * The above two are only set if the boundary has at least one
 * bounded column. An ordered full table scan has no boundary, so
 * they are left unset and c_ctx.scanBoundCnt is set to 0.
68  *
69  * c_ctx.keyAttrs
70  *   This is the pointer to the Attrinfo array used to read the key
71  *   values from TUP. It is calculated from information in the
72  *   index fragment record.
73  * c_ctx.descending
74  *   This represents information about ascending or descending scan
75  *   derived from the scan object.
76  * c_ctx.scanBoundCnt
77  *   This represents the number of columns involved in the boundary
78  *   condition the scan uses.
79  *
 * The following variables are set up through the prepare_all_tup_ptrs method:
81  * ..........................................................................
82  * c_ctx.tupIndexFragPtr
83  *   This is a pointer that points to the index fragment record for the index
84  *   scanned within TUP. These TUP pointers are represented as Uint32* pointers
85  *   in TUX to avoid having to include Dbtup.hpp in TUX.
86  * c_ctx.tupIndexTablePtr
87  *   This is a pointer that points to the index table record within TUP.
88  * c_ctx.tupRealFragPtr
89  *   This is a pointer that points to the fragment record in TUP of the
90  *   table fragment being scanned.
91  * c_ctx.tupRealTablePtr
92  *   This is a pointer that points to the table record in TUP of the table
93  *   being scanned.
94  * c_ctx.tuxFixHeaderSize
95  *   This variable contains the header size of the tuples used for index
96  *   nodes. These index nodes are stored in special index tables in TUP.
97  * c_ctx.attrDataOffset
98  *   This variable contains the offset within the data part of the index
99  *   node where the actual node starts.
100  */
101 
102 inline static void
prefetch_scan_record_3(Uint32* scan_ptr)
104 {
105   NDB_PREFETCH_WRITE(scan_ptr);
106   NDB_PREFETCH_WRITE(scan_ptr + 16);
107   NDB_PREFETCH_WRITE(scan_ptr + 32);
108 }
109 
110 void
Dbtux::prepare_scan_ctx(Uint32 scanPtrI)
112 {
113   jamDebug();
114   FragPtr fragPtr;
115   ScanOpPtr scanPtr;
116   IndexPtr indexPtr;
117   scanPtr.i = scanPtrI;
118   ndbrequire(c_scanOpPool.getUncheckedPtrRW(scanPtr));
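  // the unchecked pointer is validated by Magic::check_ptr at the end of
  // this method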
119   prefetch_scan_record_3((Uint32*)scanPtr.p);
120   c_ctx.scanPtr = scanPtr;
121   fragPtr.i = scanPtr.p->m_fragPtrI;
122   c_fragPool.getPtr(fragPtr);
123   indexPtr.i = fragPtr.p->m_indexId;
124   c_ctx.fragPtr = fragPtr;
125   c_indexPool.getPtr(indexPtr);
126   c_ctx.indexPtr = indexPtr;
127   prepare_scan_bounds(scanPtr.p, indexPtr.p);
128   prepare_all_tup_ptrs(c_ctx);
129   ndbrequire(Magic::check_ptr(scanPtr.p));
130 }
131 
132 /**
133  * We are preparing to call scanNext to move a scan forward
134  * since the scan stopped on a row that is now being deleted.
135  * At this point we have already called prepare_build_ctx.
 * Thus we need only set up
 * c_ctx.scanPtr and the variables set up in the method
 * prepare_scan_bounds. Even though setting c_ctx.keyAttrs
 * (done in prepare_scan_bounds) isn't strictly necessary here,
 * it is kept to avoid having to call an extra method in the
 * more common path coming from prepare_scan_ctx.
142  *
143  * We cannot call this method when we are performing a
144  * multi-threaded index build operation. This can only
145  * happen during a restart and during a restart a node
146  * cannot execute any scan operation.
147  */
148 void
Dbtux::prepare_move_scan_ctx(ScanOpPtr scanPtr)
150 {
151   Index *indexPtrP = c_ctx.indexPtr.p;
152   c_ctx.scanPtr = scanPtr;
153   prepare_scan_bounds(scanPtr.p, indexPtrP);
154 }
155 
156 /**
157  * This method is called either from building of an index
 * or when updating an index from execTUX_MAINT_REQ. It sets
 * up the variables needed for index reorganisations. There is
 * no scan boundary in this case, only a key boundary,
 * but this is set up by the caller of this method.
162  */
163 void
Dbtux::prepare_build_ctx(TuxCtx& ctx, FragPtr fragPtr)
165 {
166   IndexPtr indexPtr;
167   ctx.fragPtr = fragPtr;
168   indexPtr.i = fragPtr.p->m_indexId;
169   c_indexPool.getPtr(indexPtr);
170   ctx.indexPtr = indexPtr;
171   const Index& index = *indexPtr.p;
172   const DescHead& descHead = getDescHead(index);
173   const AttributeHeader* keyAttrs = getKeyAttrs(descHead);
174   ctx.keyAttrs = (Uint32*)keyAttrs;
175   prepare_all_tup_ptrs(ctx);
176 }
177 
178 /**
179  * This method is called from prepare_scan_ctx after a real-time break has
 * happened and we need to set up the scan context again.
181  *
182  * It is also called at start of a fragment scan setup from
183  * execTUX_BOUND_INFO.
184  *
185  * We also need to call it before moving the scan ahead after a row was
186  * deleted while we were processing a scan on the tuple. This code calls
 * scanNext and moves to the next row and thus we need to set up this part
188  * of the scan context there as well.
189  */
190 void
Dbtux::prepare_scan_bounds(const ScanOp *scanPtrP, const Index *indexPtrP)
192 {
193   jamDebug();
194   const ScanOp& scan = *scanPtrP;
195   const Index& index = *indexPtrP;
196 
197   const unsigned idir = scan.m_descending;
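  // The end bound of the scan (checked by scanCheck): the upper bound for
  // an ascending scan, the lower bound for a descending scan.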
198   const ScanBound& scanBound = scan.m_scanBound[1 - idir];
199   if (likely(scanBound.m_cnt != 0))
200   {
201     jamDebug();
202     KeyDataC searchBoundData(index.m_keySpec, true);
203     KeyBoundC searchBound(searchBoundData);
204     unpackBound(c_ctx.c_nextKey, scanBound, searchBound);
205     KeyDataArray *key_data = new (&c_ctx.searchScanDataArray)
206                              KeyDataArray();
207     key_data->init_bound(searchBound, scanBound.m_cnt);
208     KeyBoundArray *searchBoundArray = new (&c_ctx.searchScanBoundArray)
209        KeyBoundArray(&index.m_keySpec,
210                      key_data,
211                      scanBound.m_side);
212     (void)searchBoundArray;
213   }
214   const DescHead& descHead = getDescHead(index);
215   const AttributeHeader* keyAttrs = getKeyAttrs(descHead);
216   c_ctx.keyAttrs = (Uint32*)keyAttrs;
217   c_ctx.descending = scan.m_descending;
218   c_ctx.scanBoundCnt = scanBound.m_cnt;
219 }
220 
221 
222 void
Dbtux::execACC_CHECK_SCAN(Signal* signal)
224 {
225   jamEntryDebug();
226   const AccCheckScan *req = (const AccCheckScan*)signal->getDataPtr();
227   ScanOpPtr scanPtr = c_ctx.scanPtr;
228   ScanOp& scan = *scanPtr.p;
229   Frag& frag = *c_ctx.fragPtr.p;
230 #ifdef VM_TRACE
231   if (debugFlags & DebugScan) {
232     tuxDebugOut << "ACC_CHECK_SCAN scan " << scanPtr.i << " " << scan << endl;
233   }
234 #endif
235 
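  // check_freeScanLock returns true if a ScanLock record was needed but
  // could not be allocated, in which case we must wait and retry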
236   bool wait_scan_lock_record = check_freeScanLock(scan);
237   if (req->checkLcpStop == AccCheckScan::ZCHECK_LCP_STOP &&
238       (scan.m_lockwait ||
239        wait_scan_lock_record))
240   {
241     /**
     * Go to sleep for one millisecond if we encounter a locked row
     * or if we could not allocate a ScanLock record.
244      */
245     jam();
246     CheckLcpStop* cls = (CheckLcpStop*) signal->theData;
247     cls->scanPtrI = scan.m_userPtr;
248     if (wait_scan_lock_record)
249     {
250       jam();
251       cls->scanState = CheckLcpStop::ZSCAN_RESOURCE_WAIT_STOPPABLE;
252     }
253     else
254     {
255       jam();
256       cls->scanState = CheckLcpStop::ZSCAN_RESOURCE_WAIT;
257     }
258     EXECUTE_DIRECT(DBLQH, GSN_CHECK_LCP_STOP, signal, 2);
259     if (signal->theData[0] == CheckLcpStop::ZTAKE_A_BREAK)
260     {
261       jamEntry();
262       release_c_free_scan_lock();
263       return;
264     }
265     jamEntry();
266     ndbrequire(signal->theData[0] == CheckLcpStop::ZABORT_SCAN);
267     /* Fall through, we will send NEXT_SCANCONF, this will detect close */
268   }
269   continue_scan(signal, scanPtr, frag, wait_scan_lock_record);
270   ndbassert(c_freeScanLock == RNIL); // No ndbrequire, will destroy tail call
271 }
272 
273 /*
274  * Error handling:  Any seized scan op is released.  ACC_SCANREF is sent
275  * to LQH.  LQH sets error code, and treats this like ZEMPTY_FRAGMENT.
276  * Therefore scan is now closed on both sides.
277  */
278 void
Dbtux::execACC_SCANREQ(Signal* signal)
280 {
281   jamEntry();
282   const AccScanReq *req = (const AccScanReq*)signal->getDataPtr();
283   Uint32 errorCode = 0;
284   ScanOpPtr scanPtr;
285   scanPtr.i = RNIL;
286   do {
287     // get the index
288     IndexPtr indexPtr;
289     c_indexPool.getPtr(indexPtr, req->tableId);
290     // get the fragment
291     FragPtr fragPtr;
292     findFrag(jamBuffer(), *indexPtr.p, req->fragmentNo, fragPtr);
293     ndbrequire(fragPtr.i != RNIL);
294     Frag& frag = *fragPtr.p;
295     // check for index not Online (i.e. Dropping)
296     c_ctx.indexPtr = indexPtr;
297     c_ctx.fragPtr = fragPtr;
298     if (unlikely(indexPtr.p->m_state != Index::Online)) {
299       jam();
300 #ifdef VM_TRACE
301       if (debugFlags & (DebugMeta | DebugScan)) {
302         tuxDebugOut << "Index dropping at ACC_SCANREQ " << indexPtr.i
303                     << " " << *indexPtr.p << endl;
304       }
305 #endif
306       errorCode = AccScanRef::TuxIndexNotOnline;
307       break;
308     }
309     // must be normal DIH/TC fragment
310     TreeHead& tree = frag.m_tree;
311     // check for empty fragment
312     if (tree.m_root == NullTupLoc)
313     {
314       jam();
315       scanPtr.p = NULL;
316       c_ctx.scanPtr = scanPtr; // Ensure crash if we try to use pointer.
317       AccScanConf* const conf = (AccScanConf*)signal->getDataPtrSend();
318       conf->scanPtr = req->senderData;
319       conf->accPtr = RNIL;
320       conf->flag = AccScanConf::ZEMPTY_FRAGMENT;
321       signal->theData[8] = 0;
322       /* Return ACC_SCANCONF */
323       return;
324     }
325     // seize from pool and link to per-fragment list
326     if (ERROR_INSERTED(12008) ||
327         ! c_scanOpPool.seize(scanPtr)) {
328       CLEAR_ERROR_INSERT_VALUE;
329       jam();
330       // should never happen but can be used to test error handling
331       errorCode = AccScanRef::TuxNoFreeScanOp;
332       break;
333     }
334     frag.m_scanList.addFirst(scanPtr);
335     scanPtr.p->m_state = ScanOp::First;
336     scanPtr.p->m_userPtr = req->senderData;
337     scanPtr.p->m_userRef = req->senderRef;
338     scanPtr.p->m_tableId = indexPtr.p->m_tableId;
339     scanPtr.p->m_indexId = indexPtr.i;
340     scanPtr.p->m_fragId = fragPtr.p->m_fragId;
341     scanPtr.p->m_fragPtrI = fragPtr.i;
342     scanPtr.p->m_transId1 = req->transId1;
343     scanPtr.p->m_transId2 = req->transId2;
344     scanPtr.p->m_savePointId = req->savePointId;
345     scanPtr.p->m_readCommitted =
346       AccScanReq::getReadCommittedFlag(req->requestInfo);
347     scanPtr.p->m_lockMode = AccScanReq::getLockMode(req->requestInfo);
348     scanPtr.p->m_descending = AccScanReq::getDescendingFlag(req->requestInfo);
349     c_ctx.scanPtr = scanPtr;
350     /*
351      * readCommitted lockMode keyInfo
352      * 1 0 0 - read committed (no lock)
353      * 0 0 0 - read latest (read lock)
354      * 0 1 1 - read exclusive (write lock)
355      */
356     const bool isStatScan = AccScanReq::getStatScanFlag(req->requestInfo);
357     if (unlikely(isStatScan)) {
358       jam();
359       // Check if index stat can handle this index length
360       Uint32 indexMaxKeyBytes = indexPtr.p->m_keySpec.get_max_data_len(false);
361       if (indexMaxKeyBytes > (StatOp::MaxKeySize * 4)) {
362         jam();
363         errorCode = AccScanRef::TuxInvalidKeySize;
364         break;
365       }
366 
367       if (!scanPtr.p->m_readCommitted) {
368         jam();
369         errorCode = AccScanRef::TuxInvalidLockMode;
370         break;
371       }
372       StatOpPtr statPtr;
373       if (!c_statOpPool.seize(statPtr)) {
374         jam();
375         errorCode = AccScanRef::TuxNoFreeStatOp;
376         break;
377       }
378       scanPtr.p->m_statOpPtrI = statPtr.i;
379       new (statPtr.p) StatOp(*indexPtr.p);
380       statPtr.p->m_scanOpPtrI = scanPtr.i;
381       // rest of StatOp is initialized in execTUX_BOUND_INFO
382 #ifdef VM_TRACE
383       if (debugFlags & DebugStat) {
384         tuxDebugOut << "Seize stat op" << endl;
385       }
386 #endif
387     }
388 #ifdef VM_TRACE
389     if (debugFlags & DebugScan) {
390       tuxDebugOut << "Seize scan " << scanPtr.i << " " << *scanPtr.p << endl;
391     }
392 #endif
393     // conf
394     AccScanConf* const conf = (AccScanConf*)signal->getDataPtrSend();
395     conf->scanPtr = req->senderData;
396     conf->accPtr = scanPtr.i;
397     conf->flag = AccScanConf::ZNOT_EMPTY_FRAGMENT;
398     signal->theData[8] = 0;
399     /* Return ACC_SCANCONF */
400     return;
401   } while (0);
402   if (scanPtr.i != RNIL) {
403     jam();
404     releaseScanOp(scanPtr);
405   }
406   // ref
407   ndbrequire(errorCode != 0);
408   signal->theData[8] = errorCode;
409   /* Return ACC_SCANREF */
410 }
411 
412 /*
413  * Receive bounds for scan in single direct call.  The bounds can arrive
414  * in any order.  Attribute ids are those of index table.
415  *
416  * Replace EQ by equivalent LE + GE.  Check for conflicting bounds.
417  * Check that sets of lower and upper bounds are on initial sequences of
 * keys and that all but possibly the last bound is non-strict.
419  *
420  * Finally convert the sets of lower and upper bounds (i.e. start key
421  * and end key) to NdbPack format.  The data is saved in segmented
422  * memory.  The bound is reconstructed at use time via unpackBound().
423  *
424  * Error handling:  Error code is set in the scan and also returned in
425  * EXECUTE_DIRECT (the old way).
426  */
427 void
Dbtux::execTUX_BOUND_INFO(Signal* signal)
429 {
430   jamEntry();
431   // get records
432   TuxBoundInfo* const req = (TuxBoundInfo*)signal->getDataPtrSend();
433   ScanOpPtr scanPtr = c_ctx.scanPtr;
434   ScanOp& scan = *scanPtr.p;
435   const Index& index = *c_ctx.indexPtr.p;
436 
437   // compiler warning unused: const DescHead& descHead = getDescHead(index);
438   // compiler warning unused: const KeyType* keyTypes = getKeyTypes(descHead);
439   // data passed in Signal
440   const Uint32* const boundData = &req->data[0];
441   Uint32 boundLen = req->boundAiLength;
442   Uint32 boundOffset = 0;
443   // initialize stats scan
444   if (unlikely(scan.m_statOpPtrI != RNIL))
445   {
446     // stats options before bounds
447     StatOpPtr statPtr;
448     statPtr.i = scan.m_statOpPtrI;
449     c_statOpPool.getPtr(statPtr);
450     Uint32 usedLen = 0;
451     if (unlikely(statScanInit(statPtr, boundData, boundLen, &usedLen) == -1))
452     {
453       jam();
454       ndbrequire(scan.m_errorCode != 0);
455       req->errorCode = scan.m_errorCode;
456       return;
457     }
458     ndbrequire(usedLen <= boundLen);
459     boundLen -= usedLen;
460     boundOffset += usedLen;
461   }
462   // extract lower and upper bound in separate passes
463   for (unsigned idir = 0; idir <= 1; idir++)
464   {
465     jamDebug();
466     struct BoundInfo {
467       int type2;      // with EQ -> LE/GE
468       Uint32 offset;  // word offset in signal data
469       Uint32 bytes;
470     };
471     BoundInfo boundInfo[MaxIndexAttributes];
472     // largest attrId seen plus one
473     Uint32 maxAttrId = 0;
474     const Uint32* const data = &boundData[boundOffset];
475     Uint32 offset = 0;
476     while (offset + 2 <= boundLen) {
477       jamDebug();
478       const Uint32 type = data[offset];
479       const AttributeHeader* ah = (const AttributeHeader*)&data[offset + 1];
480       const Uint32 attrId = ah->getAttributeId();
481       const Uint32 byteSize = ah->getByteSize();
482       const Uint32 dataSize = ah->getDataSize();
483       // check type
484       if (unlikely(type > 4))
485       {
486         jam();
487         scan.m_errorCode = TuxBoundInfo::InvalidAttrInfo;
488         req->errorCode = scan.m_errorCode;
489         return;
490       }
491       Uint32 type2 = type;
492       if (type2 == 4)
493       {
494         jamDebug();
495         type2 = (idir << 1); // LE=0 GE=2
496       }
497       // check if attribute belongs to this bound
498       if ((type2 & 0x2) == (idir << 1))
499       {
500         if (unlikely(attrId >= index.m_numAttrs))
501         {
502           jam();
503           scan.m_errorCode = TuxBoundInfo::InvalidAttrInfo;
504           req->errorCode = scan.m_errorCode;
505           return;
506         }
507         // mark entries in any gap as undefined
508         while (maxAttrId <= attrId)
509         {
510           jamDebug();
511           BoundInfo& b = boundInfo[maxAttrId];
512           b.type2 = -1;
513           maxAttrId++;
514         }
515         BoundInfo& b = boundInfo[attrId];
516         // duplicate no longer allowed (wl#4163)
517         if (unlikely(b.type2 != -1))
518         {
519           jam();
520           scan.m_errorCode = TuxBoundInfo::InvalidBounds;
521           req->errorCode = scan.m_errorCode;
522           return;
523         }
524         b.type2 = (int)type2;
525         b.offset = offset + 1; // poai
526         b.bytes = byteSize;
527       }
528       // jump to next
529       offset += 2 + dataSize;
530     }
531     if (unlikely(offset != boundLen))
532     {
533       jam();
534       scan.m_errorCode = TuxBoundInfo::InvalidAttrInfo;
535       req->errorCode = scan.m_errorCode;
536       return;
537     }
538     // check and pack the bound data
539     KeyData searchBoundData(index.m_keySpec, true, 0);
540     KeyBound searchBound(searchBoundData);
541     searchBoundData.set_buf(c_ctx.c_searchKey, MaxAttrDataSize << 2);
542     int strict = 0; // 0 or 1
543     Uint32 i;
544     for (i = 0; i < maxAttrId; i++)
545     {
546       jamDebug();
547       const BoundInfo& b = boundInfo[i];
548        // check for gap or strict bound before last
549        strict = (b.type2 & 0x1);
550        if (unlikely(b.type2 == -1 || (i + 1 < maxAttrId && strict)))
551        {
552          jam();
553          scan.m_errorCode = TuxBoundInfo::InvalidBounds;
554          req->errorCode = scan.m_errorCode;
555          return;
556        }
557        Uint32 len;
558        if (unlikely(searchBoundData.add_poai(&data[b.offset], &len) == -1 ||
559            b.bytes != len))
560        {
561          jam();
562          scan.m_errorCode = TuxBoundInfo::InvalidCharFormat;
563          req->errorCode = scan.m_errorCode;
564          return;
565        }
566     }
567     int side = 0;
568     if (maxAttrId != 0)
569     {
570       // arithmetic is faster
571       // side = (idir == 0 ? (strict ? +1 : -1) : (strict ? -1 : +1));
572       side = (-1) * (1 - 2 * strict) * (1 - 2 * int(idir));
573     }
574     if (unlikely(searchBound.finalize(side) == -1))
575     {
576       jam();
577       scan.m_errorCode = TuxBoundInfo::InvalidCharFormat;
578       req->errorCode = scan.m_errorCode;
579       return;
580     }
581     ScanBound& scanBound = scan.m_scanBound[idir];
582     scanBound.m_cnt = maxAttrId;
583     scanBound.m_side = side;
584     // save data words in segmented memory
585     {
586       ScanBoundBuffer::Head& head = scanBound.m_head;
587       LocalScanBoundBuffer b(c_scanBoundPool, head);
588       const Uint32* data = (const Uint32*)searchBoundData.get_data_buf();
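      // bound data length is in bytes, convert to words rounding up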
589       Uint32 size = (searchBoundData.get_data_len() + 3) / 4;
590       bool ok = b.append(data, size);
591       if (unlikely(!ok))
592       {
593         jam();
594         scan.m_errorCode = TuxBoundInfo::OutOfBuffers;
595         req->errorCode = scan.m_errorCode;
596         return;
597       }
598     }
599   }
600   if (ERROR_INSERTED(12009)) {
601     jam();
602     CLEAR_ERROR_INSERT_VALUE;
603     scan.m_errorCode = TuxBoundInfo::InvalidBounds;
604     req->errorCode = scan.m_errorCode;
605     return;
606   }
607   prepare_scan_bounds(scanPtr.p, c_ctx.indexPtr.p);
608   prepare_all_tup_ptrs(c_ctx);
609   // no error
610   req->errorCode = 0;
611 }
612 
613 void
Dbtux::execNEXT_SCANREQ(Signal* signal)
615 {
616   const NextScanReq *req = (const NextScanReq*)signal->getDataPtr();
617   ScanOp& scan = *c_ctx.scanPtr.p;
618   Frag& frag = *c_ctx.fragPtr.p;
619   Uint32 scanFlag = req->scanFlag;
620 #ifdef VM_TRACE
621   if (debugFlags & DebugScan) {
622     tuxDebugOut << "NEXT_SCANREQ scan " << c_ctx.scanPtr.i << " "
623                 << scan << endl;
624   }
625 #endif
626   // handle unlock previous and close scan
627   switch (scanFlag) {
628   case NextScanReq::ZSCAN_NEXT:
629     jamDebug();
630     break;
631   case NextScanReq::ZSCAN_COMMIT:
632     jamDebug();
633     // Fall through
634   case NextScanReq::ZSCAN_NEXT_COMMIT:
635     jamDebug();
636     if (! scan.m_readCommitted)
637     {
638       jam();
639       Uint32 accOperationPtr = req->accOperationPtr;
640       AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend();
641       lockReq->returnCode = RNIL;
642       lockReq->requestInfo = AccLockReq::Unlock;
643       lockReq->accOpPtr = accOperationPtr;
644       EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal, AccLockReq::UndoSignalLength);
645       jamEntryDebug();
646       ndbrequire(lockReq->returnCode == AccLockReq::Success);
647       removeAccLockOp(c_ctx.scanPtr, accOperationPtr);
648     }
649     if (scanFlag == NextScanReq::ZSCAN_COMMIT)
650     {
651       jamDebug();
652       signal->theData[0] = 0; /* Success */
653       /**
654        * Return with signal->theData[0] = 0 means a return
655        * signal NEXT_SCANCONF for NextScanReq::ZSCAN_COMMIT
656        */
657       return;
658     }
659     break;
660   case NextScanReq::ZSCAN_CLOSE:
661     jamDebug();
662     // unlink from tree node first to avoid state changes
663     if (scan.m_scanPos.m_loc != NullTupLoc)
664     {
665       jam();
666       const TupLoc loc = scan.m_scanPos.m_loc;
667       NodeHandle node(frag);
668       selectNode(c_ctx, node, loc);
669       unlinkScan(node, c_ctx.scanPtr);
670       scan.m_scanPos.m_loc = NullTupLoc;
671     }
672     if (unlikely(scan.m_lockwait))
673     {
674       jam();
675       ndbrequire(scan.m_accLockOp != RNIL);
676       // use ACC_ABORTCONF to flush out any reply in job buffer
677       AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend();
678       lockReq->returnCode = RNIL;
679       lockReq->requestInfo = AccLockReq::AbortWithConf;
680       lockReq->accOpPtr = scan.m_accLockOp;
681       EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal,
682 		     AccLockReq::UndoSignalLength);
683       jamEntry();
684       ndbrequire(lockReq->returnCode == AccLockReq::Success);
685       scan.m_state = ScanOp::Aborting;
686       return;
687     }
688     if (scan.m_state == ScanOp::Locked)
689     {
690       jam();
691       ndbrequire(scan.m_accLockOp != RNIL);
692       AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend();
693       lockReq->returnCode = RNIL;
694       lockReq->requestInfo = AccLockReq::Abort;
695       lockReq->accOpPtr = scan.m_accLockOp;
696       EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal,
697 		     AccLockReq::UndoSignalLength);
698       jamEntry();
699       ndbrequire(lockReq->returnCode == AccLockReq::Success);
700       scan.m_accLockOp = RNIL;
701     }
702     scan.m_state = ScanOp::Aborting;
703     scanClose(signal, c_ctx.scanPtr);
704     return;
705   case NextScanReq::ZSCAN_NEXT_ABORT:
706     ndbabort();
707   default:
708     jam();
709     ndbabort();
710   }
711   bool wait_scan_lock_record = check_freeScanLock(scan);
712   continue_scan(signal, c_ctx.scanPtr, frag, wait_scan_lock_record);
713   ndbassert(c_freeScanLock == RNIL); // No ndbrequire, will destroy tail call
714 }
715 
716 void
Dbtux::continue_scan(Signal *signal,
718                      ScanOpPtr scanPtr,
719                      Frag& frag,
720                      bool wait_scan_lock_record)
721 {
722   ScanOp& scan = *scanPtr.p;
723 #ifdef VM_TRACE
724   if (debugFlags & DebugScan) {
725     tuxDebugOut << "ACC_CHECK_SCAN scan " << scanPtr.i << " " << scan << endl;
726   }
727 #endif
728   const Index& index = *c_ctx.indexPtr.p;
729   if (unlikely(scan.m_lockwait || wait_scan_lock_record))
730   {
731     jam();
732     /**
     * LQH asks if we are waiting for a lock and we tell it to ask again.
     * This is used to check if TC has ordered close, both when we
     * cannot allocate a lock record and when we encountered a locked row.
736      */
737     release_c_free_scan_lock();
738     NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend();
739     conf->scanPtr = scan.m_userPtr;
740     conf->accOperationPtr = RNIL;       // no tuple returned
741     conf->fragId = frag.m_fragId;
742     // if TC has ordered scan close, it will be detected here
743     sendSignal(scan.m_userRef,
744                GSN_NEXT_SCANCONF,
745                signal,
746                NextScanConf::SignalLengthNoTuple,
747                JBB);
748     return;     // stop
749   }
750   // check index online
751   if (unlikely(index.m_state != Index::Online) &&
752       scan.m_errorCode == 0)
753   {
754     jam();
755 #ifdef VM_TRACE
756     if (debugFlags & (DebugMeta | DebugScan)) {
757       tuxDebugOut << "Index dropping at execACC_CHECK_SCAN " << scanPtr.i
758                   << " " << *scanPtr.p << endl;
759     }
760 #endif
761     scan.m_errorCode = AccScanRef::TuxIndexNotOnline;
762   }
763   if (unlikely(scan.m_errorCode != 0))
764   {
765     jamDebug();
766     release_c_free_scan_lock();
767     NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend();
768     conf->scanPtr = scan.m_userPtr;
769     conf->accOperationPtr = RNIL;
770     conf->fragId = RNIL;
771     signal->setLength(NextScanConf::SignalLengthNoTuple);
772     c_lqh->exec_next_scan_conf(signal);
773     return;
774   }
775   if (scan.m_state == ScanOp::First)
776   {
777     jamDebug();
778     // search is done only once in single range scan
779     scanFirst(scanPtr, frag, index);
780   }
781   if (scan.m_state == ScanOp::Current ||
782       scan.m_state == ScanOp::Next)
783   {
784     jamDebug();
785     // look for next
786     scanFind(scanPtr, frag);
787   }
788   // for reading tuple key in Found or Locked state
789   Uint32* pkData = c_ctx.c_dataBuffer;
790   unsigned pkSize = 0; // indicates not yet done
791   if (likely(scan.m_state == ScanOp::Found))
792   {
793     // found an entry to return
794     jamDebug();
795     ndbrequire(scan.m_accLockOp == RNIL);
796     if (unlikely(! scan.m_readCommitted))
797     {
798       jamDebug();
799       const TreeEnt ent = scan.m_scanEnt;
800       // read tuple key
801       readTablePk(ent, pkData, pkSize);
802       // get read lock or exclusive lock
803       AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend();
804       lockReq->returnCode = RNIL;
805       lockReq->requestInfo =
806         scan.m_lockMode == 0 ? AccLockReq::LockShared : AccLockReq::LockExclusive;
807       lockReq->accOpPtr = RNIL;
808       lockReq->userPtr = scanPtr.i;
809       lockReq->userRef = reference();
810       lockReq->tableId = scan.m_tableId;
811       lockReq->fragId = frag.m_fragId;
812       lockReq->fragPtrI = frag.m_accTableFragPtrI;
813       const Uint32* const buf32 = static_cast<Uint32*>(pkData);
814       const Uint64* const buf64 = reinterpret_cast<const Uint64*>(buf32);
815       lockReq->hashValue = md5_hash(buf64, pkSize);
816       Uint32 lkey1, lkey2;
817       getTupAddr(frag, ent, lkey1, lkey2);
818       lockReq->page_id = lkey1;
819       lockReq->page_idx = lkey2;
820       lockReq->transId1 = scan.m_transId1;
821       lockReq->transId2 = scan.m_transId2;
822       lockReq->isCopyFragScan = ZFALSE;
823       // execute
824       EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal, AccLockReq::LockSignalLength);
825       jamEntryDebug();
826       switch (lockReq->returnCode)
827       {
828       case AccLockReq::Success:
829       {
830         jam();
831         scan.m_state = ScanOp::Locked;
832         scan.m_accLockOp = lockReq->accOpPtr;
833 #ifdef VM_TRACE
834         if (debugFlags & (DebugScan | DebugLock))
835         {
836           tuxDebugOut << "Lock immediate scan " << scanPtr.i << " "
837                       << scan << endl;
838         }
839 #endif
840         break;
841       }
842       case AccLockReq::IsBlocked:
843       {
844         jam();
845         // normal lock wait
846         scan.m_state = ScanOp::Blocked;
847         scan.m_lockwait = true;
848         scan.m_accLockOp = lockReq->accOpPtr;
849 #ifdef VM_TRACE
850         if (debugFlags & (DebugScan | DebugLock))
851         {
852           tuxDebugOut << "Lock wait scan " << scanPtr.i << " " << scan << endl;
853         }
854 #endif
855         // LQH will wake us up
856         CheckLcpStop* cls = (CheckLcpStop*) signal->theData;
857         cls->scanPtrI = scan.m_userPtr;
858         cls->scanState = CheckLcpStop::ZSCAN_RESOURCE_WAIT;
859         EXECUTE_DIRECT(DBLQH, GSN_CHECK_LCP_STOP, signal, 2);
860         if (signal->theData[0] == CheckLcpStop::ZTAKE_A_BREAK)
861         {
862           jamEntry();
863           /* Normal path */
864           release_c_free_scan_lock();
865           return; // stop for a while
866         }
867         jamEntry();
868         /* DBTC has most likely aborted due to timeout */
869         ndbrequire(signal->theData[0] == CheckLcpStop::ZABORT_SCAN);
870         /* Ensure that we send NEXT_SCANCONF immediately to close */
871         scan.m_state = ScanOp::Last;
872         break;
873       }
874       case AccLockReq::Refused:
875       {
876         jam();
877         // we cannot see deleted tuple (assert only)
878         g_eventLogger->info("(%u) Refused tab(%u,%u) row(%u,%u)",
879                             instance(),
880                             scan.m_tableId,
881                             frag.m_fragId,
882                             lkey1,
883                             lkey2);
884         ndbassert(false);
885         // skip it
886         scan.m_state = ScanOp::Next;
887         CheckLcpStop* cls = (CheckLcpStop*) signal->theData;
888         cls->scanPtrI = scan.m_userPtr;
889         cls->scanState = CheckLcpStop::ZSCAN_RESOURCE_WAIT;
890         EXECUTE_DIRECT(DBLQH, GSN_CHECK_LCP_STOP, signal, 2);
891         if (signal->theData[0] == CheckLcpStop::ZTAKE_A_BREAK)
892         {
893           jamEntry();
894           /* Normal path */
895           release_c_free_scan_lock();
896           return; // stop for a while
897         }
898         jamEntry();
899         /* DBTC has most likely aborted due to timeout */
900         ndbrequire(signal->theData[0] == CheckLcpStop::ZABORT_SCAN);
901         /* Ensure that we send NEXT_SCANCONF immediately to close */
902         scan.m_state = ScanOp::Last;
903         break;
904       }
905       case AccLockReq::NoFreeOp:
906       {
907         jam();
908         // stay in Found state
909         scan.m_state = ScanOp::Found;
910         CheckLcpStop* cls = (CheckLcpStop*) signal->theData;
911         cls->scanPtrI = scan.m_userPtr;
912         cls->scanState = CheckLcpStop::ZSCAN_RESOURCE_WAIT_STOPPABLE;
913         EXECUTE_DIRECT(DBLQH, GSN_CHECK_LCP_STOP, signal, 2);
914         if (signal->theData[0] == CheckLcpStop::ZTAKE_A_BREAK)
915         {
916           jamEntry();
917           /* Normal path */
918           release_c_free_scan_lock();
919           return; // stop for a while
920         }
921         jamEntry();
922         ndbrequire(signal->theData[0] == CheckLcpStop::ZABORT_SCAN);
923         /* Ensure that we send NEXT_SCANCONF immediately to close */
924         scan.m_state = ScanOp::Last;
925         break;
926       }
927       default:
928         ndbabort();
929       }
930     }
931     else
932     {
933       scan.m_state = ScanOp::Locked;
934     }
935   }
936   else if (scan.m_state == ScanOp::Next)
937   {
938     jam();
939     // Taking a break from searching the tree
940     release_c_free_scan_lock();
941     CheckLcpStop* cls = (CheckLcpStop*) signal->theData;
942     cls->scanPtrI = scan.m_userPtr;
943     cls->scanState = CheckLcpStop::ZSCAN_RUNNABLE_YIELD;
944     EXECUTE_DIRECT(DBLQH, GSN_CHECK_LCP_STOP, signal, 2);
945     jam();
946     ndbassert(signal->theData[0] == CheckLcpStop::ZTAKE_A_BREAK);
947     return;
948   }
949   if (likely(scan.m_state == ScanOp::Locked))
950   {
951     // we have lock or do not need one
952     jamDebug();
953     // read keys if not already done (uses signal)
954     const TreeEnt ent = scan.m_scanEnt;
955     // conf signal
956     NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend();
957     conf->scanPtr = scan.m_userPtr;
958     // the lock is passed to LQH
959     Uint32 accLockOp = scan.m_accLockOp;
960     if (unlikely(accLockOp != RNIL))
961     {
962       scan.m_accLockOp = RNIL;
963       // remember it until LQH unlocks it
964       addAccLockOp(scanPtr, accLockOp);
965     }
966     else
967     {
968       ndbrequire(scan.m_readCommitted);
969       // operation RNIL in LQH would signal no tuple returned
970       accLockOp = (Uint32)-1;
971     }
972     ndbrequire(c_freeScanLock == RNIL);
973     conf->accOperationPtr = accLockOp;
974     conf->fragId = frag.m_fragId;
975     const TupLoc tupLoc = ent.m_tupLoc;
976     Uint32 lkey1 = tupLoc.getPageId();
977     Uint32 lkey2 = tupLoc.getPageOffset();
978     conf->localKey[0] = lkey1;
979     conf->localKey[1] = lkey2;
980     /**
981      * We can arrive here from a delayed CONTINUEB signal from
982      * LQH when we are waiting for a locked row and we now
983      * acquired the lock. To ensure that we have properly
984      * setup for execution of execTUPKEYREQ we call
985      * prepare_scan_tux_TUPKEYREQ here even if we already did
986      * it from ACC. Also needed to ensure proper operation of
987      * ndbassert's in debug mode.
988      */
989     c_tup->prepare_scan_tux_TUPKEYREQ(lkey1, lkey2);
990     // add key info
991     // next time look for next entry
992     scan.m_state = ScanOp::Next;
993     signal->setLength(NextScanConf::SignalLengthNoGCI);
994     c_lqh->exec_next_scan_conf(signal);
995     return;
996   }
997   // In ACC this is checked before req->checkLcpStop
998   if (scan.m_state == ScanOp::Last)
999   {
1000     jamDebug();
1001     release_c_free_scan_lock();
1002     NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend();
1003     conf->scanPtr = scan.m_userPtr;
1004     conf->accOperationPtr = RNIL;
1005     conf->fragId = RNIL;
1006     signal->setLength(NextScanConf::SignalLengthNoTuple);
1007     c_lqh->exec_next_scan_conf(signal);
1008     return;
1009   }
1010   ndbabort();
1011 }
1012 
1013 /*
1014  * Lock succeeded (after delay) in ACC.  If the lock is for current
1015  * entry, set state to Locked.  If the lock is for an entry we were
1016  * moved away from, simply unlock it.  Finally, if we are closing the
1017  * scan, do nothing since we have already sent an abort request.
1018  */
1019 void
Dbtux::execACCKEYCONF(Signal* signal)
1021 {
1022   jamEntry();
1023   ScanOpPtr scanPtr;
1024   scanPtr.i = signal->theData[0];
1025   ndbrequire(c_scanOpPool.getValidPtr(scanPtr));
1026   ScanOp& scan = *scanPtr.p;
1027 #ifdef VM_TRACE
1028   if (debugFlags & (DebugScan | DebugLock)) {
1029     tuxDebugOut << "Lock obtained scan " << scanPtr.i << " " << scan << endl;
1030   }
1031 #endif
1032   ndbrequire(scan.m_lockwait && scan.m_accLockOp != RNIL);
1033   scan.m_lockwait = false;
1034   if (scan.m_state == ScanOp::Blocked) {
1035     // the lock wait was for current entry
1036     jam();
1037     scan.m_state = ScanOp::Locked;
1038     // LQH has the ball
1039     return;
1040   }
1041   if (scan.m_state != ScanOp::Aborting) {
1042     // we were moved, release lock
1043     jam();
1044     AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend();
1045     lockReq->returnCode = RNIL;
1046     lockReq->requestInfo = AccLockReq::Abort;
1047     lockReq->accOpPtr = scan.m_accLockOp;
1048     EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal, AccLockReq::UndoSignalLength);
1049     jamEntry();
1050     ndbrequire(lockReq->returnCode == AccLockReq::Success);
1051     scan.m_accLockOp = RNIL;
1052     // LQH has the ball
1053     return;
1054   }
1055   // lose the lock
1056   scan.m_accLockOp = RNIL;
1057   // continue at ACC_ABORTCONF
1058 }
1059 
1060 /*
1061  * Lock failed (after delay) in ACC.  Probably means somebody ahead of
1062  * us in lock queue deleted the tuple.
1063  */
1064 void
Dbtux::execACCKEYREF(Signal* signal)
1066 {
1067   jamEntry();
1068   ScanOpPtr scanPtr;
1069   scanPtr.i = signal->theData[0];
1070   ndbrequire(c_scanOpPool.getValidPtr(scanPtr));
1071   ScanOp& scan = *scanPtr.p;
1072 #ifdef VM_TRACE
1073   if (debugFlags & (DebugScan | DebugLock)) {
1074     tuxDebugOut << "Lock refused scan " << scanPtr.i << " " << scan << endl;
1075   }
1076 #endif
1077   ndbrequire(scan.m_lockwait && scan.m_accLockOp != RNIL);
1078   scan.m_lockwait = false;
1079   if (scan.m_state != ScanOp::Aborting) {
1080     jam();
1081     // release the operation
1082     AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend();
1083     lockReq->returnCode = RNIL;
1084     lockReq->requestInfo = AccLockReq::Abort;
1085     lockReq->accOpPtr = scan.m_accLockOp;
1086     EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal, AccLockReq::UndoSignalLength);
1087     jamEntry();
1088     ndbrequire(lockReq->returnCode == AccLockReq::Success);
1089     scan.m_accLockOp = RNIL;
1090     // scan position should already have been moved (assert only)
1091     if (scan.m_state == ScanOp::Blocked) {
1092       jam();
1093       // can happen when Dropping
1094 #ifdef VM_TRACE
1095       const Frag& frag = *c_fragPool.getPtr(scan.m_fragPtrI);
1096       const Index& index = *c_indexPool.getPtr(frag.m_indexId);
1097       ndbassert(index.m_state != Index::Online);
1098 #endif
1099       scan.m_state = ScanOp::Next;
1100     }
1101     // LQH has the ball
1102     return;
1103   }
1104   // lose the lock
1105   scan.m_accLockOp = RNIL;
1106   // continue at ACC_ABORTCONF
1107 }
1108 
1109 /*
1110  * Received when scan is closing.  This signal arrives after any
1111  * ACCKEYCONF or ACCKEYREF which may have been in job buffer.
1112  */
1113 void
Dbtux::execACC_ABORTCONF(Signal* signal)
1115 {
1116   jamEntry();
1117   ScanOpPtr scanPtr;
1118   scanPtr.i = signal->theData[0];
1119   ndbrequire(c_scanOpPool.getValidPtr(scanPtr));
1120   ScanOp& scan = *scanPtr.p;
1121 #ifdef VM_TRACE
1122   if (debugFlags & (DebugScan | DebugLock)) {
1123     tuxDebugOut << "ACC_ABORTCONF scan " << scanPtr.i << " " << scan << endl;
1124   }
1125 #endif
1126   c_lqh->setup_scan_pointers(scan.m_userPtr);
1127   ndbrequire(scan.m_state == ScanOp::Aborting);
1128   // most likely we are still in lock wait
1129   if (scan.m_lockwait) {
1130     jam();
1131     scan.m_lockwait = false;
1132     scan.m_accLockOp = RNIL;
1133   }
1134   scanClose(signal, scanPtr);
1135 }
1136 
1137 /*
1138  * Find start position for single range scan.
1139  */
1140 void
Dbtux::scanFirst(ScanOpPtr scanPtr, Frag& frag, const Index& index)
1142 {
1143   ScanOp& scan = *scanPtr.p;
1144   // scan direction 0, 1
1145   const unsigned idir = c_ctx.descending;
1146   // set up bound from segmented memory
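  // The start bound of the scan: the lower bound when ascending, the
  // upper bound when descending.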
1147   const ScanBound& scanBound = scan.m_scanBound[idir];
1148   KeyDataC searchBoundData(index.m_keySpec, true);
1149   KeyBoundC searchBound(searchBoundData);
1150   unpackBound(c_ctx.c_searchKey, scanBound, searchBound);
1151 
1152   KeyDataArray *key_data = new (&c_ctx.searchKeyDataArray)
1153                            KeyDataArray();
1154   key_data->init_bound(searchBound, scanBound.m_cnt);
1155   KeyBoundArray *searchBoundArray = new (&c_ctx.searchKeyBoundArray)
1156     KeyBoundArray(&index.m_keySpec,
1157                   &c_ctx.searchKeyDataArray,
1158                   scanBound.m_side);
1159 
1160   TreePos treePos;
1161   searchToScan(frag, idir, *searchBoundArray, treePos);
1162   if (likely(treePos.m_loc != NullTupLoc))
1163   {
1164     scan.m_scanPos = treePos;
1165     // link the scan to node found
1166     NodeHandle node(frag);
1167     selectNode(c_ctx, node, treePos.m_loc);
1168     linkScan(node, scanPtr);
1169     if (likely(treePos.m_dir == 3))
1170     {
1171       jamDebug();
1172       // check upper bound
1173       TreeEnt ent = node.getEnt(treePos.m_pos);
1174       const TupLoc tupLoc = ent.m_tupLoc;
1175       jamDebug();
1176       c_tup->prepare_scan_tux_TUPKEYREQ(tupLoc.getPageId(),
1177                                         tupLoc.getPageOffset());
1178       jamDebug();
1179       if (unlikely(scanCheck(scan, ent)))
1180       {
1181         jamDebug();
1182         c_ctx.m_current_ent = ent;
1183         scan.m_state = ScanOp::Current;
1184       }
1185       else
1186       {
1187         jamDebug();
1188         scan.m_state = ScanOp::Last;
1189       }
1190     } else {
1191       jamDebug();
1192       scan.m_state = ScanOp::Next;
1193     }
1194   } else {
1195     jamDebug();
1196     scan.m_state = ScanOp::Last;
1197   }
1198 }
1199 
1200 /*
1201  * Look for entry to return as scan result.
1202  */
1203 void
Dbtux::scanFind(ScanOpPtr scanPtr, Frag& frag)
1205 {
1206   ScanOp& scan = *scanPtr.p;
1207   Uint32 scan_state = scan.m_state;
1208   ndbassert(scan_state == ScanOp::Current || scan_state == ScanOp::Next);
1209   while (1)
1210   {
1211     jamDebug();
1212     if (scan_state == ScanOp::Next)
1213     {
1214       scan_state = scanNext(scanPtr, false, frag);
1215     }
1216     else
1217     {
1218       jamDebug();
1219       ndbrequire(scan_state == ScanOp::Current);
1220       const TreePos treePos = scan.m_scanPos;
1221       NodeHandle node(frag);
1222       selectNode(c_ctx, node, treePos.m_loc);
1223       TreeEnt ent = node.getEnt(treePos.m_pos);
1224       const TupLoc tupLoc = ent.m_tupLoc;
1225       c_tup->prepare_scan_tux_TUPKEYREQ(tupLoc.getPageId(),
1226                                         tupLoc.getPageOffset());
1227       c_ctx.m_current_ent = ent;
1228     }
1229     Uint32 statOpPtrI = scan.m_statOpPtrI;
1230     if (likely(scan_state == ScanOp::Current))
1231     {
1232       jamDebug();
1233       const TreeEnt ent = c_ctx.m_current_ent;
1234       if (likely(statOpPtrI == RNIL))
1235       {
1236         if (likely(scanVisible(scan, ent)))
1237         {
1238           jamDebug();
1239           scan.m_state = ScanOp::Found;
1240           scan.m_scanEnt = ent;
1241           break;
1242         }
1243       }
1244       else
1245       {
1246         StatOpPtr statPtr;
1247         statPtr.i = statOpPtrI;
1248         c_statOpPool.getPtr(statPtr);
1249         // report row to stats, returns true if a sample is available
1250         int ret = statScanAddRow(statPtr, ent);
1251         if (ret == 1)
1252         {
1253           jam();
1254           scan.m_state = ScanOp::Found;
1255           // may not access non-pseudo cols but must return valid ent
1256           scan.m_scanEnt = ent;
1257           break;
1258         }
1259         else if (ret == 2)
1260         {
1261           // take a break
1262           jam();
1263           scan.m_state = ScanOp::Next;
1264           scan.m_scanEnt = ent;
1265           break;
1266         }
1267       }
1268     }
1269     else
1270     {
1271       jamDebug();
1272       break;
1273     }
1274     scan.m_state = scan_state = ScanOp::Next;
1275   }
1276 }
1277 
1278 /*
1279  * Move to next entry.  The scan is already linked to some node.  When
1280  * we leave, if an entry was found, it will be linked to a possibly
1281  * different node.  The scan has a position, and a direction which tells
1282  * from where we came to this position.  This is one of (all comments
1283  * are in terms of ascending scan):
1284  *
1285  * 0 - up from left child (scan this node next)
1286  * 1 - up from right child (proceed to parent)
1287  * 2 - up from root (the scan ends)
1288  * 3 - left to right within node (at end set state 5)
1289  * 4 - down from parent (proceed to left child)
1290  * 5 - at node end proceed to right child (state becomes 4)
1291  *
1292  * If an entry was found, scan direction is 3.  Therefore tree
1293  * re-organizations need not worry about scan direction.
1294  *
1295  * This method is also used to move a scan when its entry is removed
1296  * (see moveScanList).  If the scan is Blocked, we check if it remains
1297  * Blocked on a different version of the tuple.  Otherwise the tuple is
1298  * lost and state becomes Current.
1299  */
1300 Uint32
Dbtux::scanNext(ScanOpPtr scanPtr, bool fromMaintReq, Frag& frag)
1302 {
1303   ScanOp& scan = *scanPtr.p;
1304   // cannot be moved away from tuple we have locked
1305 #if defined VM_TRACE || defined ERROR_INSERT
1306   ndbrequire(fromMaintReq || scan.m_state != ScanOp::Locked);
1307 #else
1308   ndbassert(fromMaintReq || scan.m_state != ScanOp::Locked);
1309 #endif
1310   // scan direction
1311   const unsigned idir = scan.m_descending; // 0, 1
1312   const int jdir = 1 - 2 * (int)idir;      // 1, -1
1313   // use copy of position
1314   TreePos pos = scan.m_scanPos;
1315   Uint32 scan_state = scan.m_state;
1316   // get and remember original node
1317   NodeHandle origNode(frag);
1318   selectNode(c_ctx, origNode, pos.m_loc);
1319   ndbassert(islinkScan(origNode, scanPtr));
1320   if (unlikely(scan_state == ScanOp::Locked))
1321   {
1322     // bug#32040 - no fix, just unlock and continue
1323     jam();
1324     if (scan.m_accLockOp != RNIL)
1325     {
1326       jam();
1327       Signal* signal = c_signal_bug32040;
1328       AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend();
1329       lockReq->returnCode = RNIL;
1330       lockReq->requestInfo = AccLockReq::Abort;
1331       lockReq->accOpPtr = scan.m_accLockOp;
1332       EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal, AccLockReq::UndoSignalLength);
1333       jamEntry();
1334       ndbrequire(lockReq->returnCode == AccLockReq::Success);
1335       scan.m_accLockOp = RNIL;
1336       scan.m_lockwait = false;
1337     }
1338     scan.m_state = ScanOp::Next;
1339   }
1340   // current node in loop
1341   NodeHandle node = origNode;
1342   // copy of entry found
1343   TreeEnt ent;
1344   TupLoc loc;
1345   Uint32 occup;
1346   do
1347   {
1348     jamDebug();
1349     Uint32 dir = pos.m_dir;
1350     {
1351       /* Search in node we are currently scanning. */
1352       const Uint32 node_occup = node.getOccup();
1353       const Uint32 node_pos = pos.m_pos;
1354       // advance position - becomes large (> occup) if 0 and descending
1355       const Uint32 new_node_pos = node_pos + jdir;
1356       if (likely(dir == 3))
1357       {
1358         /**
1359          * We are currently scanning inside a node, proceed until we
1360          * have scanned all items in this node.
1361          */
1362         if (likely(new_node_pos < node_occup))
1363         {
1364           jamDebug();
1365           ent = node.getEnt(new_node_pos);
1366           const TupLoc tupLoc = ent.m_tupLoc;
1367           pos.m_pos = new_node_pos;
1368           c_tup->prepare_scan_tux_TUPKEYREQ(tupLoc.getPageId(),
1369                                             tupLoc.getPageOffset());
1370           if (unlikely(!scanCheck(scan, ent)))
1371           {
1372             /**
             * We have reached the end of the scan; this row is outside
             * the range being scanned.
1375              */
1376             jamDebug();
1377             pos.m_loc = NullTupLoc;
1378             goto found_none;
1379           }
1380           goto found;
1381         }
1382         /* Start search for next node. */
1383         if (likely(node_occup != 0))
1384         {
1385           pos.m_dir = dir = 5;
1386         }
1387       }
1388     }
1389     do
1390     {
1391       /* Search for a node that is at the leaf level */
1392       if (likely(dir == 5))
1393       {
1394         // at node end proceed to right child
1395         jamDebug();
1396         loc = node.getLink(1 - idir);
1397         if (loc != NullTupLoc)
1398         {
1399           jamDebug();
1400           pos.m_loc = loc;
1401           pos.m_dir = dir = 4;  // down from parent as usual
1402           selectNode(c_ctx, node, loc);
1403         }
1404         else
1405         {
1406           // pretend we came from right child
1407           pos.m_dir = dir = 1 - idir;
1408           break;
1409         }
1410       }
1411       while (likely(dir == 4))
1412       {
1413         // coming down from parent proceed to left child
1414         jamDebug();
1415         loc = node.getLink(idir);
1416         if (loc != NullTupLoc)
1417         {
1418           jamDebug();
1419           pos.m_loc = loc;
1420           selectNode(c_ctx, node, loc);
1421           continue;
1422         }
1423         // pretend we came from left child
1424         pos.m_dir = dir = idir;
1425         break;
1426       }
1427     } while (0);
1428     do
1429     {
1430       /* Search for a non-empty node at leaf level to scan. */
1431       occup = node.getOccup();
1432       if (unlikely(occup == 0))
1433       {
1434         jamDebug();
1435         ndbrequire(fromMaintReq);
1436         // move back to parent - see comment in treeRemoveInner
1437         loc = pos.m_loc = node.getLink(2);
1438         pos.m_dir = dir = node.getSide();
1439       }
1440       else if (dir == idir)
1441       {
1442         // coming up from left child scan current node
1443         jamDebug();
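        // place the position just before the first entry in scan direction;
        // the next advance by jdir then yields 0 (ascending) or occup - 1
        // (descending)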
1444         pos.m_pos = idir == 0 ? Uint32(~0) : occup;
1445         pos.m_dir = 3;
1446         break;
1447       }
1448       else
1449       {
1450         ndbrequire(dir == 1 - idir);
1451         // coming up from right child proceed to parent
1452         jamDebug();
1453         loc = pos.m_loc = node.getLink(2);
1454         pos.m_dir = dir = node.getSide();
1455       }
1456       if (unlikely(dir == 2))
1457       {
1458         // coming up from root ends the scan
1459         jamDebug();
1460         pos.m_loc = NullTupLoc;
1461         goto found_none;
1462       }
1463       selectNode(c_ctx, node, loc);
1464     } while (true);
1465   } while (true);
1466 found:
1467   // copy back position
1468   scan.m_scanPos = pos;
1469   // relink
1470   ndbassert(pos.m_dir == 3);
1471   ndbassert(pos.m_loc == node.m_loc);
1472   if (unlikely(origNode.m_loc != node.m_loc))
1473   {
1474     jamDebug();
1475     unlinkScan(origNode, scanPtr);
1476     linkScan(node, scanPtr);
1477   }
1478   if (likely(scan.m_state != ScanOp::Blocked))
1479   {
1480     c_ctx.m_current_ent = ent;
1481     scan.m_state = ScanOp::Current;
1482   }
1483   else
1484   {
1485     jamDebug();
1486     ndbrequire(fromMaintReq);
1487     TreeEnt& scanEnt = scan.m_scanEnt;
1488     ndbrequire(scanEnt.m_tupLoc != NullTupLoc);
1489     if (scanEnt.eqtuple(ent))
1490     {
1491       // remains blocked on another version
1492       scanEnt = ent;
1493     } else {
1494       jamDebug();
1495       scanEnt.m_tupLoc = NullTupLoc;
1496       c_ctx.m_current_ent = ent;
1497       scan.m_state = ScanOp::Current;
1498     }
1499   }
1500   return scan.m_state;
1501 
1502 found_none:
1503   jamDebug();
1504   scan.m_scanPos = pos;
1505   unlinkScan(origNode, scanPtr);
1506   scan.m_state = ScanOp::Last;
1507   return ScanOp::Last;
1508 
1509 }
1510 
1511 /*
1512  * Check end key.  Return true if scan is still within range.
1513  *
1514  * Error handling:  If scan error code has been set, return false at
1515  * once.  This terminates the scan and also avoids kernel crash on
1516  * invalid data.
1517  */
1518 inline
1519 bool
Dbtux::scanCheck(ScanOp& scan, TreeEnt ent)
1521 {
1522   jamDebug();
1523   Uint32 scanBoundCnt = c_ctx.scanBoundCnt;
1524   int ret = 0;
1525   if (likely(scanBoundCnt != 0))
1526   {
1527     const Uint32 tupVersion = ent.m_tupVersion;
1528     Uint32* const outputBuffer = c_ctx.c_dataBuffer;
1529     const Uint32 count = c_ctx.scanBoundCnt;
1530     const Uint32* keyAttrs32 = (const Uint32*)&c_ctx.keyAttrs[0];
1531     ret = c_tup->tuxReadAttrsCurr(c_ctx.jamBuffer,
1532                                   keyAttrs32,
1533                                   count,
1534                                   outputBuffer,
1535                                   false,
1536                                   tupVersion);
1537     thrjamDebug(c_ctx.jamBuffer);
1538     thrjamLineDebug(c_ctx.jamBuffer, count);
1539     KeyDataArray key_data;
1540     key_data.init_poai(outputBuffer, count);
1541     // compare bound to key
1542     ret = c_ctx.searchScanBoundArray.cmp(&key_data, count, false);
1543     ndbrequire(ret != 0);
1544     const unsigned idir = c_ctx.descending;
1545     const int jdir = 1 - 2 * (int)idir;
1546     ret = (-1) * ret; // reverse for key vs bound
1547     ret = jdir * ret; // reverse for descending scan
1548   }
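  // ret > 0 means the key is past the end bound in the scan direction,
  // i.e. the scan has left its range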
1549   return (ret <= 0);
1550 }
1551 
1552 /*
1553  * Check if an entry is visible to the scan.
1554  *
 * There is a special check to never accept the same tuple twice in a row.
1556  * This is faster than asking TUP.  It also fixes some special cases
1557  * which are not analyzed or handled yet.
1558  *
1559  * Error handling:  If scan error code has been set, return false since
1560  * no new result can be returned to LQH.  The scan will then look for
 * the next result and terminate via scanCheck().
1562  */
1563 bool
Dbtux::scanVisible(ScanOp& scan, TreeEnt ent)
1565 {
1566   Uint32 opPtrI = c_tup->get_tuple_operation_ptr_i();
1567   // check for same tuple twice in row
1568   if (unlikely(scan.m_scanEnt.m_tupLoc == ent.m_tupLoc))
1569   {
1570     jamDebug();
1571     return false;
1572   }
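  // no pending TUP operation on the row means there are no uncommitted
  // changes, so the row is visible to the scan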
1573   if (likely(opPtrI == RNIL))
1574   {
1575     return true;
1576   }
1577   Uint32 tupVersion = ent.m_tupVersion;
1578   Uint32 transId1 = scan.m_transId1;
1579   Uint32 transId2 = scan.m_transId2;
1580   bool dirty = scan.m_readCommitted;
1581   Uint32 savePointId = scan.m_savePointId;
1582   bool ret = c_tup->tuxQueryTh(opPtrI,
1583                                tupVersion,
1584                                transId1,
1585                                transId2,
1586                                dirty,
1587                                savePointId);
1588   jamEntryDebug();
1589   return ret;
1590 }
1591 
1592 /*
1593  * Finish closing of scan and send conf.  Any lock wait has been done
1594  * already.
1595  *
1596  * Error handling:  Every scan ends here.  If error code has been set,
1597  * send a REF.
1598  */
1599 void
Dbtux::scanClose(Signal* signal, ScanOpPtr scanPtr)
1601 {
1602   ScanOp& scan = *scanPtr.p;
1603   ndbrequire(! scan.m_lockwait && scan.m_accLockOp == RNIL);
1604   // unlock all not unlocked by LQH
1605   if (! scan.m_accLockOps.isEmpty()) {
1606     jam();
1607     abortAccLockOps(signal, scanPtr);
1608   }
1609   Uint32 blockNo = refToMain(scanPtr.p->m_userRef);
1610   if (scanPtr.p->m_errorCode == 0) {
1611     jamDebug();
1612     // send conf
1613     NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend();
1614     conf->scanPtr = scanPtr.p->m_userPtr;
1615     conf->accOperationPtr = RNIL;
1616     conf->fragId = RNIL;
1617     releaseScanOp(scanPtr);
1618     signal->setLength(NextScanConf::SignalLengthNoTuple);
1619     c_lqh->exec_next_scan_conf(signal);
1620     return;
1621   } else {
1622     // send ref
1623     NextScanRef* ref = (NextScanRef*)signal->getDataPtr();
1624     ref->scanPtr = scanPtr.p->m_userPtr;
1625     ref->accOperationPtr = RNIL;
1626     ref->fragId = RNIL;
1627     ref->errorCode = scanPtr.p->m_errorCode;
1628     releaseScanOp(scanPtr);
1629     EXECUTE_DIRECT(blockNo,
1630                    GSN_NEXT_SCANREF,
1631                    signal,
1632                    NextScanRef::SignalLength);
1633     return;
1634   }
1635 }
1636 
1637 void
Dbtux::abortAccLockOps(Signal* signal, ScanOpPtr scanPtr)
1639 {
1640   ScanOp& scan = *scanPtr.p;
1641 #ifdef VM_TRACE
1642   if (debugFlags & (DebugScan | DebugLock)) {
1643     tuxDebugOut << "Abort locks in scan " << scanPtr.i << " " << scan << endl;
1644   }
1645 #endif
1646   Local_ScanLock_fifo list(c_scanLockPool, scan.m_accLockOps);
1647   ScanLockPtr lockPtr;
1648   while (list.first(lockPtr)) {
1649     jam();
1650     AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend();
1651     lockReq->returnCode = RNIL;
1652     lockReq->requestInfo = AccLockReq::Abort;
1653     lockReq->accOpPtr = lockPtr.p->m_accLockOp;
1654     EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal, AccLockReq::UndoSignalLength);
1655     jamEntry();
1656     ndbrequire(lockReq->returnCode == AccLockReq::Success);
1657     list.remove(lockPtr);
1658     c_scanLockPool.release(lockPtr);
1659   }
1660   checkPoolShrinkNeed(DBTUX_SCAN_LOCK_TRANSIENT_POOL_INDEX,
1661                       c_scanLockPool);
1662 }
1663 
1664 void
Dbtux::addAccLockOp(ScanOpPtr scanPtr, Uint32 accLockOp)
1666 {
1667   ScanOp& scan = *scanPtr.p;
1668 #ifdef VM_TRACE
1669   if (debugFlags & (DebugScan | DebugLock)) {
1670     tuxDebugOut << "Add lock " << hex << accLockOp << dec
1671                 << " to scan " << scanPtr.i << " " << scan << endl;
1672   }
1673 #endif
1674   Local_ScanLock_fifo list(c_scanLockPool, scan.m_accLockOps);
1675   ScanLockPtr lockPtr;
1676 #ifdef VM_TRACE
1677   list.first(lockPtr);
1678   while (lockPtr.i != RNIL) {
1679     ndbrequire(lockPtr.p->m_accLockOp != accLockOp);
1680     list.next(lockPtr);
1681   }
1682 #endif
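  // consume the ScanLock record reserved earlier by check_freeScanLock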
1683   lockPtr.i = c_freeScanLock;
1684   ndbrequire(c_scanLockPool.getValidPtr(lockPtr));
1685   c_freeScanLock = RNIL;
1686   ndbrequire(accLockOp != RNIL);
1687   lockPtr.p->m_accLockOp = accLockOp;
1688   list.addLast(lockPtr);
1689 }
1690 
1691 void
Dbtux::removeAccLockOp(ScanOpPtr scanPtr, Uint32 accLockOp)
1693 {
1694   ScanOp& scan = *scanPtr.p;
1695 #ifdef VM_TRACE
1696   if (debugFlags & (DebugScan | DebugLock)) {
1697     tuxDebugOut << "Remove lock " << hex << accLockOp << dec
1698                 << " from scan " << scanPtr.i << " " << scan << endl;
1699   }
1700 #endif
1701   Local_ScanLock_fifo list(c_scanLockPool, scan.m_accLockOps);
1702   ScanLockPtr lockPtr;
1703   list.first(lockPtr);
1704   while (lockPtr.i != RNIL) {
1705     if (lockPtr.p->m_accLockOp == accLockOp) {
1706       jam();
1707       break;
1708     }
1709     list.next(lockPtr);
1710   }
1711   ndbrequire(lockPtr.i != RNIL);
1712   list.remove(lockPtr);
1713   c_scanLockPool.release(lockPtr);
1714   checkPoolShrinkNeed(DBTUX_SCAN_LOCK_TRANSIENT_POOL_INDEX,
1715                       c_scanLockPool);
1716 }
1717 
1718 /*
1719  * Release allocated records.
1720  */
1721 void
Dbtux::releaseScanOp(ScanOpPtr& scanPtr)
1723 {
1724 #ifdef VM_TRACE
1725   if (debugFlags & DebugScan) {
1726     tuxDebugOut << "Release scan " << scanPtr.i << " " << *scanPtr.p << endl;
1727   }
1728 #endif
1729   Frag& frag = *c_fragPool.getPtr(scanPtr.p->m_fragPtrI);
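  // release the segmented memory holding the lower and upper scan bounds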
1730   for (unsigned i = 0; i <= 1; i++) {
1731     ScanBound& scanBound = scanPtr.p->m_scanBound[i];
1732     ScanBoundBuffer::Head& head = scanBound.m_head;
1733     LocalScanBoundBuffer b(c_scanBoundPool, head);
1734     b.release();
1735   }
1736   checkPoolShrinkNeed(DBTUX_SCAN_BOUND_TRANSIENT_POOL_INDEX,
1737                       c_scanBoundPool);
1738   if (unlikely(scanPtr.p->m_statOpPtrI != RNIL)) {
1739     jam();
1740     StatOpPtr statPtr;
1741     statPtr.i = scanPtr.p->m_statOpPtrI;
1742     c_statOpPool.getPtr(statPtr);
1743     c_statOpPool.release(statPtr);
1744   }
1745   // unlink from per-fragment list and release from pool
1746   frag.m_scanList.remove(scanPtr);
1747   c_scanOpPool.release(scanPtr);
1748   checkPoolShrinkNeed(DBTUX_SCAN_OPERATION_TRANSIENT_POOL_INDEX,
1749                       c_scanOpPool);
1750 }
1751