1 /*
2 Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #define DBTUX_SCAN_CPP
26 #include "Dbtux.hpp"
27 #include "my_sys.h"
28
29 #define JAM_FILE_ID 371
30
31 /**
32 * To speed up query processing we calculate a number of variables
33 * as part of our context while processing scan operations.
34 *
35 * This method is called every time we come back from a real-time
36 * break from LQH to setup all needed context to scan a range in
37 * TUX.
38 *
39 * These variables are:
40 * --------------------
41 * c_ctx.scanPtr
42 * This is the pointer and i-value of the scan record
43 *
44 * c_ctx.fragPtr
45 * This is the pointer and i-value of the table fragment being
46 * scanned, this is the fragment record in TUX.
47 *
48 * c_ctx.indexPtr
49 * This is the pointer and i-value of the index fragment record
50 * currently being scanned. There can be multiple indexes on one
51 * fragment.
52 *
53 * The following variables are setup using the prepare_scan_bounds method:
54 * .......................................................................
55 * c_ctx.searchScanDataArray
56 * This is a KeyDataArray object (NdbPack::DataArray) representing
57 * the right part of the boundary of the range scan.
58 *
59 * c_ctx.searchScanBoundArray
60 * This is the KeyBoundArray object (NdbPack::BoundArray) also
61 * representing the right part of the boundary of the range scan.
62 * It contains the above KeyDataArray and also the scan direction
63 * (whether we are scanning ascending or descending).
64 * The above two are only set if the boundary has at least one
65 * column that is bounded. A full table scan with order would not
66 * have any boundary and those would not be set since
67 * c_ctx.scanBoundCnt is set to 0.
68 *
69 * c_ctx.keyAttrs
70 * This is the pointer to the Attrinfo array used to read the key
71 * values from TUP. It is calculated from information in the
72 * index fragment record.
73 * c_ctx.descending
74 * This represents information about ascending or descending scan
75 * derived from the scan object.
76 * c_ctx.scanBoundCnt
77 * This represents the number of columns involved in the boundary
78 * condition the scan uses.
79 *
80 * The following variables are setup through the prepare_all_tup_ptrs method:
81 * ..........................................................................
82 * c_ctx.tupIndexFragPtr
83 * This is a pointer that points to the index fragment record for the index
84 * scanned within TUP. These TUP pointers are represented as Uint32* pointers
85 * in TUX to avoid having to include Dbtup.hpp in TUX.
86 * c_ctx.tupIndexTablePtr
87 * This is a pointer that points to the index table record within TUP.
88 * c_ctx.tupRealFragPtr
89 * This is a pointer that points to the fragment record in TUP of the
90 * table fragment being scanned.
91 * c_ctx.tupRealTablePtr
92 * This is a pointer that points to the table record in TUP of the table
93 * being scanned.
94 * c_ctx.tuxFixHeaderSize
95 * This variable contains the header size of the tuples used for index
96 * nodes. These index nodes are stored in special index tables in TUP.
97 * c_ctx.attrDataOffset
98 * This variable contains the offset within the data part of the index
99 * node where the actual node starts.
100 */
101
102 inline static void
prefetch_scan_record_3(Uint32 * scan_ptr)103 prefetch_scan_record_3(Uint32* scan_ptr)
104 {
105 NDB_PREFETCH_WRITE(scan_ptr);
106 NDB_PREFETCH_WRITE(scan_ptr + 16);
107 NDB_PREFETCH_WRITE(scan_ptr + 32);
108 }
109
void
Dbtux::prepare_scan_ctx(Uint32 scanPtrI)
{
  jamDebug();
  FragPtr fragPtr;
  ScanOpPtr scanPtr;
  IndexPtr indexPtr;
  scanPtr.i = scanPtrI;
  // Fetch without magic validation first so the prefetch below can be
  // issued as early as possible; the magic word is verified at the end
  // of this method once the cache lines have had time to arrive.
  ndbrequire(c_scanOpPool.getUncheckedPtrRW(scanPtr));
  prefetch_scan_record_3((Uint32*)scanPtr.p);
  c_ctx.scanPtr = scanPtr;
  // Derive fragment and index records from the scan record and cache
  // them in the scan context (see the block comment above for the full
  // list of context variables).
  fragPtr.i = scanPtr.p->m_fragPtrI;
  c_fragPool.getPtr(fragPtr);
  indexPtr.i = fragPtr.p->m_indexId;
  c_ctx.fragPtr = fragPtr;
  c_indexPool.getPtr(indexPtr);
  c_ctx.indexPtr = indexPtr;
  prepare_scan_bounds(scanPtr.p, indexPtr.p);
  prepare_all_tup_ptrs(c_ctx);
  // Deferred validation of the scan record fetched unchecked above.
  ndbrequire(Magic::check_ptr(scanPtr.p));
}
131
132 /**
133 * We are preparing to call scanNext to move a scan forward
134 * since the scan stopped on a row that is now being deleted.
135 * At this point we have already called prepare_build_ctx.
136 * Thus we need only setup the
137 * c_ctx.scanPtr and the variables setup in the method
138 * prepare_scan_bounds. Even the c_ctx.keyAttrs isn't
139 * necessary (setup in prepare_scan_bounds), it is kept to
140 * avoid having to call an extra method in the more
141 * common path coming from prepare_scan_ctx.
142 *
143 * We cannot call this method when we are performing a
144 * multi-threaded index build operation. This can only
145 * happen during a restart and during a restart a node
146 * cannot execute any scan operation.
147 */
148 void
prepare_move_scan_ctx(ScanOpPtr scanPtr)149 Dbtux::prepare_move_scan_ctx(ScanOpPtr scanPtr)
150 {
151 Index *indexPtrP = c_ctx.indexPtr.p;
152 c_ctx.scanPtr = scanPtr;
153 prepare_scan_bounds(scanPtr.p, indexPtrP);
154 }
155
/**
 * This method is called either from building of an index
 * or when updating an index from execTUX_MAINT_REQ. It sets
 * up the variables needed for index reorganisations. There is
 * no scan boundary in this case, there is only a key boundary,
 * but this is set up by the caller of this method.
 */
163 void
prepare_build_ctx(TuxCtx & ctx,FragPtr fragPtr)164 Dbtux::prepare_build_ctx(TuxCtx& ctx, FragPtr fragPtr)
165 {
166 IndexPtr indexPtr;
167 ctx.fragPtr = fragPtr;
168 indexPtr.i = fragPtr.p->m_indexId;
169 c_indexPool.getPtr(indexPtr);
170 ctx.indexPtr = indexPtr;
171 const Index& index = *indexPtr.p;
172 const DescHead& descHead = getDescHead(index);
173 const AttributeHeader* keyAttrs = getKeyAttrs(descHead);
174 ctx.keyAttrs = (Uint32*)keyAttrs;
175 prepare_all_tup_ptrs(ctx);
176 }
177
178 /**
179 * This method is called from prepare_scan_ctx after a real-time break has
180 * happened and we need to setup the scan context again.
181 *
182 * It is also called at start of a fragment scan setup from
183 * execTUX_BOUND_INFO.
184 *
185 * We also need to call it before moving the scan ahead after a row was
186 * deleted while we were processing a scan on the tuple. This code calls
187 * scanNext and moves to the next row and thus we need to setup this part
188 * of the scan context there as well.
189 */
void
Dbtux::prepare_scan_bounds(const ScanOp *scanPtrP, const Index *indexPtrP)
{
  jamDebug();
  const ScanOp& scan = *scanPtrP;
  const Index& index = *indexPtrP;

  // For an ascending scan (idir == 0) the end of the range is the
  // upper bound (m_scanBound[1]); for a descending scan it is the
  // lower bound (m_scanBound[0]). Hence the (1 - idir) indexing.
  const unsigned idir = scan.m_descending;
  const ScanBound& scanBound = scan.m_scanBound[1 - idir];
  if (likely(scanBound.m_cnt != 0))
  {
    jamDebug();
    // Reconstruct the bound from segmented memory into c_ctx.c_nextKey
    // and cache it as NdbPack array objects built in place (placement
    // new) inside the scan context. If no column is bounded
    // (scanBound.m_cnt == 0, e.g. a full ordered scan) these objects
    // are left untouched and scanBoundCnt = 0 signals they are unset.
    KeyDataC searchBoundData(index.m_keySpec, true);
    KeyBoundC searchBound(searchBoundData);
    unpackBound(c_ctx.c_nextKey, scanBound, searchBound);
    KeyDataArray *key_data = new (&c_ctx.searchScanDataArray)
      KeyDataArray();
    key_data->init_bound(searchBound, scanBound.m_cnt);
    KeyBoundArray *searchBoundArray = new (&c_ctx.searchScanBoundArray)
      KeyBoundArray(&index.m_keySpec,
                    key_data,
                    scanBound.m_side);
    // Object is only accessed through c_ctx; silence unused warning.
    (void)searchBoundArray;
  }
  // Attrinfo array for reading key values from TUP, plus scan
  // direction and bound column count cached for the scan code.
  const DescHead& descHead = getDescHead(index);
  const AttributeHeader* keyAttrs = getKeyAttrs(descHead);
  c_ctx.keyAttrs = (Uint32*)keyAttrs;
  c_ctx.descending = scan.m_descending;
  c_ctx.scanBoundCnt = scanBound.m_cnt;
}
220
221
/**
 * Continue a scan after a real-time break. The scan context (c_ctx)
 * has already been restored by prepare_scan_ctx. May yield back to
 * LQH if the scan waits for a row lock or a ScanLock record.
 */
void
Dbtux::execACC_CHECK_SCAN(Signal* signal)
{
  jamEntryDebug();
  const AccCheckScan *req = (const AccCheckScan*)signal->getDataPtr();
  ScanOpPtr scanPtr = c_ctx.scanPtr;
  ScanOp& scan = *scanPtr.p;
  Frag& frag = *c_ctx.fragPtr.p;
#ifdef VM_TRACE
  if (debugFlags & DebugScan) {
    tuxDebugOut << "ACC_CHECK_SCAN scan " << scanPtr.i << " " << scan << endl;
  }
#endif

  bool wait_scan_lock_record = check_freeScanLock(scan);
  if (req->checkLcpStop == AccCheckScan::ZCHECK_LCP_STOP &&
      (scan.m_lockwait ||
       wait_scan_lock_record))
  {
    /**
     * Go to sleep for one millisecond if we encounter a locked row.
     * Or if we could not allocate a ScanLock record.
     */
    jam();
    CheckLcpStop* cls = (CheckLcpStop*) signal->theData;
    cls->scanPtrI = scan.m_userPtr;
    if (wait_scan_lock_record)
    {
      jam();
      // Resource wait (no ScanLock record) may be stopped by LQH.
      cls->scanState = CheckLcpStop::ZSCAN_RESOURCE_WAIT_STOPPABLE;
    }
    else
    {
      jam();
      cls->scanState = CheckLcpStop::ZSCAN_RESOURCE_WAIT;
    }
    EXECUTE_DIRECT(DBLQH, GSN_CHECK_LCP_STOP, signal, 2);
    if (signal->theData[0] == CheckLcpStop::ZTAKE_A_BREAK)
    {
      jamEntry();
      // LQH granted a break; drop any reserved ScanLock and stop here.
      release_c_free_scan_lock();
      return;
    }
    jamEntry();
    ndbrequire(signal->theData[0] == CheckLcpStop::ZABORT_SCAN);
    /* Fall through, we will send NEXT_SCANCONF, this will detect close */
  }
  continue_scan(signal, scanPtr, frag, wait_scan_lock_record);
  ndbassert(c_freeScanLock == RNIL); // No ndbrequire, will destroy tail call
}
272
273 /*
274 * Error handling: Any seized scan op is released. ACC_SCANREF is sent
275 * to LQH. LQH sets error code, and treats this like ZEMPTY_FRAGMENT.
276 * Therefore scan is now closed on both sides.
277 */
void
Dbtux::execACC_SCANREQ(Signal* signal)
{
  jamEntry();
  const AccScanReq *req = (const AccScanReq*)signal->getDataPtr();
  Uint32 errorCode = 0;
  ScanOpPtr scanPtr;
  scanPtr.i = RNIL;
  // do/while(0) so error paths can 'break' to the common ref handling.
  do {
    // get the index
    IndexPtr indexPtr;
    c_indexPool.getPtr(indexPtr, req->tableId);
    // get the fragment
    FragPtr fragPtr;
    findFrag(jamBuffer(), *indexPtr.p, req->fragmentNo, fragPtr);
    ndbrequire(fragPtr.i != RNIL);
    Frag& frag = *fragPtr.p;
    // check for index not Online (i.e. Dropping)
    c_ctx.indexPtr = indexPtr;
    c_ctx.fragPtr = fragPtr;
    if (unlikely(indexPtr.p->m_state != Index::Online)) {
      jam();
#ifdef VM_TRACE
      if (debugFlags & (DebugMeta | DebugScan)) {
        tuxDebugOut << "Index dropping at ACC_SCANREQ " << indexPtr.i
                    << " " << *indexPtr.p << endl;
      }
#endif
      errorCode = AccScanRef::TuxIndexNotOnline;
      break;
    }
    // must be normal DIH/TC fragment
    TreeHead& tree = frag.m_tree;
    // check for empty fragment
    if (tree.m_root == NullTupLoc)
    {
      jam();
      scanPtr.p = NULL;
      c_ctx.scanPtr = scanPtr; // Ensure crash if we try to use pointer.
      // Empty fragment: confirm immediately without seizing a scan op.
      AccScanConf* const conf = (AccScanConf*)signal->getDataPtrSend();
      conf->scanPtr = req->senderData;
      conf->accPtr = RNIL;
      conf->flag = AccScanConf::ZEMPTY_FRAGMENT;
      signal->theData[8] = 0;
      /* Return ACC_SCANCONF */
      return;
    }
    // seize from pool and link to per-fragment list
    if (ERROR_INSERTED(12008) ||
        ! c_scanOpPool.seize(scanPtr)) {
      CLEAR_ERROR_INSERT_VALUE;
      jam();
      // should never happen but can be used to test error handling
      errorCode = AccScanRef::TuxNoFreeScanOp;
      break;
    }
    frag.m_scanList.addFirst(scanPtr);
    // Initialize the scan op from the request.
    scanPtr.p->m_state = ScanOp::First;
    scanPtr.p->m_userPtr = req->senderData;
    scanPtr.p->m_userRef = req->senderRef;
    scanPtr.p->m_tableId = indexPtr.p->m_tableId;
    scanPtr.p->m_indexId = indexPtr.i;
    scanPtr.p->m_fragId = fragPtr.p->m_fragId;
    scanPtr.p->m_fragPtrI = fragPtr.i;
    scanPtr.p->m_transId1 = req->transId1;
    scanPtr.p->m_transId2 = req->transId2;
    scanPtr.p->m_savePointId = req->savePointId;
    scanPtr.p->m_readCommitted =
      AccScanReq::getReadCommittedFlag(req->requestInfo);
    scanPtr.p->m_lockMode = AccScanReq::getLockMode(req->requestInfo);
    scanPtr.p->m_descending = AccScanReq::getDescendingFlag(req->requestInfo);
    c_ctx.scanPtr = scanPtr;
    /*
     * readCommitted lockMode keyInfo
     * 1 0 0 - read committed (no lock)
     * 0 0 0 - read latest (read lock)
     * 0 1 1 - read exclusive (write lock)
     */
    const bool isStatScan = AccScanReq::getStatScanFlag(req->requestInfo);
    if (unlikely(isStatScan)) {
      jam();
      // Check if index stat can handle this index length
      Uint32 indexMaxKeyBytes = indexPtr.p->m_keySpec.get_max_data_len(false);
      if (indexMaxKeyBytes > (StatOp::MaxKeySize * 4)) {
        jam();
        errorCode = AccScanRef::TuxInvalidKeySize;
        break;
      }

      // Stat scans must run read-committed (lock-free).
      if (!scanPtr.p->m_readCommitted) {
        jam();
        errorCode = AccScanRef::TuxInvalidLockMode;
        break;
      }
      StatOpPtr statPtr;
      if (!c_statOpPool.seize(statPtr)) {
        jam();
        errorCode = AccScanRef::TuxNoFreeStatOp;
        break;
      }
      scanPtr.p->m_statOpPtrI = statPtr.i;
      new (statPtr.p) StatOp(*indexPtr.p);
      statPtr.p->m_scanOpPtrI = scanPtr.i;
      // rest of StatOp is initialized in execTUX_BOUND_INFO
#ifdef VM_TRACE
      if (debugFlags & DebugStat) {
        tuxDebugOut << "Seize stat op" << endl;
      }
#endif
    }
#ifdef VM_TRACE
    if (debugFlags & DebugScan) {
      tuxDebugOut << "Seize scan " << scanPtr.i << " " << *scanPtr.p << endl;
    }
#endif
    // conf
    AccScanConf* const conf = (AccScanConf*)signal->getDataPtrSend();
    conf->scanPtr = req->senderData;
    conf->accPtr = scanPtr.i;
    conf->flag = AccScanConf::ZNOT_EMPTY_FRAGMENT;
    signal->theData[8] = 0;
    /* Return ACC_SCANCONF */
    return;
  } while (0);
  // Error path: release the scan op if one was seized before failure.
  if (scanPtr.i != RNIL) {
    jam();
    releaseScanOp(scanPtr);
  }
  // ref
  ndbrequire(errorCode != 0);
  signal->theData[8] = errorCode;
  /* Return ACC_SCANREF */
}
411
412 /*
413 * Receive bounds for scan in single direct call. The bounds can arrive
414 * in any order. Attribute ids are those of index table.
415 *
416 * Replace EQ by equivalent LE + GE. Check for conflicting bounds.
417 * Check that sets of lower and upper bounds are on initial sequences of
418 * keys and that all but possibly last bound is non-strict.
419 *
420 * Finally convert the sets of lower and upper bounds (i.e. start key
421 * and end key) to NdbPack format. The data is saved in segmented
422 * memory. The bound is reconstructed at use time via unpackBound().
423 *
424 * Error handling: Error code is set in the scan and also returned in
425 * EXECUTE_DIRECT (the old way).
426 */
void
Dbtux::execTUX_BOUND_INFO(Signal* signal)
{
  jamEntry();
  // get records
  TuxBoundInfo* const req = (TuxBoundInfo*)signal->getDataPtrSend();
  ScanOpPtr scanPtr = c_ctx.scanPtr;
  ScanOp& scan = *scanPtr.p;
  const Index& index = *c_ctx.indexPtr.p;

  // compiler warning unused: const DescHead& descHead = getDescHead(index);
  // compiler warning unused: const KeyType* keyTypes = getKeyTypes(descHead);
  // data passed in Signal
  const Uint32* const boundData = &req->data[0];
  Uint32 boundLen = req->boundAiLength;
  Uint32 boundOffset = 0;
  // initialize stats scan
  if (unlikely(scan.m_statOpPtrI != RNIL))
  {
    // stats options before bounds
    StatOpPtr statPtr;
    statPtr.i = scan.m_statOpPtrI;
    c_statOpPool.getPtr(statPtr);
    Uint32 usedLen = 0;
    if (unlikely(statScanInit(statPtr, boundData, boundLen, &usedLen) == -1))
    {
      jam();
      ndbrequire(scan.m_errorCode != 0);
      req->errorCode = scan.m_errorCode;
      return;
    }
    // Skip the stats option words; the remainder is bound data.
    ndbrequire(usedLen <= boundLen);
    boundLen -= usedLen;
    boundOffset += usedLen;
  }
  // extract lower and upper bound in separate passes
  // (idir == 0: lower bound, idir == 1: upper bound; each pass scans
  //  all attrinfo words and picks the entries belonging to its side)
  for (unsigned idir = 0; idir <= 1; idir++)
  {
    jamDebug();
    struct BoundInfo {
      int type2; // with EQ -> LE/GE
      Uint32 offset; // word offset in signal data
      Uint32 bytes;
    };
    BoundInfo boundInfo[MaxIndexAttributes];
    // largest attrId seen plus one
    Uint32 maxAttrId = 0;
    const Uint32* const data = &boundData[boundOffset];
    Uint32 offset = 0;
    // Each entry is: bound type word, AttributeHeader word, data words.
    while (offset + 2 <= boundLen) {
      jamDebug();
      const Uint32 type = data[offset];
      const AttributeHeader* ah = (const AttributeHeader*)&data[offset + 1];
      const Uint32 attrId = ah->getAttributeId();
      const Uint32 byteSize = ah->getByteSize();
      const Uint32 dataSize = ah->getDataSize();
      // check type
      if (unlikely(type > 4))
      {
        jam();
        scan.m_errorCode = TuxBoundInfo::InvalidAttrInfo;
        req->errorCode = scan.m_errorCode;
        return;
      }
      Uint32 type2 = type;
      if (type2 == 4)
      {
        jamDebug();
        // EQ is replaced by the equivalent LE (lower pass) or GE
        // (upper pass) so it contributes to both bounds.
        type2 = (idir << 1); // LE=0 GE=2
      }
      // check if attribute belongs to this bound
      if ((type2 & 0x2) == (idir << 1))
      {
        if (unlikely(attrId >= index.m_numAttrs))
        {
          jam();
          scan.m_errorCode = TuxBoundInfo::InvalidAttrInfo;
          req->errorCode = scan.m_errorCode;
          return;
        }
        // mark entries in any gap as undefined
        while (maxAttrId <= attrId)
        {
          jamDebug();
          BoundInfo& b = boundInfo[maxAttrId];
          b.type2 = -1;
          maxAttrId++;
        }
        BoundInfo& b = boundInfo[attrId];
        // duplicate no longer allowed (wl#4163)
        if (unlikely(b.type2 != -1))
        {
          jam();
          scan.m_errorCode = TuxBoundInfo::InvalidBounds;
          req->errorCode = scan.m_errorCode;
          return;
        }
        b.type2 = (int)type2;
        b.offset = offset + 1; // poai
        b.bytes = byteSize;
      }
      // jump to next
      offset += 2 + dataSize;
    }
    // The attrinfo words must be consumed exactly.
    if (unlikely(offset != boundLen))
    {
      jam();
      scan.m_errorCode = TuxBoundInfo::InvalidAttrInfo;
      req->errorCode = scan.m_errorCode;
      return;
    }
    // check and pack the bound data
    KeyData searchBoundData(index.m_keySpec, true, 0);
    KeyBound searchBound(searchBoundData);
    searchBoundData.set_buf(c_ctx.c_searchKey, MaxAttrDataSize << 2);
    int strict = 0; // 0 or 1
    Uint32 i;
    for (i = 0; i < maxAttrId; i++)
    {
      jamDebug();
      const BoundInfo& b = boundInfo[i];
      // check for gap or strict bound before last
      strict = (b.type2 & 0x1);
      if (unlikely(b.type2 == -1 || (i + 1 < maxAttrId && strict)))
      {
        jam();
        scan.m_errorCode = TuxBoundInfo::InvalidBounds;
        req->errorCode = scan.m_errorCode;
        return;
      }
      Uint32 len;
      // Append the bound value in poai format; length must match the
      // byte size declared in the AttributeHeader.
      if (unlikely(searchBoundData.add_poai(&data[b.offset], &len) == -1 ||
                   b.bytes != len))
      {
        jam();
        scan.m_errorCode = TuxBoundInfo::InvalidCharFormat;
        req->errorCode = scan.m_errorCode;
        return;
      }
    }
    int side = 0;
    if (maxAttrId != 0)
    {
      // arithmetic is faster
      // side = (idir == 0 ? (strict ? +1 : -1) : (strict ? -1 : +1));
      side = (-1) * (1 - 2 * strict) * (1 - 2 * int(idir));
    }
    if (unlikely(searchBound.finalize(side) == -1))
    {
      jam();
      scan.m_errorCode = TuxBoundInfo::InvalidCharFormat;
      req->errorCode = scan.m_errorCode;
      return;
    }
    ScanBound& scanBound = scan.m_scanBound[idir];
    scanBound.m_cnt = maxAttrId;
    scanBound.m_side = side;
    // save data words in segmented memory
    {
      ScanBoundBuffer::Head& head = scanBound.m_head;
      LocalScanBoundBuffer b(c_scanBoundPool, head);
      const Uint32* data = (const Uint32*)searchBoundData.get_data_buf();
      // Round byte length up to whole words.
      Uint32 size = (searchBoundData.get_data_len() + 3) / 4;
      bool ok = b.append(data, size);
      if (unlikely(!ok))
      {
        jam();
        scan.m_errorCode = TuxBoundInfo::OutOfBuffers;
        req->errorCode = scan.m_errorCode;
        return;
      }
    }
  }
  if (ERROR_INSERTED(12009)) {
    jam();
    CLEAR_ERROR_INSERT_VALUE;
    scan.m_errorCode = TuxBoundInfo::InvalidBounds;
    req->errorCode = scan.m_errorCode;
    return;
  }
  // Set up the remaining scan context now that the bounds are stored.
  prepare_scan_bounds(scanPtr.p, c_ctx.indexPtr.p);
  prepare_all_tup_ptrs(c_ctx);
  // no error
  req->errorCode = 0;
}
612
/**
 * Advance, commit, or close a scan on request from LQH. For
 * ZSCAN_COMMIT / ZSCAN_NEXT_COMMIT the lock on the previous row is
 * handed back to ACC; for ZSCAN_CLOSE the scan is unlinked from the
 * tree and any outstanding lock request is aborted.
 */
void
Dbtux::execNEXT_SCANREQ(Signal* signal)
{
  const NextScanReq *req = (const NextScanReq*)signal->getDataPtr();
  ScanOp& scan = *c_ctx.scanPtr.p;
  Frag& frag = *c_ctx.fragPtr.p;
  Uint32 scanFlag = req->scanFlag;
#ifdef VM_TRACE
  if (debugFlags & DebugScan) {
    tuxDebugOut << "NEXT_SCANREQ scan " << c_ctx.scanPtr.i << " "
                << scan << endl;
  }
#endif
  // handle unlock previous and close scan
  switch (scanFlag) {
  case NextScanReq::ZSCAN_NEXT:
    jamDebug();
    break;
  case NextScanReq::ZSCAN_COMMIT:
    jamDebug();
    // Fall through
  case NextScanReq::ZSCAN_NEXT_COMMIT:
    jamDebug();
    if (! scan.m_readCommitted)
    {
      jam();
      // Locking scan: release the row lock held on the previous row.
      Uint32 accOperationPtr = req->accOperationPtr;
      AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend();
      lockReq->returnCode = RNIL;
      lockReq->requestInfo = AccLockReq::Unlock;
      lockReq->accOpPtr = accOperationPtr;
      EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal, AccLockReq::UndoSignalLength);
      jamEntryDebug();
      ndbrequire(lockReq->returnCode == AccLockReq::Success);
      removeAccLockOp(c_ctx.scanPtr, accOperationPtr);
    }
    if (scanFlag == NextScanReq::ZSCAN_COMMIT)
    {
      jamDebug();
      signal->theData[0] = 0; /* Success */
      /**
       * Return with signal->theData[0] = 0 means a return
       * signal NEXT_SCANCONF for NextScanReq::ZSCAN_COMMIT
       */
      return;
    }
    break;
  case NextScanReq::ZSCAN_CLOSE:
    jamDebug();
    // unlink from tree node first to avoid state changes
    if (scan.m_scanPos.m_loc != NullTupLoc)
    {
      jam();
      const TupLoc loc = scan.m_scanPos.m_loc;
      NodeHandle node(frag);
      selectNode(c_ctx, node, loc);
      unlinkScan(node, c_ctx.scanPtr);
      scan.m_scanPos.m_loc = NullTupLoc;
    }
    if (unlikely(scan.m_lockwait))
    {
      jam();
      ndbrequire(scan.m_accLockOp != RNIL);
      // use ACC_ABORTCONF to flush out any reply in job buffer
      AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend();
      lockReq->returnCode = RNIL;
      lockReq->requestInfo = AccLockReq::AbortWithConf;
      lockReq->accOpPtr = scan.m_accLockOp;
      EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal,
                     AccLockReq::UndoSignalLength);
      jamEntry();
      ndbrequire(lockReq->returnCode == AccLockReq::Success);
      // Close completes later when ACC_ABORTCONF arrives.
      scan.m_state = ScanOp::Aborting;
      return;
    }
    if (scan.m_state == ScanOp::Locked)
    {
      jam();
      // Holding a lock on the current row: abort it before closing.
      ndbrequire(scan.m_accLockOp != RNIL);
      AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend();
      lockReq->returnCode = RNIL;
      lockReq->requestInfo = AccLockReq::Abort;
      lockReq->accOpPtr = scan.m_accLockOp;
      EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal,
                     AccLockReq::UndoSignalLength);
      jamEntry();
      ndbrequire(lockReq->returnCode == AccLockReq::Success);
      scan.m_accLockOp = RNIL;
    }
    scan.m_state = ScanOp::Aborting;
    scanClose(signal, c_ctx.scanPtr);
    return;
  case NextScanReq::ZSCAN_NEXT_ABORT:
    ndbabort();
  default:
    jam();
    ndbabort();
  }
  // ZSCAN_NEXT / ZSCAN_NEXT_COMMIT: move the scan forward.
  bool wait_scan_lock_record = check_freeScanLock(scan);
  continue_scan(signal, c_ctx.scanPtr, frag, wait_scan_lock_record);
  ndbassert(c_freeScanLock == RNIL); // No ndbrequire, will destroy tail call
}
715
/**
 * Core scan driver shared by execACC_CHECK_SCAN and execNEXT_SCANREQ.
 * Moves the scan forward (scanFirst / scanFind), acquires a row lock
 * via DBACC when the scan is not read-committed, and reports the
 * result to LQH with NEXT_SCANCONF. May yield via CHECK_LCP_STOP.
 * On every return path any reserved ScanLock record must have been
 * released (asserted by the callers).
 */
void
Dbtux::continue_scan(Signal *signal,
                     ScanOpPtr scanPtr,
                     Frag& frag,
                     bool wait_scan_lock_record)
{
  ScanOp& scan = *scanPtr.p;
#ifdef VM_TRACE
  if (debugFlags & DebugScan) {
    tuxDebugOut << "ACC_CHECK_SCAN scan " << scanPtr.i << " " << scan << endl;
  }
#endif
  const Index& index = *c_ctx.indexPtr.p;
  if (unlikely(scan.m_lockwait || wait_scan_lock_record))
  {
    jam();
    /**
     * LQH asks if we are waiting for lock and we tell it to ask again
     * Used to check if TC has ordered close both in situations where we
     * cannot allocate a lock record and when we encountered a locked row.
     */
    release_c_free_scan_lock();
    NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend();
    conf->scanPtr = scan.m_userPtr;
    conf->accOperationPtr = RNIL; // no tuple returned
    conf->fragId = frag.m_fragId;
    // if TC has ordered scan close, it will be detected here
    sendSignal(scan.m_userRef,
               GSN_NEXT_SCANCONF,
               signal,
               NextScanConf::SignalLengthNoTuple,
               JBB);
    return; // stop
  }
  // check index online
  if (unlikely(index.m_state != Index::Online) &&
      scan.m_errorCode == 0)
  {
    jam();
#ifdef VM_TRACE
    if (debugFlags & (DebugMeta | DebugScan)) {
      tuxDebugOut << "Index dropping at execACC_CHECK_SCAN " << scanPtr.i
                  << " " << *scanPtr.p << endl;
    }
#endif
    scan.m_errorCode = AccScanRef::TuxIndexNotOnline;
  }
  if (unlikely(scan.m_errorCode != 0))
  {
    jamDebug();
    // Report error to LQH via NEXT_SCANCONF with fragId == RNIL.
    release_c_free_scan_lock();
    NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend();
    conf->scanPtr = scan.m_userPtr;
    conf->accOperationPtr = RNIL;
    conf->fragId = RNIL;
    signal->setLength(NextScanConf::SignalLengthNoTuple);
    c_lqh->exec_next_scan_conf(signal);
    return;
  }
  if (scan.m_state == ScanOp::First)
  {
    jamDebug();
    // search is done only once in single range scan
    scanFirst(scanPtr, frag, index);
  }
  if (scan.m_state == ScanOp::Current ||
      scan.m_state == ScanOp::Next)
  {
    jamDebug();
    // look for next
    scanFind(scanPtr, frag);
  }
  // for reading tuple key in Found or Locked state
  Uint32* pkData = c_ctx.c_dataBuffer;
  unsigned pkSize = 0; // indicates not yet done
  if (likely(scan.m_state == ScanOp::Found))
  {
    // found an entry to return
    jamDebug();
    ndbrequire(scan.m_accLockOp == RNIL);
    if (unlikely(! scan.m_readCommitted))
    {
      jamDebug();
      const TreeEnt ent = scan.m_scanEnt;
      // read tuple key
      readTablePk(ent, pkData, pkSize);
      // get read lock or exclusive lock
      AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend();
      lockReq->returnCode = RNIL;
      lockReq->requestInfo =
        scan.m_lockMode == 0 ? AccLockReq::LockShared : AccLockReq::LockExclusive;
      lockReq->accOpPtr = RNIL;
      lockReq->userPtr = scanPtr.i;
      lockReq->userRef = reference();
      lockReq->tableId = scan.m_tableId;
      lockReq->fragId = frag.m_fragId;
      lockReq->fragPtrI = frag.m_accTableFragPtrI;
      // Hash the primary key to locate the lock queue in ACC.
      const Uint32* const buf32 = static_cast<Uint32*>(pkData);
      const Uint64* const buf64 = reinterpret_cast<const Uint64*>(buf32);
      lockReq->hashValue = md5_hash(buf64, pkSize);
      Uint32 lkey1, lkey2;
      getTupAddr(frag, ent, lkey1, lkey2);
      lockReq->page_id = lkey1;
      lockReq->page_idx = lkey2;
      lockReq->transId1 = scan.m_transId1;
      lockReq->transId2 = scan.m_transId2;
      lockReq->isCopyFragScan = ZFALSE;
      // execute
      EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal, AccLockReq::LockSignalLength);
      jamEntryDebug();
      switch (lockReq->returnCode)
      {
        case AccLockReq::Success:
        {
          jam();
          // Lock granted immediately; row can be returned to LQH.
          scan.m_state = ScanOp::Locked;
          scan.m_accLockOp = lockReq->accOpPtr;
#ifdef VM_TRACE
          if (debugFlags & (DebugScan | DebugLock))
          {
            tuxDebugOut << "Lock immediate scan " << scanPtr.i << " "
                        << scan << endl;
          }
#endif
          break;
        }
        case AccLockReq::IsBlocked:
        {
          jam();
          // normal lock wait
          scan.m_state = ScanOp::Blocked;
          scan.m_lockwait = true;
          scan.m_accLockOp = lockReq->accOpPtr;
#ifdef VM_TRACE
          if (debugFlags & (DebugScan | DebugLock))
          {
            tuxDebugOut << "Lock wait scan " << scanPtr.i << " " << scan << endl;
          }
#endif
          // LQH will wake us up
          CheckLcpStop* cls = (CheckLcpStop*) signal->theData;
          cls->scanPtrI = scan.m_userPtr;
          cls->scanState = CheckLcpStop::ZSCAN_RESOURCE_WAIT;
          EXECUTE_DIRECT(DBLQH, GSN_CHECK_LCP_STOP, signal, 2);
          if (signal->theData[0] == CheckLcpStop::ZTAKE_A_BREAK)
          {
            jamEntry();
            /* Normal path */
            release_c_free_scan_lock();
            return; // stop for a while
          }
          jamEntry();
          /* DBTC has most likely aborted due to timeout */
          ndbrequire(signal->theData[0] == CheckLcpStop::ZABORT_SCAN);
          /* Ensure that we send NEXT_SCANCONF immediately to close */
          scan.m_state = ScanOp::Last;
          break;
        }
        case AccLockReq::Refused:
        {
          jam();
          // we cannot see deleted tuple (assert only)
          g_eventLogger->info("(%u) Refused tab(%u,%u) row(%u,%u)",
                              instance(),
                              scan.m_tableId,
                              frag.m_fragId,
                              lkey1,
                              lkey2);
          ndbassert(false);
          // skip it
          scan.m_state = ScanOp::Next;
          CheckLcpStop* cls = (CheckLcpStop*) signal->theData;
          cls->scanPtrI = scan.m_userPtr;
          cls->scanState = CheckLcpStop::ZSCAN_RESOURCE_WAIT;
          EXECUTE_DIRECT(DBLQH, GSN_CHECK_LCP_STOP, signal, 2);
          if (signal->theData[0] == CheckLcpStop::ZTAKE_A_BREAK)
          {
            jamEntry();
            /* Normal path */
            release_c_free_scan_lock();
            return; // stop for a while
          }
          jamEntry();
          /* DBTC has most likely aborted due to timeout */
          ndbrequire(signal->theData[0] == CheckLcpStop::ZABORT_SCAN);
          /* Ensure that we send NEXT_SCANCONF immediately to close */
          scan.m_state = ScanOp::Last;
          break;
        }
        case AccLockReq::NoFreeOp:
        {
          jam();
          // stay in Found state
          // ACC is out of operation records; retry the same row later.
          scan.m_state = ScanOp::Found;
          CheckLcpStop* cls = (CheckLcpStop*) signal->theData;
          cls->scanPtrI = scan.m_userPtr;
          cls->scanState = CheckLcpStop::ZSCAN_RESOURCE_WAIT_STOPPABLE;
          EXECUTE_DIRECT(DBLQH, GSN_CHECK_LCP_STOP, signal, 2);
          if (signal->theData[0] == CheckLcpStop::ZTAKE_A_BREAK)
          {
            jamEntry();
            /* Normal path */
            release_c_free_scan_lock();
            return; // stop for a while
          }
          jamEntry();
          ndbrequire(signal->theData[0] == CheckLcpStop::ZABORT_SCAN);
          /* Ensure that we send NEXT_SCANCONF immediately to close */
          scan.m_state = ScanOp::Last;
          break;
        }
        default:
          ndbabort();
      }
    }
    else
    {
      // Read-committed scan needs no lock; treat the row as "locked".
      scan.m_state = ScanOp::Locked;
    }
  }
  else if (scan.m_state == ScanOp::Next)
  {
    jam();
    // Taking a break from searching the tree
    release_c_free_scan_lock();
    CheckLcpStop* cls = (CheckLcpStop*) signal->theData;
    cls->scanPtrI = scan.m_userPtr;
    cls->scanState = CheckLcpStop::ZSCAN_RUNNABLE_YIELD;
    EXECUTE_DIRECT(DBLQH, GSN_CHECK_LCP_STOP, signal, 2);
    jam();
    ndbassert(signal->theData[0] == CheckLcpStop::ZTAKE_A_BREAK);
    return;
  }
  if (likely(scan.m_state == ScanOp::Locked))
  {
    // we have lock or do not need one
    jamDebug();
    // read keys if not already done (uses signal)
    const TreeEnt ent = scan.m_scanEnt;
    // conf signal
    NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend();
    conf->scanPtr = scan.m_userPtr;
    // the lock is passed to LQH
    Uint32 accLockOp = scan.m_accLockOp;
    if (unlikely(accLockOp != RNIL))
    {
      scan.m_accLockOp = RNIL;
      // remember it until LQH unlocks it
      addAccLockOp(scanPtr, accLockOp);
    }
    else
    {
      ndbrequire(scan.m_readCommitted);
      // operation RNIL in LQH would signal no tuple returned
      accLockOp = (Uint32)-1;
    }
    ndbrequire(c_freeScanLock == RNIL);
    conf->accOperationPtr = accLockOp;
    conf->fragId = frag.m_fragId;
    const TupLoc tupLoc = ent.m_tupLoc;
    Uint32 lkey1 = tupLoc.getPageId();
    Uint32 lkey2 = tupLoc.getPageOffset();
    conf->localKey[0] = lkey1;
    conf->localKey[1] = lkey2;
    /**
     * We can arrive here from a delayed CONTINUEB signal from
     * LQH when we are waiting for a locked row and we now
     * acquired the lock. To ensure that we have properly
     * setup for execution of execTUPKEYREQ we call
     * prepare_scan_tux_TUPKEYREQ here even if we already did
     * it from ACC. Also needed to ensure proper operation of
     * ndbassert's in debug mode.
     */
    c_tup->prepare_scan_tux_TUPKEYREQ(lkey1, lkey2);
    // add key info
    // next time look for next entry
    scan.m_state = ScanOp::Next;
    signal->setLength(NextScanConf::SignalLengthNoGCI);
    c_lqh->exec_next_scan_conf(signal);
    return;
  }
  // In ACC this is checked before req->checkLcpStop
  if (scan.m_state == ScanOp::Last)
  {
    jamDebug();
    // Range exhausted (or forced close): report scan end to LQH.
    release_c_free_scan_lock();
    NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend();
    conf->scanPtr = scan.m_userPtr;
    conf->accOperationPtr = RNIL;
    conf->fragId = RNIL;
    signal->setLength(NextScanConf::SignalLengthNoTuple);
    c_lqh->exec_next_scan_conf(signal);
    return;
  }
  // Any other state here is a protocol violation.
  ndbabort();
}
1012
/*
 * Lock succeeded (after delay) in ACC. If the lock is for current
 * entry, set state to Locked. If the lock is for an entry we were
 * moved away from, simply unlock it. Finally, if we are closing the
 * scan, do nothing since we have already sent an abort request.
 */
void
Dbtux::execACCKEYCONF(Signal* signal)
{
  jamEntry();
  ScanOpPtr scanPtr;
  scanPtr.i = signal->theData[0];
  ndbrequire(c_scanOpPool.getValidPtr(scanPtr));
  ScanOp& scan = *scanPtr.p;
#ifdef VM_TRACE
  if (debugFlags & (DebugScan | DebugLock)) {
    tuxDebugOut << "Lock obtained scan " << scanPtr.i << " " << scan << endl;
  }
#endif
  // this reply is only expected while waiting for a lock
  ndbrequire(scan.m_lockwait && scan.m_accLockOp != RNIL);
  scan.m_lockwait = false;
  if (scan.m_state == ScanOp::Blocked) {
    // the lock wait was for current entry
    jam();
    scan.m_state = ScanOp::Locked;
    // LQH has the ball
    return;
  }
  if (scan.m_state != ScanOp::Aborting) {
    // we were moved, release lock
    jam();
    AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend();
    lockReq->returnCode = RNIL;
    lockReq->requestInfo = AccLockReq::Abort;
    lockReq->accOpPtr = scan.m_accLockOp;
    // direct request to ACC, reply arrives in the same signal buffer
    EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal, AccLockReq::UndoSignalLength);
    jamEntry();
    ndbrequire(lockReq->returnCode == AccLockReq::Success);
    scan.m_accLockOp = RNIL;
    // LQH has the ball
    return;
  }
  // lose the lock
  scan.m_accLockOp = RNIL;
  // continue at ACC_ABORTCONF
}
1059
/*
 * Lock failed (after delay) in ACC. Probably means somebody ahead of
 * us in lock queue deleted the tuple.
 */
void
Dbtux::execACCKEYREF(Signal* signal)
{
  jamEntry();
  ScanOpPtr scanPtr;
  scanPtr.i = signal->theData[0];
  ndbrequire(c_scanOpPool.getValidPtr(scanPtr));
  ScanOp& scan = *scanPtr.p;
#ifdef VM_TRACE
  if (debugFlags & (DebugScan | DebugLock)) {
    tuxDebugOut << "Lock refused scan " << scanPtr.i << " " << scan << endl;
  }
#endif
  // this reply is only expected while waiting for a lock
  ndbrequire(scan.m_lockwait && scan.m_accLockOp != RNIL);
  scan.m_lockwait = false;
  if (scan.m_state != ScanOp::Aborting) {
    jam();
    // release the operation
    AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend();
    lockReq->returnCode = RNIL;
    lockReq->requestInfo = AccLockReq::Abort;
    lockReq->accOpPtr = scan.m_accLockOp;
    // direct request to ACC, reply arrives in the same signal buffer
    EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal, AccLockReq::UndoSignalLength);
    jamEntry();
    ndbrequire(lockReq->returnCode == AccLockReq::Success);
    scan.m_accLockOp = RNIL;
    // scan position should already have been moved (assert only)
    if (scan.m_state == ScanOp::Blocked) {
      jam();
      // can happen when Dropping
#ifdef VM_TRACE
      const Frag& frag = *c_fragPool.getPtr(scan.m_fragPtrI);
      const Index& index = *c_indexPool.getPtr(frag.m_indexId);
      ndbassert(index.m_state != Index::Online);
#endif
      scan.m_state = ScanOp::Next;
    }
    // LQH has the ball
    return;
  }
  // lose the lock
  scan.m_accLockOp = RNIL;
  // continue at ACC_ABORTCONF
}
1108
/*
 * Received when scan is closing. This signal arrives after any
 * ACCKEYCONF or ACCKEYREF which may have been in job buffer.
 */
void
Dbtux::execACC_ABORTCONF(Signal* signal)
{
  jamEntry();
  ScanOpPtr scanPtr;
  scanPtr.i = signal->theData[0];
  ndbrequire(c_scanOpPool.getValidPtr(scanPtr));
  ScanOp& scan = *scanPtr.p;
#ifdef VM_TRACE
  if (debugFlags & (DebugScan | DebugLock)) {
    tuxDebugOut << "ACC_ABORTCONF scan " << scanPtr.i << " " << scan << endl;
  }
#endif
  // re-establish the LQH scan context before completing the close
  c_lqh->setup_scan_pointers(scan.m_userPtr);
  ndbrequire(scan.m_state == ScanOp::Aborting);
  // most likely we are still in lock wait
  if (scan.m_lockwait) {
    jam();
    scan.m_lockwait = false;
    scan.m_accLockOp = RNIL;
  }
  // finish the close: sends conf/ref and releases the scan record
  scanClose(signal, scanPtr);
}
1136
/*
 * Find start position for single range scan.
 */
void
Dbtux::scanFirst(ScanOpPtr scanPtr, Frag& frag, const Index& index)
{
  ScanOp& scan = *scanPtr.p;
  // scan direction 0, 1
  const unsigned idir = c_ctx.descending;
  // set up bound from segmented memory
  const ScanBound& scanBound = scan.m_scanBound[idir];
  KeyDataC searchBoundData(index.m_keySpec, true);
  KeyBoundC searchBound(searchBoundData);
  unpackBound(c_ctx.c_searchKey, scanBound, searchBound);

  // placement new into pre-allocated context areas, no heap allocation
  KeyDataArray *key_data = new (&c_ctx.searchKeyDataArray)
    KeyDataArray();
  key_data->init_bound(searchBound, scanBound.m_cnt);
  KeyBoundArray *searchBoundArray = new (&c_ctx.searchKeyBoundArray)
    KeyBoundArray(&index.m_keySpec,
                  &c_ctx.searchKeyDataArray,
                  scanBound.m_side);

  // descend the tree to the scan start position
  TreePos treePos;
  searchToScan(frag, idir, *searchBoundArray, treePos);
  if (likely(treePos.m_loc != NullTupLoc))
  {
    scan.m_scanPos = treePos;
    // link the scan to node found
    NodeHandle node(frag);
    selectNode(c_ctx, node, treePos.m_loc);
    linkScan(node, scanPtr);
    if (likely(treePos.m_dir == 3))
    {
      jamDebug();
      // check upper bound
      TreeEnt ent = node.getEnt(treePos.m_pos);
      const TupLoc tupLoc = ent.m_tupLoc;
      jamDebug();
      c_tup->prepare_scan_tux_TUPKEYREQ(tupLoc.getPageId(),
                                        tupLoc.getPageOffset());
      jamDebug();
      if (unlikely(scanCheck(scan, ent)))
      {
        // entry is within range, it becomes the current entry
        jamDebug();
        c_ctx.m_current_ent = ent;
        scan.m_state = ScanOp::Current;
      }
      else
      {
        // first entry is already outside the range, scan is done
        jamDebug();
        scan.m_state = ScanOp::Last;
      }
    } else {
      // positioned between entries, scanNext will advance to the first one
      jamDebug();
      scan.m_state = ScanOp::Next;
    }
  } else {
    // no start position found, nothing to scan
    jamDebug();
    scan.m_state = ScanOp::Last;
  }
}
1199
/*
 * Look for entry to return as scan result.
 */
void
Dbtux::scanFind(ScanOpPtr scanPtr, Frag& frag)
{
  ScanOp& scan = *scanPtr.p;
  Uint32 scan_state = scan.m_state;
  ndbassert(scan_state == ScanOp::Current || scan_state == ScanOp::Next);
  while (1)
  {
    jamDebug();
    if (scan_state == ScanOp::Next)
    {
      // advance to the next entry in the tree
      scan_state = scanNext(scanPtr, false, frag);
    }
    else
    {
      // already positioned on an entry, re-select its node and prepare TUP
      jamDebug();
      ndbrequire(scan_state == ScanOp::Current);
      const TreePos treePos = scan.m_scanPos;
      NodeHandle node(frag);
      selectNode(c_ctx, node, treePos.m_loc);
      TreeEnt ent = node.getEnt(treePos.m_pos);
      const TupLoc tupLoc = ent.m_tupLoc;
      c_tup->prepare_scan_tux_TUPKEYREQ(tupLoc.getPageId(),
                                        tupLoc.getPageOffset());
      c_ctx.m_current_ent = ent;
    }
    Uint32 statOpPtrI = scan.m_statOpPtrI;
    if (likely(scan_state == ScanOp::Current))
    {
      jamDebug();
      const TreeEnt ent = c_ctx.m_current_ent;
      if (likely(statOpPtrI == RNIL))
      {
        // normal scan: return the entry if visible to this transaction
        if (likely(scanVisible(scan, ent)))
        {
          jamDebug();
          scan.m_state = ScanOp::Found;
          scan.m_scanEnt = ent;
          break;
        }
      }
      else
      {
        // index statistics scan
        StatOpPtr statPtr;
        statPtr.i = statOpPtrI;
        c_statOpPool.getPtr(statPtr);
        // report row to stats, returns true if a sample is available
        int ret = statScanAddRow(statPtr, ent);
        if (ret == 1)
        {
          jam();
          scan.m_state = ScanOp::Found;
          // may not access non-pseudo cols but must return valid ent
          scan.m_scanEnt = ent;
          break;
        }
        else if (ret == 2)
        {
          // take a break
          jam();
          scan.m_state = ScanOp::Next;
          scan.m_scanEnt = ent;
          break;
        }
      }
    }
    else
    {
      // scanNext left a non-Current state (e.g. Last), stop here
      jamDebug();
      break;
    }
    // entry not returned, continue with the next one
    scan.m_state = scan_state = ScanOp::Next;
  }
}
1277
/*
 * Move to next entry. The scan is already linked to some node. When
 * we leave, if an entry was found, it will be linked to a possibly
 * different node. The scan has a position, and a direction which tells
 * from where we came to this position. This is one of (all comments
 * are in terms of ascending scan):
 *
 * 0 - up from left child (scan this node next)
 * 1 - up from right child (proceed to parent)
 * 2 - up from root (the scan ends)
 * 3 - left to right within node (at end set state 5)
 * 4 - down from parent (proceed to left child)
 * 5 - at node end proceed to right child (state becomes 4)
 *
 * If an entry was found, scan direction is 3. Therefore tree
 * re-organizations need not worry about scan direction.
 *
 * This method is also used to move a scan when its entry is removed
 * (see moveScanList). If the scan is Blocked, we check if it remains
 * Blocked on a different version of the tuple. Otherwise the tuple is
 * lost and state becomes Current.
 */
Uint32
Dbtux::scanNext(ScanOpPtr scanPtr, bool fromMaintReq, Frag& frag)
{
  ScanOp& scan = *scanPtr.p;
  // cannot be moved away from tuple we have locked
#if defined VM_TRACE || defined ERROR_INSERT
  ndbrequire(fromMaintReq || scan.m_state != ScanOp::Locked);
#else
  ndbassert(fromMaintReq || scan.m_state != ScanOp::Locked);
#endif
  // scan direction
  const unsigned idir = scan.m_descending; // 0, 1
  const int jdir = 1 - 2 * (int)idir;      // 1, -1
  // use copy of position
  TreePos pos = scan.m_scanPos;
  Uint32 scan_state = scan.m_state;
  // get and remember original node
  NodeHandle origNode(frag);
  selectNode(c_ctx, origNode, pos.m_loc);
  ndbassert(islinkScan(origNode, scanPtr));
  if (unlikely(scan_state == ScanOp::Locked))
  {
    // bug#32040 - no fix, just unlock and continue
    jam();
    if (scan.m_accLockOp != RNIL)
    {
      jam();
      // use the statically allocated signal reserved for this workaround
      Signal* signal = c_signal_bug32040;
      AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend();
      lockReq->returnCode = RNIL;
      lockReq->requestInfo = AccLockReq::Abort;
      lockReq->accOpPtr = scan.m_accLockOp;
      EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal, AccLockReq::UndoSignalLength);
      jamEntry();
      ndbrequire(lockReq->returnCode == AccLockReq::Success);
      scan.m_accLockOp = RNIL;
      scan.m_lockwait = false;
    }
    scan.m_state = ScanOp::Next;
  }
  // current node in loop
  NodeHandle node = origNode;
  // copy of entry found
  TreeEnt ent;
  TupLoc loc;
  Uint32 occup;
  do
  {
    jamDebug();
    Uint32 dir = pos.m_dir;
    {
      /* Search in node we are currently scanning. */
      const Uint32 node_occup = node.getOccup();
      const Uint32 node_pos = pos.m_pos;
      // advance position - becomes large (> occup) if 0 and descending
      const Uint32 new_node_pos = node_pos + jdir;
      if (likely(dir == 3))
      {
        /**
         * We are currently scanning inside a node, proceed until we
         * have scanned all items in this node.
         */
        if (likely(new_node_pos < node_occup))
        {
          jamDebug();
          ent = node.getEnt(new_node_pos);
          const TupLoc tupLoc = ent.m_tupLoc;
          pos.m_pos = new_node_pos;
          c_tup->prepare_scan_tux_TUPKEYREQ(tupLoc.getPageId(),
                                            tupLoc.getPageOffset());
          if (unlikely(!scanCheck(scan, ent)))
          {
            /**
             * We have reached the end of the scan, this row is outside
             * the range to scan.
             */
            jamDebug();
            pos.m_loc = NullTupLoc;
            goto found_none;
          }
          goto found;
        }
        /* Start search for next node. */
        if (likely(node_occup != 0))
        {
          pos.m_dir = dir = 5;
        }
      }
    }
    do
    {
      /* Search for a node that is at the leaf level */
      if (likely(dir == 5))
      {
        // at node end proceed to right child
        jamDebug();
        loc = node.getLink(1 - idir);
        if (loc != NullTupLoc)
        {
          jamDebug();
          pos.m_loc = loc;
          pos.m_dir = dir = 4; // down from parent as usual
          selectNode(c_ctx, node, loc);
        }
        else
        {
          // pretend we came from right child
          pos.m_dir = dir = 1 - idir;
          break;
        }
      }
      while (likely(dir == 4))
      {
        // coming down from parent proceed to left child
        jamDebug();
        loc = node.getLink(idir);
        if (loc != NullTupLoc)
        {
          jamDebug();
          pos.m_loc = loc;
          selectNode(c_ctx, node, loc);
          continue;
        }
        // pretend we came from left child
        pos.m_dir = dir = idir;
        break;
      }
    } while (0);
    do
    {
      /* Search for a non-empty node at leaf level to scan. */
      occup = node.getOccup();
      if (unlikely(occup == 0))
      {
        jamDebug();
        // empty nodes can only be seen during tree maintenance
        ndbrequire(fromMaintReq);
        // move back to parent - see comment in treeRemoveInner
        loc = pos.m_loc = node.getLink(2);
        pos.m_dir = dir = node.getSide();
      }
      else if (dir == idir)
      {
        // coming up from left child scan current node
        jamDebug();
        // position just before the first entry in scan order
        pos.m_pos = idir == 0 ? Uint32(~0) : occup;
        pos.m_dir = 3;
        break;
      }
      else
      {
        ndbrequire(dir == 1 - idir);
        // coming up from right child proceed to parent
        jamDebug();
        loc = pos.m_loc = node.getLink(2);
        pos.m_dir = dir = node.getSide();
      }
      if (unlikely(dir == 2))
      {
        // coming up from root ends the scan
        jamDebug();
        pos.m_loc = NullTupLoc;
        goto found_none;
      }
      selectNode(c_ctx, node, loc);
    } while (true);
  } while (true);
found:
  // copy back position
  scan.m_scanPos = pos;
  // relink
  ndbassert(pos.m_dir == 3);
  ndbassert(pos.m_loc == node.m_loc);
  if (unlikely(origNode.m_loc != node.m_loc))
  {
    jamDebug();
    unlinkScan(origNode, scanPtr);
    linkScan(node, scanPtr);
  }
  if (likely(scan.m_state != ScanOp::Blocked))
  {
    c_ctx.m_current_ent = ent;
    scan.m_state = ScanOp::Current;
  }
  else
  {
    // scan was Blocked; only possible when called from tree maintenance
    jamDebug();
    ndbrequire(fromMaintReq);
    TreeEnt& scanEnt = scan.m_scanEnt;
    ndbrequire(scanEnt.m_tupLoc != NullTupLoc);
    if (scanEnt.eqtuple(ent))
    {
      // remains blocked on another version
      scanEnt = ent;
    } else {
      // blocked tuple is gone, new entry becomes current
      jamDebug();
      scanEnt.m_tupLoc = NullTupLoc;
      c_ctx.m_current_ent = ent;
      scan.m_state = ScanOp::Current;
    }
  }
  return scan.m_state;

found_none:
  jamDebug();
  scan.m_scanPos = pos;
  unlinkScan(origNode, scanPtr);
  scan.m_state = ScanOp::Last;
  return ScanOp::Last;

}
1510
1511 /*
1512 * Check end key. Return true if scan is still within range.
1513 *
1514 * Error handling: If scan error code has been set, return false at
1515 * once. This terminates the scan and also avoids kernel crash on
1516 * invalid data.
1517 */
1518 inline
1519 bool
scanCheck(ScanOp & scan,TreeEnt ent)1520 Dbtux::scanCheck(ScanOp& scan, TreeEnt ent)
1521 {
1522 jamDebug();
1523 Uint32 scanBoundCnt = c_ctx.scanBoundCnt;
1524 int ret = 0;
1525 if (likely(scanBoundCnt != 0))
1526 {
1527 const Uint32 tupVersion = ent.m_tupVersion;
1528 Uint32* const outputBuffer = c_ctx.c_dataBuffer;
1529 const Uint32 count = c_ctx.scanBoundCnt;
1530 const Uint32* keyAttrs32 = (const Uint32*)&c_ctx.keyAttrs[0];
1531 ret = c_tup->tuxReadAttrsCurr(c_ctx.jamBuffer,
1532 keyAttrs32,
1533 count,
1534 outputBuffer,
1535 false,
1536 tupVersion);
1537 thrjamDebug(c_ctx.jamBuffer);
1538 thrjamLineDebug(c_ctx.jamBuffer, count);
1539 KeyDataArray key_data;
1540 key_data.init_poai(outputBuffer, count);
1541 // compare bound to key
1542 ret = c_ctx.searchScanBoundArray.cmp(&key_data, count, false);
1543 ndbrequire(ret != 0);
1544 const unsigned idir = c_ctx.descending;
1545 const int jdir = 1 - 2 * (int)idir;
1546 ret = (-1) * ret; // reverse for key vs bound
1547 ret = jdir * ret; // reverse for descending scan
1548 }
1549 return (ret <= 0);
1550 }
1551
1552 /*
1553 * Check if an entry is visible to the scan.
1554 *
1555 * There is a special check to never accept same tuple twice in a row.
1556 * This is faster than asking TUP. It also fixes some special cases
1557 * which are not analyzed or handled yet.
1558 *
1559 * Error handling: If scan error code has been set, return false since
1560 * no new result can be returned to LQH. The scan will then look for
1561 * next result and terminate via scanCheck():
1562 */
1563 bool
scanVisible(ScanOp & scan,TreeEnt ent)1564 Dbtux::scanVisible(ScanOp& scan, TreeEnt ent)
1565 {
1566 Uint32 opPtrI = c_tup->get_tuple_operation_ptr_i();
1567 // check for same tuple twice in row
1568 if (unlikely(scan.m_scanEnt.m_tupLoc == ent.m_tupLoc))
1569 {
1570 jamDebug();
1571 return false;
1572 }
1573 if (likely(opPtrI == RNIL))
1574 {
1575 return true;
1576 }
1577 Uint32 tupVersion = ent.m_tupVersion;
1578 Uint32 transId1 = scan.m_transId1;
1579 Uint32 transId2 = scan.m_transId2;
1580 bool dirty = scan.m_readCommitted;
1581 Uint32 savePointId = scan.m_savePointId;
1582 bool ret = c_tup->tuxQueryTh(opPtrI,
1583 tupVersion,
1584 transId1,
1585 transId2,
1586 dirty,
1587 savePointId);
1588 jamEntryDebug();
1589 return ret;
1590 }
1591
1592 /*
1593 * Finish closing of scan and send conf. Any lock wait has been done
1594 * already.
1595 *
1596 * Error handling: Every scan ends here. If error code has been set,
1597 * send a REF.
1598 */
1599 void
scanClose(Signal * signal,ScanOpPtr scanPtr)1600 Dbtux::scanClose(Signal* signal, ScanOpPtr scanPtr)
1601 {
1602 ScanOp& scan = *scanPtr.p;
1603 ndbrequire(! scan.m_lockwait && scan.m_accLockOp == RNIL);
1604 // unlock all not unlocked by LQH
1605 if (! scan.m_accLockOps.isEmpty()) {
1606 jam();
1607 abortAccLockOps(signal, scanPtr);
1608 }
1609 Uint32 blockNo = refToMain(scanPtr.p->m_userRef);
1610 if (scanPtr.p->m_errorCode == 0) {
1611 jamDebug();
1612 // send conf
1613 NextScanConf* const conf = (NextScanConf*)signal->getDataPtrSend();
1614 conf->scanPtr = scanPtr.p->m_userPtr;
1615 conf->accOperationPtr = RNIL;
1616 conf->fragId = RNIL;
1617 releaseScanOp(scanPtr);
1618 signal->setLength(NextScanConf::SignalLengthNoTuple);
1619 c_lqh->exec_next_scan_conf(signal);
1620 return;
1621 } else {
1622 // send ref
1623 NextScanRef* ref = (NextScanRef*)signal->getDataPtr();
1624 ref->scanPtr = scanPtr.p->m_userPtr;
1625 ref->accOperationPtr = RNIL;
1626 ref->fragId = RNIL;
1627 ref->errorCode = scanPtr.p->m_errorCode;
1628 releaseScanOp(scanPtr);
1629 EXECUTE_DIRECT(blockNo,
1630 GSN_NEXT_SCANREF,
1631 signal,
1632 NextScanRef::SignalLength);
1633 return;
1634 }
1635 }
1636
1637 void
abortAccLockOps(Signal * signal,ScanOpPtr scanPtr)1638 Dbtux::abortAccLockOps(Signal* signal, ScanOpPtr scanPtr)
1639 {
1640 ScanOp& scan = *scanPtr.p;
1641 #ifdef VM_TRACE
1642 if (debugFlags & (DebugScan | DebugLock)) {
1643 tuxDebugOut << "Abort locks in scan " << scanPtr.i << " " << scan << endl;
1644 }
1645 #endif
1646 Local_ScanLock_fifo list(c_scanLockPool, scan.m_accLockOps);
1647 ScanLockPtr lockPtr;
1648 while (list.first(lockPtr)) {
1649 jam();
1650 AccLockReq* const lockReq = (AccLockReq*)signal->getDataPtrSend();
1651 lockReq->returnCode = RNIL;
1652 lockReq->requestInfo = AccLockReq::Abort;
1653 lockReq->accOpPtr = lockPtr.p->m_accLockOp;
1654 EXECUTE_DIRECT(DBACC, GSN_ACC_LOCKREQ, signal, AccLockReq::UndoSignalLength);
1655 jamEntry();
1656 ndbrequire(lockReq->returnCode == AccLockReq::Success);
1657 list.remove(lockPtr);
1658 c_scanLockPool.release(lockPtr);
1659 }
1660 checkPoolShrinkNeed(DBTUX_SCAN_LOCK_TRANSIENT_POOL_INDEX,
1661 c_scanLockPool);
1662 }
1663
1664 void
addAccLockOp(ScanOpPtr scanPtr,Uint32 accLockOp)1665 Dbtux::addAccLockOp(ScanOpPtr scanPtr, Uint32 accLockOp)
1666 {
1667 ScanOp& scan = *scanPtr.p;
1668 #ifdef VM_TRACE
1669 if (debugFlags & (DebugScan | DebugLock)) {
1670 tuxDebugOut << "Add lock " << hex << accLockOp << dec
1671 << " to scan " << scanPtr.i << " " << scan << endl;
1672 }
1673 #endif
1674 Local_ScanLock_fifo list(c_scanLockPool, scan.m_accLockOps);
1675 ScanLockPtr lockPtr;
1676 #ifdef VM_TRACE
1677 list.first(lockPtr);
1678 while (lockPtr.i != RNIL) {
1679 ndbrequire(lockPtr.p->m_accLockOp != accLockOp);
1680 list.next(lockPtr);
1681 }
1682 #endif
1683 lockPtr.i = c_freeScanLock;
1684 ndbrequire(c_scanLockPool.getValidPtr(lockPtr));
1685 c_freeScanLock = RNIL;
1686 ndbrequire(accLockOp != RNIL);
1687 lockPtr.p->m_accLockOp = accLockOp;
1688 list.addLast(lockPtr);
1689 }
1690
1691 void
removeAccLockOp(ScanOpPtr scanPtr,Uint32 accLockOp)1692 Dbtux::removeAccLockOp(ScanOpPtr scanPtr, Uint32 accLockOp)
1693 {
1694 ScanOp& scan = *scanPtr.p;
1695 #ifdef VM_TRACE
1696 if (debugFlags & (DebugScan | DebugLock)) {
1697 tuxDebugOut << "Remove lock " << hex << accLockOp << dec
1698 << " from scan " << scanPtr.i << " " << scan << endl;
1699 }
1700 #endif
1701 Local_ScanLock_fifo list(c_scanLockPool, scan.m_accLockOps);
1702 ScanLockPtr lockPtr;
1703 list.first(lockPtr);
1704 while (lockPtr.i != RNIL) {
1705 if (lockPtr.p->m_accLockOp == accLockOp) {
1706 jam();
1707 break;
1708 }
1709 list.next(lockPtr);
1710 }
1711 ndbrequire(lockPtr.i != RNIL);
1712 list.remove(lockPtr);
1713 c_scanLockPool.release(lockPtr);
1714 checkPoolShrinkNeed(DBTUX_SCAN_LOCK_TRANSIENT_POOL_INDEX,
1715 c_scanLockPool);
1716 }
1717
1718 /*
1719 * Release allocated records.
1720 */
1721 void
releaseScanOp(ScanOpPtr & scanPtr)1722 Dbtux::releaseScanOp(ScanOpPtr& scanPtr)
1723 {
1724 #ifdef VM_TRACE
1725 if (debugFlags & DebugScan) {
1726 tuxDebugOut << "Release scan " << scanPtr.i << " " << *scanPtr.p << endl;
1727 }
1728 #endif
1729 Frag& frag = *c_fragPool.getPtr(scanPtr.p->m_fragPtrI);
1730 for (unsigned i = 0; i <= 1; i++) {
1731 ScanBound& scanBound = scanPtr.p->m_scanBound[i];
1732 ScanBoundBuffer::Head& head = scanBound.m_head;
1733 LocalScanBoundBuffer b(c_scanBoundPool, head);
1734 b.release();
1735 }
1736 checkPoolShrinkNeed(DBTUX_SCAN_BOUND_TRANSIENT_POOL_INDEX,
1737 c_scanBoundPool);
1738 if (unlikely(scanPtr.p->m_statOpPtrI != RNIL)) {
1739 jam();
1740 StatOpPtr statPtr;
1741 statPtr.i = scanPtr.p->m_statOpPtrI;
1742 c_statOpPool.getPtr(statPtr);
1743 c_statOpPool.release(statPtr);
1744 }
1745 // unlink from per-fragment list and release from pool
1746 frag.m_scanList.remove(scanPtr);
1747 c_scanOpPool.release(scanPtr);
1748 checkPoolShrinkNeed(DBTUX_SCAN_OPERATION_TRANSIENT_POOL_INDEX,
1749 c_scanOpPool);
1750 }
1751