/*
   Copyright (c) 2012, 2021, Oracle and/or its affiliates.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is also distributed with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation. The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have included with MySQL.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/

#define DBSPJ_C
#include "Dbspj.hpp"

#include <ndb_version.h>
#include <SectionReader.hpp>
#include <signaldata/LqhKey.hpp>
#include <signaldata/QueryTree.hpp>
#include <signaldata/TcKeyRef.hpp>
#include <signaldata/RouteOrd.hpp>
#include <signaldata/TransIdAI.hpp>
#include <signaldata/DiGetNodes.hpp>
#include <signaldata/DihScanTab.hpp>
#include <signaldata/AttrInfo.hpp>
#include <signaldata/CreateTab.hpp>
#include <signaldata/PrepDropTab.hpp>
#include <signaldata/DropTab.hpp>
#include <signaldata/AlterTab.hpp>
#include <signaldata/DbspjErr.hpp>
#include <Interpreter.hpp>
#include <AttributeHeader.hpp>
#include <AttributeDescriptor.hpp>
#include <KeyDescriptor.hpp>
#include <md5_hash.hpp>
#include <signaldata/TcKeyConf.hpp>

#include <signaldata/NodeFailRep.hpp>
#include <signaldata/ReadNodesConf.hpp>
#include <signaldata/SignalDroppedRep.hpp>

#define JAM_FILE_ID 479


#ifdef VM_TRACE

/**
 * DEBUG options for different parts of the SPJ block.
 * Comment out those parts you don't want DEBUG'ed.
 */
//#define DEBUG(x) ndbout << "DBSPJ: "<< x << endl;
//#define DEBUG_DICT(x) ndbout << "DBSPJ: "<< x << endl;
//#define DEBUG_LQHKEYREQ
//#define DEBUG_SCAN_FRAGREQ
#endif

/**
 * Provide empty defs for those DEBUGs which have to be defined.
 */
#if !defined(DEBUG)
#define DEBUG(x)
#endif

#if !defined(DEBUG_DICT)
#define DEBUG_DICT(x)
#endif

#define DEBUG_CRASH() ndbassert(false)

const Ptr<Dbspj::TreeNode> Dbspj::NullTreeNodePtr(0, RNIL);
const Dbspj::RowRef Dbspj::NullRowRef = { RNIL, GLOBAL_PAGE_SIZE_WORDS, { 0 } };


void Dbspj::execSIGNAL_DROPPED_REP(Signal* signal)
{
  /* An incoming signal was dropped, handle it.
   * Dropped signal really means that we ran out of
   * long signal buffering to store its sections.
   */
  jamEntry();

  if (!assembleDroppedFragments(signal))
  {
    jam();
    return;
  }

  const SignalDroppedRep* rep = (SignalDroppedRep*) &signal->theData[0];
  Uint32 originalGSN = rep->originalGsn;

  DEBUG("SignalDroppedRep received for GSN " << originalGSN);

  switch(originalGSN) {
  case GSN_SCAN_FRAGREQ:
  {
    jam();
    /* Get information necessary to send SCAN_FRAGREF back to TC */
    // TODO : Handle dropped signal fragments

    const ScanFragReq * const truncatedScanFragReq =
      (ScanFragReq *) &rep->originalData[0];

    handle_early_scanfrag_ref(signal, truncatedScanFragReq,
                              DbspjErr::OutOfSectionMemory);
    break;
  }
  default:
    jam();
    /* Don't expect dropped signals for other GSNs
     */
    SimulatedBlock::execSIGNAL_DROPPED_REP(signal);
  };

  return;
}

inline
Uint32
Dbspj::TableRecord::checkTableError(Uint32 schemaVersion) const
{
  DEBUG_DICT("Dbspj::TableRecord::checkTableError"
             << ", m_flags: " << m_flags
             << ", m_currentSchemaVersion: " << m_currentSchemaVersion
             << ", check schemaVersion: " << schemaVersion);

  if (!get_enabled())
    return DbspjErr::NoSuchTable;
  if (get_dropping())
    return DbspjErr::DropTableInProgress;
  if (table_version_major(schemaVersion) != table_version_major(m_currentSchemaVersion))
    return DbspjErr::WrongSchemaVersion;

  return 0;
}

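/**
 * Dictionary signal flow handled below: table creation is a two-step
 * protocol, TC_SCHVERREQ (prepare) followed by TAB_COMMITREQ (commit),
 * and table drop is likewise PREP_DROP_TAB_REQ followed by DROP_TAB_REQ.
 * These handlers only maintain the enabled/prepared/dropping flags and
 * the schema version of the TableRecord; all other table metadata is
 * fetched on demand (see the NOTE in execTC_SCHVERREQ).
 */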
// create table prepare
void Dbspj::execTC_SCHVERREQ(Signal* signal)
{
  jamEntry();
  if (! assembleFragments(signal)) {
    jam();
    return;
  }
  const TcSchVerReq* req = CAST_CONSTPTR(TcSchVerReq, signal->getDataPtr());
  const Uint32 tableId = req->tableId;
  const Uint32 senderRef = req->senderRef;
  const Uint32 senderData = req->senderData;

  DEBUG_DICT("Dbspj::execTC_SCHVERREQ"
             << ", tableId: " << tableId
             << ", version: " << req->tableVersion
  );

  TableRecordPtr tablePtr;
  tablePtr.i = tableId;
  ptrCheckGuard(tablePtr, c_tabrecFilesize, m_tableRecord);

  ndbrequire(tablePtr.p->get_prepared() == false);
  ndbrequire(tablePtr.p->get_enabled() == false);
  new (tablePtr.p) TableRecord(req->tableVersion);

  /**
   * NOTE: Even if there is more information, like
   * 'tableType', 'noOfPrimaryKeys' etc. available from
   * TcSchVerReq, we do *not* store that in TableRecord.
   * Instead this information is retrieved on demand from
   * g_key_descriptor_pool where it is readily available.
   * The 'contract' for consistency of this information is
   * such that:
   * 1) g_key_descriptor[ENTRY] will be populated *before*
   *    any blocks receiving CREATE_TAB_REQ (or equivalent).
   * 2) g_key_descriptor[ENTRY] will be invalidated *after*
   *    all blocks sent DROP_TAB_CONF (commit)
   * Thus, this info is consistent whenever required by SPJ.
   */
  TcSchVerConf * conf = (TcSchVerConf*)signal->getDataPtr();
  conf->senderRef = reference();
  conf->senderData = senderData;
  sendSignal(senderRef, GSN_TC_SCHVERCONF, signal,
             TcSchVerConf::SignalLength, JBB);
}//Dbspj::execTC_SCHVERREQ()

// create table commit
void Dbspj::execTAB_COMMITREQ(Signal* signal)
{
  jamEntry();
  const Uint32 senderData = signal->theData[0];
  const Uint32 senderRef = signal->theData[1];
  const Uint32 tableId = signal->theData[2];

  DEBUG_DICT("Dbspj::execTAB_COMMITREQ"
             << ", tableId: " << tableId
  );

  TableRecordPtr tablePtr;
  tablePtr.i = tableId;
  ptrCheckGuard(tablePtr, c_tabrecFilesize, m_tableRecord);

  ndbrequire(tablePtr.p->get_prepared() == true);
  ndbrequire(tablePtr.p->get_enabled() == false);
  tablePtr.p->set_enabled(true);
  tablePtr.p->set_prepared(false);
  tablePtr.p->set_dropping(false);

  signal->theData[0] = senderData;
  signal->theData[1] = reference();
  signal->theData[2] = tableId;
  sendSignal(senderRef, GSN_TAB_COMMITCONF, signal, 3, JBB);
}//Dbspj::execTAB_COMMITREQ

void
Dbspj::execPREP_DROP_TAB_REQ(Signal* signal)
{
  jamEntry();

  PrepDropTabReq* req = (PrepDropTabReq*)signal->getDataPtr();
  const Uint32 tableId = req->tableId;
  const Uint32 senderRef = req->senderRef;
  const Uint32 senderData = req->senderData;

  DEBUG_DICT("Dbspj::execPREP_DROP_TAB_REQ"
             << ", tableId: " << tableId
  );

  TableRecordPtr tablePtr;
  tablePtr.i = tableId;
  ptrCheckGuard(tablePtr, c_tabrecFilesize, m_tableRecord);

  if (!tablePtr.p->get_enabled())
  {
    jam();
    PrepDropTabRef* ref = (PrepDropTabRef*)signal->getDataPtrSend();
    ref->senderRef = reference();
    ref->senderData = senderData;
    ref->tableId = tableId;
    ref->errorCode = PrepDropTabRef::NoSuchTable;
    sendSignal(senderRef, GSN_PREP_DROP_TAB_REF, signal,
               PrepDropTabRef::SignalLength, JBB);
    return;
  }

  if (tablePtr.p->get_dropping())
  {
    jam();
    PrepDropTabRef* ref = (PrepDropTabRef*)signal->getDataPtrSend();
    ref->senderRef = reference();
    ref->senderData = senderData;
    ref->tableId = tableId;
    ref->errorCode = PrepDropTabRef::DropInProgress;
    sendSignal(senderRef, GSN_PREP_DROP_TAB_REF, signal,
               PrepDropTabRef::SignalLength, JBB);
    return;
  }

  tablePtr.p->set_dropping(true);
  tablePtr.p->set_prepared(false);

  PrepDropTabConf* conf = (PrepDropTabConf*)signal->getDataPtrSend();
  conf->tableId = tableId;
  conf->senderRef = reference();
  conf->senderData = senderData;
  sendSignal(senderRef, GSN_PREP_DROP_TAB_CONF, signal,
             PrepDropTabConf::SignalLength, JBB);
}//Dbspj::execPREP_DROP_TAB_REQ

void
Dbspj::execDROP_TAB_REQ(Signal* signal)
{
  jamEntry();

  const DropTabReq* req = (DropTabReq*)signal->getDataPtr();
  const Uint32 tableId = req->tableId;
  const Uint32 senderRef = req->senderRef;
  const Uint32 senderData = req->senderData;
  DropTabReq::RequestType rt = (DropTabReq::RequestType)req->requestType;

  DEBUG_DICT("Dbspj::execDROP_TAB_REQ"
             << ", tableId: " << tableId
  );

  TableRecordPtr tablePtr;
  tablePtr.i = tableId;
  ptrCheckGuard(tablePtr, c_tabrecFilesize, m_tableRecord);

  if (rt == DropTabReq::OnlineDropTab){
    if (!tablePtr.p->get_enabled()){
      jam();
      DropTabRef* ref = (DropTabRef*)signal->getDataPtrSend();
      ref->senderRef = reference();
      ref->senderData = senderData;
      ref->tableId = tableId;
      ref->errorCode = DropTabRef::NoSuchTable;
      sendSignal(senderRef, GSN_DROP_TAB_REF, signal,
                 DropTabRef::SignalLength, JBB);
      return;
    }
    if (!tablePtr.p->get_dropping()){
      jam();
      DropTabRef* ref = (DropTabRef*)signal->getDataPtrSend();
      ref->senderRef = reference();
      ref->senderData = senderData;
      ref->tableId = tableId;
      ref->errorCode = DropTabRef::DropWoPrep;
      sendSignal(senderRef, GSN_DROP_TAB_REF, signal,
                 DropTabRef::SignalLength, JBB);
      return;
    }
  }

  tablePtr.p->set_enabled(false);
  tablePtr.p->set_prepared(false);
  tablePtr.p->set_dropping(false);

  DropTabConf * conf = (DropTabConf*)signal->getDataPtrSend();
  conf->tableId = tableId;
  conf->senderRef = reference();
  conf->senderData = senderData;
  sendSignal(senderRef, GSN_DROP_TAB_CONF, signal,
             PrepDropTabConf::SignalLength, JBB);
}//Dbspj::execDROP_TAB_REQ

void
Dbspj::execALTER_TAB_REQ(Signal* signal)
{
  jamEntry();

  const AlterTabReq* req = (const AlterTabReq*)signal->getDataPtr();
  const Uint32 tableId = req->tableId;
  const Uint32 senderRef = req->senderRef;
  const Uint32 senderData = req->senderData;
  const Uint32 tableVersion = req->tableVersion;
  const Uint32 newTableVersion = req->newTableVersion;
  AlterTabReq::RequestType requestType =
    (AlterTabReq::RequestType) req->requestType;

  DEBUG_DICT("Dbspj::execALTER_TAB_REQ"
             << ", tableId: " << tableId
             << ", version: " << tableVersion << " --> " << newTableVersion
  );

  TableRecordPtr tablePtr;
  tablePtr.i = tableId;
  ptrCheckGuard(tablePtr, c_tabrecFilesize, m_tableRecord);

  switch (requestType) {
  case AlterTabReq::AlterTablePrepare:
    jam();
    break;
  case AlterTabReq::AlterTableRevert:
    jam();
    tablePtr.p->m_currentSchemaVersion = tableVersion;
    break;
  case AlterTabReq::AlterTableCommit:
    jam();
    tablePtr.p->m_currentSchemaVersion = newTableVersion;
    break;
  default:
    ndbrequire(false);
    break;
  }

  AlterTabConf* conf = (AlterTabConf*)signal->getDataPtrSend();
  conf->senderRef = reference();
  conf->senderData = senderData;
  conf->connectPtr = RNIL;
  sendSignal(senderRef, GSN_ALTER_TAB_CONF, signal,
             AlterTabConf::SignalLength, JBB);
}//Dbspj::execALTER_TAB_REQ

/** Set up pools, the request hashes and the TableRecord array from the configuration. */
void Dbspj::execREAD_CONFIG_REQ(Signal* signal)
{
  jamEntry();
  const ReadConfigReq req =
    *reinterpret_cast<const ReadConfigReq*>(signal->getDataPtr());

  Pool_context pc;
  pc.m_block = this;

  DEBUG("execREAD_CONFIG_REQ");
  DEBUG("sizeof(Request): " << sizeof(Request) <<
        " sizeof(TreeNode): " << sizeof(TreeNode));

  m_arenaAllocator.init(1024, RT_SPJ_ARENA_BLOCK, pc);
  m_request_pool.arena_pool_init(&m_arenaAllocator, RT_SPJ_REQUEST, pc);
  m_treenode_pool.arena_pool_init(&m_arenaAllocator, RT_SPJ_TREENODE, pc);
  m_scanfraghandle_pool.arena_pool_init(&m_arenaAllocator, RT_SPJ_SCANFRAG, pc);
  m_lookup_request_hash.setSize(16);
  m_scan_request_hash.setSize(16);
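  /**
   * The row-page pool is laid directly over the global memory root;
   * (Uint32)~0 acts as an 'unlimited' page count, so row-page
   * allocation is effectively bounded by the global memory manager
   * rather than by this pool itself.
   */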
  void* ptr = m_ctx.m_mm.get_memroot();
  m_page_pool.set((RowPage*)ptr, (Uint32)~0);

  Record_info ri;
  Dependency_map::createRecordInfo(ri, RT_SPJ_DATABUFFER);
  m_dependency_map_pool.init(&m_arenaAllocator, ri, pc);

  {
    const ndb_mgm_configuration_iterator * p =
      m_ctx.m_config.getOwnConfigIterator();
    ndbrequire(p != 0);

    ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_SPJ_TABLE, &c_tabrecFilesize));
  }
  m_tableRecord = (TableRecord*)allocRecord("TableRecord",
                                            sizeof(TableRecord),
                                            c_tabrecFilesize);

  TableRecordPtr tablePtr;
  for (tablePtr.i = 0; tablePtr.i < c_tabrecFilesize; tablePtr.i++) {
    ptrAss(tablePtr, m_tableRecord);
    new (tablePtr.p) TableRecord;
  }//for

  ReadConfigConf* const conf =
    reinterpret_cast<ReadConfigConf*>(signal->getDataPtrSend());
  conf->senderRef = reference();
  conf->senderData = req.senderData;

  sendSignal(req.senderRef, GSN_READ_CONFIG_CONF, signal,
             ReadConfigConf::SignalLength, JBB);
}//Dbspj::execREAD_CONFIG_REQ()

static Uint32 f_STTOR_REF = 0;

void Dbspj::execSTTOR(Signal* signal)
{
//#define UNIT_TEST_DATABUFFER2

  jamEntry();
  /* START CASE */
  const Uint16 tphase = signal->theData[1];
  f_STTOR_REF = signal->getSendersBlockRef();

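  /**
   * Start-phase handling: in phase 1 we kick off the periodic
   * CONTINUEB(0) loop which drives releaseGlobal(); in phase 4 we ask
   * NDBCNTR for the set of started nodes (READ_NODESREQ) and defer
   * STTORRY until READ_NODESCONF arrives. All other phases reply
   * immediately.
   */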
  if (tphase == 1)
  {
    jam();
    signal->theData[0] = 0;
    sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 1000, 1);
  }

  if (tphase == 4)
  {
    jam();

    signal->theData[0] = reference();
    sendSignal(NDBCNTR_REF, GSN_READ_NODESREQ, signal, 1, JBB);
    return;
  }

  sendSTTORRY(signal);

#ifdef UNIT_TEST_DATABUFFER2
  if (tphase == 120)
  {
    ndbout_c("basic test of ArenaPool / DataBuffer2");

    for (Uint32 i = 0; i<100; i++)
    {
      ArenaHead ah;
      if (!m_arenaAllocator.seize(ah))
      {
        ndbout_c("Failed to allocate arena");
        break;
      }

      ndbout_c("*** LOOP %u", i);
      Uint32 sum = 0;
      Dependency_map::Head head;
      LocalArenaPoolImpl pool(ah, m_dependency_map_pool);
      for (Uint32 j = 0; j<100; j++)
      {
        Uint32 sz = rand() % 1000;
        if (0)
          ndbout_c("adding %u", sz);
        Local_dependency_map list(pool, head);
        for (Uint32 i = 0; i<sz; i++)
          signal->theData[i] = sum + i;
        list.append(signal->theData, sz);
        sum += sz;
      }

      {
        ndbrequire(head.getSize() == sum);
        Local_dependency_map list(pool, head);
        Dependency_map::ConstDataBufferIterator it;
        Uint32 cnt = 0;
        for (list.first(it); !it.isNull(); list.next(it))
        {
          ndbrequire(* it.data == cnt);
          cnt++;
        }

        ndbrequire(cnt == sum);
      }

      Resource_limit rl;
      if (m_ctx.m_mm.get_resource_limit(7, rl))
      {
        ndbout_c("Resource %d min: %d max: %d curr: %d",
                 7, rl.m_min, rl.m_max, rl.m_curr);
      }

      {
        ndbout_c("release map");
        Local_dependency_map list(pool, head);
        list.release();
      }

      ndbout_c("release all");
      m_arenaAllocator.release(ah);
      ndbout_c("*** LOOP %u sum: %u", i, sum);
    }
  }
#endif
}//Dbspj::execSTTOR()

void
Dbspj::sendSTTORRY(Signal* signal)
{
  signal->theData[0] = 0;
  signal->theData[1] = 0;    /* BLOCK CATEGORY */
  signal->theData[2] = 0;    /* SIGNAL VERSION NUMBER */
  signal->theData[3] = 4;
#ifdef UNIT_TEST_DATABUFFER2
  signal->theData[4] = 120;  /* Start phase end */
#else
  signal->theData[4] = 255;
#endif
  signal->theData[5] = 255;
  sendSignal(f_STTOR_REF, GSN_STTORRY, signal, 6, JBB);
}

void
Dbspj::execREAD_NODESCONF(Signal* signal)
{
  jamEntry();

  ReadNodesConf * const conf = (ReadNodesConf *)signal->getDataPtr();

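  /**
   * Maintain c_alive_nodes: during our own node restart the already
   * started nodes (plus ourselves) are alive; during an initial
   * cluster start the alive set is the union of starting and started
   * nodes.
   */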
  if (getNodeState().getNodeRestartInProgress())
  {
    jam();
    c_alive_nodes.assign(NdbNodeBitmask::Size, conf->startedNodes);
    c_alive_nodes.set(getOwnNodeId());
  }
  else
  {
    jam();
    c_alive_nodes.assign(NdbNodeBitmask::Size, conf->startingNodes);
    NdbNodeBitmask tmp;
    tmp.assign(NdbNodeBitmask::Size, conf->startedNodes);
    c_alive_nodes.bitOR(tmp);
  }

  sendSTTORRY(signal);
}

void
Dbspj::execINCL_NODEREQ(Signal* signal)
{
  jamEntry();
  const Uint32 senderRef = signal->theData[0];
  const Uint32 nodeId = signal->theData[1];

  ndbrequire(!c_alive_nodes.get(nodeId));
  c_alive_nodes.set(nodeId);

  signal->theData[0] = nodeId;
  signal->theData[1] = reference();
  sendSignal(senderRef, GSN_INCL_NODECONF, signal, 2, JBB);
}

void
Dbspj::execNODE_FAILREP(Signal* signal)
{
  jamEntry();

  const NodeFailRep * rep = (NodeFailRep*)signal->getDataPtr();
  NdbNodeBitmask failed;
  failed.assign(NdbNodeBitmask::Size, rep->theNodes);

  c_alive_nodes.bitANDC(failed);

  /* Clean up possibly fragmented signals being received or sent */
  for (Uint32 node = 1; node < MAX_NDB_NODES; node++)
  {
    if (failed.get(node))
    {
      jam();
      simBlockNodeFailure(signal, node);
    }//if
  }//for

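  /**
   * Outstanding requests involving the failed nodes are aborted
   * asynchronously: CONTINUEB(1) scans the lookup request hash first,
   * and when that pass completes, nodeFail_checkRequests() reschedules
   * itself as CONTINUEB(2) for the scan request hash.
   */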
  signal->theData[0] = 1;
  signal->theData[1] = 0;
  failed.copyto(NdbNodeBitmask::Size, signal->theData + 2);
  sendSignal(reference(), GSN_CONTINUEB, signal, 2 + NdbNodeBitmask::Size,
             JBB);
}

void
Dbspj::execAPI_FAILREQ(Signal* signal)
{
  jamEntry();
  Uint32 failedApiNode = signal->theData[0];
  Uint32 ref = signal->theData[1];

  /**
   * We only need to care about lookups,
   * as SCANs are aborted by DBTC.
   *
   * As SPJ does not receive / send fragmented signals
   * directly to API nodes, simBlockNodeFailure()
   * should not really be required - assert this.
   */
  Uint32 elementsCleaned = simBlockNodeFailure(signal, failedApiNode);
  ndbassert(elementsCleaned == 0); // As SPJ has no fragmented API signals
  (void) elementsCleaned;          // Avoid compiler error

  signal->theData[0] = failedApiNode;
  signal->theData[1] = reference();
  sendSignal(ref, GSN_API_FAILCONF, signal, 2, JBB);
}

void
Dbspj::execCONTINUEB(Signal* signal)
{
  jamEntry();
  switch(signal->theData[0]) {
  case 0:
    releaseGlobal(signal);
    return;
  case 1:
    nodeFail_checkRequests(signal);
    return;
  case 2:
    nodeFail_checkRequests(signal);
    return;
  }

  ndbrequire(false);
}

void
Dbspj::nodeFail_checkRequests(Signal* signal)
{
  jam();
  const Uint32 type = signal->theData[0];
  const Uint32 bucket = signal->theData[1];

  NdbNodeBitmask failed;
  failed.assign(NdbNodeBitmask::Size, signal->theData+2);

  Request_iterator iter;
  Request_hash * hash;
  switch(type){
  case 1:
    hash = &m_lookup_request_hash;
    break;
  case 2:
    hash = &m_scan_request_hash;
    break;
  }
  hash->next(bucket, iter);

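  /**
   * Real-time break pattern: handle at most RT_BREAK requests per
   * CONTINUEB slice (but always finish the current hash bucket), then
   * reschedule from the current iterator position, so that long hash
   * chains cannot stall the job buffers.
   */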
  const Uint32 RT_BREAK = 64;
  for (Uint32 i = 0; (i<RT_BREAK || iter.bucket == bucket) &&
                     !iter.curr.isNull(); i++)
  {
    jam();

    Ptr<Request> requestPtr = iter.curr;
    hash->next(iter);
    i += nodeFail(signal, requestPtr, failed);
  }

  if (!iter.curr.isNull())
  {
    jam();
    signal->theData[0] = type;
    signal->theData[1] = bucket;
    failed.copyto(NdbNodeBitmask::Size, signal->theData+2);
    sendSignal(reference(), GSN_CONTINUEB, signal, 2 + NdbNodeBitmask::Size,
               JBB);
  }
  else if (type == 1)
  {
    jam();
    signal->theData[0] = 2;
    signal->theData[1] = 0;
    failed.copyto(NdbNodeBitmask::Size, signal->theData+2);
    sendSignal(reference(), GSN_CONTINUEB, signal, 2 + NdbNodeBitmask::Size,
               JBB);
  }
  else if (type == 2)
  {
    jam();
  }
}

/**
 * MODULE LQHKEYREQ
 */
void Dbspj::execLQHKEYREQ(Signal* signal)
{
  jamEntry();
  c_Counters.incr_counter(CI_READS_RECEIVED, 1);

  const LqhKeyReq* req = reinterpret_cast<const LqhKeyReq*>(signal->getDataPtr());

  /**
   * #0 - KEYINFO contains key for first operation (used for hash in TC)
   * #1 - ATTRINFO contains tree + parameters
   *      (unless StoredProcId is set, when only parameters are sent,
   *       but this is not yet implemented)
   */
  SegmentedSectionPtr attrPtr;
  SectionHandle handle = SectionHandle(this, signal);
  handle.getSection(attrPtr, LqhKeyReq::AttrInfoSectionNum);
  const Uint32 keyPtrI = handle.m_ptr[LqhKeyReq::KeyInfoSectionNum].i;

  Uint32 err;
  Ptr<Request> requestPtr(0, RNIL);
  do
  {
    ArenaHead ah;
    err = DbspjErr::OutOfQueryMemory;
    if (unlikely(!m_arenaAllocator.seize(ah)))
      break;

    if (ERROR_INSERTED_CLEAR(17001))
    {
      jam();
      ndbout_c("Injecting OutOfQueryMem error 17001 at line %d file %s",
               __LINE__, __FILE__);
      break;
    }
    if (unlikely(!m_request_pool.seize(ah, requestPtr)))
    {
      jam();
      break;
    }
    new (requestPtr.p) Request(ah);
    do_init(requestPtr.p, req, signal->getSendersBlockRef());

    Uint32 len_cnt;

    {
      SectionReader r0(attrPtr, getSectionSegmentPool());

      err = DbspjErr::ZeroLengthQueryTree;
      if (unlikely(!r0.getWord(&len_cnt)))
        break;
    }

    Uint32 len = QueryTree::getLength(len_cnt);
    Uint32 cnt = QueryTree::getNodeCnt(len_cnt);

    {
      SectionReader treeReader(attrPtr, getSectionSegmentPool());
      SectionReader paramReader(attrPtr, getSectionSegmentPool());
      paramReader.step(len); // skip over tree to parameters

      Build_context ctx;
      ctx.m_resultRef = req->variableData[0];
      ctx.m_savepointId = req->savePointId;
      ctx.m_scanPrio = 1;
      ctx.m_start_signal = signal;
      ctx.m_senderRef = signal->getSendersBlockRef();

      err = build(ctx, requestPtr, treeReader, paramReader);
      if (unlikely(err != 0))
        break;

      /**
       * Root TreeNode in Request takes ownership of keyPtr
       * section when build has completed.
       * We are done with attrPtr which is now released.
       */
      Ptr<TreeNode> rootNodePtr = ctx.m_node_list[0];
      rootNodePtr.p->m_send.m_keyInfoPtrI = keyPtrI;
      release(attrPtr);
      handle.clear();
    }

    /**
     * Store request in list(s)/hash(es)
     */
    store_lookup(requestPtr);

    /**
     * A query being shipped as a LQHKEYREQ may return at most a row
     * per operation, i.e. be a (multi-)lookup
     */
    if (ERROR_INSERTED_CLEAR(17013) ||
        unlikely(!requestPtr.p->isLookup() || requestPtr.p->m_node_cnt != cnt))
    {
      jam();
      err = DbspjErr::InvalidRequest;
      break;
    }

    start(signal, requestPtr);
    return;
  } while (0);

  /**
   * Error handling below,
   * 'err' may contain error code.
   */
  if (!requestPtr.isNull())
  {
    jam();
    cleanup(requestPtr);
  }
  releaseSections(handle);  // a NOOP, if we reached 'handle.clear()' above
  handle_early_lqhkey_ref(signal, req, err);
}

void
Dbspj::do_init(Request* requestP, const LqhKeyReq* req, Uint32 senderRef)
{
  requestP->m_bits = 0;
  requestP->m_errCode = 0;
  requestP->m_state = Request::RS_BUILDING;
  requestP->m_node_cnt = 0;
  requestP->m_cnt_active = 0;
  requestP->m_rows = 0;
  requestP->m_active_nodes.clear();
  requestP->m_completed_nodes.clear();
  requestP->m_outstanding = 0;
  requestP->m_transId[0] = req->transId1;
  requestP->m_transId[1] = req->transId2;
  requestP->m_rootFragId = LqhKeyReq::getFragmentId(req->fragmentData);
  bzero(requestP->m_lookup_node_data, sizeof(requestP->m_lookup_node_data));
#ifdef SPJ_TRACE_TIME
  requestP->m_cnt_batches = 0;
  requestP->m_sum_rows = 0;
  requestP->m_sum_running = 0;
  requestP->m_sum_waiting = 0;
  requestP->m_save_time = NdbTick_getCurrentTicks();
#endif
  const Uint32 reqInfo = req->requestInfo;
  Uint32 tmp = req->clientConnectPtr;
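  /**
   * Decide where replies for the root operation go: for a dirty read
   * the API expects TCKEYCONF/TCKEYREF directly, so the API operation
   * record (variableData[1]) becomes our sender data; otherwise
   * replies go back to the requesting TC, keyed by the client connect
   * pointer (possibly overridden via variableData when client and TC
   * are the same). See also handle_early_lqhkey_ref().
   */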
  if (LqhKeyReq::getDirtyFlag(reqInfo) &&
      LqhKeyReq::getOperation(reqInfo) == ZREAD)
  {
    jam();

    ndbrequire(LqhKeyReq::getApplicationAddressFlag(reqInfo));
    //const Uint32 apiRef   = lqhKeyReq->variableData[0];
    //const Uint32 apiOpRec = lqhKeyReq->variableData[1];
    tmp = req->variableData[1];
    requestP->m_senderData = tmp;
    requestP->m_senderRef = senderRef;
  }
  else
  {
    if (LqhKeyReq::getSameClientAndTcFlag(reqInfo) == 1)
    {
      if (LqhKeyReq::getApplicationAddressFlag(reqInfo))
        tmp = req->variableData[2];
      else
        tmp = req->variableData[0];
    }
    requestP->m_senderData = tmp;
    requestP->m_senderRef = senderRef;
  }
  requestP->m_rootResultData = tmp;
}

void
Dbspj::store_lookup(Ptr<Request> requestPtr)
{
  ndbassert(requestPtr.p->isLookup());
  Ptr<Request> tmp;
  bool found = m_lookup_request_hash.find(tmp, *requestPtr.p);
  ndbrequire(found == false);
  m_lookup_request_hash.add(requestPtr);
}

void
Dbspj::handle_early_lqhkey_ref(Signal* signal,
                               const LqhKeyReq * lqhKeyReq,
                               Uint32 err)
{
  /**
   * Error path...
   */
  ndbrequire(err);
  const Uint32 reqInfo = lqhKeyReq->requestInfo;
  const Uint32 transid[2] = { lqhKeyReq->transId1, lqhKeyReq->transId2 };

  if (LqhKeyReq::getDirtyFlag(reqInfo) &&
      LqhKeyReq::getOperation(reqInfo) == ZREAD)
  {
    jam();
    /* Dirty read sends TCKEYREF direct to client, and nothing to TC */
    ndbrequire(LqhKeyReq::getApplicationAddressFlag(reqInfo));
    const Uint32 apiRef   = lqhKeyReq->variableData[0];
    const Uint32 apiOpRec = lqhKeyReq->variableData[1];

    TcKeyRef* const tcKeyRef = reinterpret_cast<TcKeyRef*>(signal->getDataPtrSend());

    tcKeyRef->connectPtr = apiOpRec;
    tcKeyRef->transId[0] = transid[0];
    tcKeyRef->transId[1] = transid[1];
    tcKeyRef->errorCode = err;
    sendTCKEYREF(signal, apiRef, signal->getSendersBlockRef());
  }
  else
  {
    jam();
    const Uint32 returnref = signal->getSendersBlockRef();
    const Uint32 clientPtr = lqhKeyReq->clientConnectPtr;

    Uint32 TcOprec = clientPtr;
    if (LqhKeyReq::getSameClientAndTcFlag(reqInfo) == 1)
    {
      if (LqhKeyReq::getApplicationAddressFlag(reqInfo))
        TcOprec = lqhKeyReq->variableData[2];
      else
        TcOprec = lqhKeyReq->variableData[0];
    }

    LqhKeyRef* const ref = reinterpret_cast<LqhKeyRef*>(signal->getDataPtrSend());
    ref->userRef = clientPtr;
    ref->connectPtr = TcOprec;
    ref->errorCode = err;
    ref->transId1 = transid[0];
    ref->transId2 = transid[1];
    sendSignal(returnref, GSN_LQHKEYREF, signal,
               LqhKeyRef::SignalLength, JBB);
  }
}

void
Dbspj::sendTCKEYREF(Signal* signal, Uint32 ref, Uint32 routeRef)
{
  const Uint32 nodeId = refToNode(ref);
  const bool connectedToNode = getNodeInfo(nodeId).m_connected;

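  /**
   * If the destination API node is not directly connected, route the
   * reply via 'routeRef' (the requesting TC) instead: the TCKEYREF
   * payload is moved out of the way (theData+25), wrapped as a linear
   * section, and shipped inside a ROUTE_ORD signal. sendTCKEYCONF()
   * below uses the same scheme.
   */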
  if (likely(connectedToNode))
  {
    jam();
    sendSignal(ref, GSN_TCKEYREF, signal, TcKeyRef::SignalLength, JBB);
  }
  else
  {
    jam();
    memmove(signal->theData+25, signal->theData, 4*TcKeyRef::SignalLength);
    RouteOrd* ord = (RouteOrd*)signal->getDataPtrSend();
    ord->dstRef = ref;
    ord->srcRef = reference();
    ord->gsn = GSN_TCKEYREF;
    ord->cnt = 0;
    LinearSectionPtr ptr[3];
    ptr[0].p = signal->theData+25;
    ptr[0].sz = TcKeyRef::SignalLength;
    sendSignal(routeRef, GSN_ROUTE_ORD, signal, RouteOrd::SignalLength, JBB,
               ptr, 1);
  }
}

void
Dbspj::sendTCKEYCONF(Signal* signal, Uint32 len, Uint32 ref, Uint32 routeRef)
{
  const Uint32 nodeId = refToNode(ref);
  const bool connectedToNode = getNodeInfo(nodeId).m_connected;

  if (likely(connectedToNode))
  {
    jam();
    sendSignal(ref, GSN_TCKEYCONF, signal, len, JBB);
  }
  else
  {
    jam();
    memmove(signal->theData+25, signal->theData, 4*len);
    RouteOrd* ord = (RouteOrd*)signal->getDataPtrSend();
    ord->dstRef = ref;
    ord->srcRef = reference();
    ord->gsn = GSN_TCKEYCONF;
    ord->cnt = 0;
    LinearSectionPtr ptr[3];
    ptr[0].p = signal->theData+25;
    ptr[0].sz = len;
    sendSignal(routeRef, GSN_ROUTE_ORD, signal, RouteOrd::SignalLength, JBB,
               ptr, 1);
  }
}

/**
 * END - MODULE LQHKEYREQ
 */


/**
 * MODULE SCAN_FRAGREQ
 */
void
Dbspj::execSCAN_FRAGREQ(Signal* signal)
{
  jamEntry();

  /* Reassemble if the request was fragmented */
  if (!assembleFragments(signal))
  {
    jam();
    return;
  }

  const ScanFragReq * req = (ScanFragReq *)&signal->theData[0];

#ifdef DEBUG_SCAN_FRAGREQ
  ndbout_c("Incoming SCAN_FRAGREQ ");
  printSCAN_FRAGREQ(stdout, signal->getDataPtrSend(),
                    ScanFragReq::SignalLength + 2,
                    DBLQH);
#endif

  /**
   * #0 - ATTRINFO contains tree + parameters
   *      (unless StoredProcId is set, when only parameters are sent,
   *       but this is not yet implemented)
   * #1 - KEYINFO if first op is index scan - contains bounds for first scan
   *              if first op is lookup - contains keyinfo for lookup
   */
  SectionHandle handle = SectionHandle(this, signal);
  SegmentedSectionPtr attrPtr;
  handle.getSection(attrPtr, ScanFragReq::AttrInfoSectionNum);

  Uint32 err;
  Ptr<Request> requestPtr(0, RNIL);
  do
  {
    ArenaHead ah;
    err = DbspjErr::OutOfQueryMemory;
    if (unlikely(!m_arenaAllocator.seize(ah)))
      break;

    if (ERROR_INSERTED_CLEAR(17002))
    {
      ndbout_c("Injecting OutOfQueryMem error 17002 at line %d file %s",
               __LINE__, __FILE__);
      jam();
      break;
    }
    if (unlikely(!m_request_pool.seize(ah, requestPtr)))
    {
      jam();
      break;
    }
    new (requestPtr.p) Request(ah);
    do_init(requestPtr.p, req, signal->getSendersBlockRef());

    Uint32 len_cnt;
    {
      SectionReader r0(attrPtr, getSectionSegmentPool());
      err = DbspjErr::ZeroLengthQueryTree;
      if (unlikely(!r0.getWord(&len_cnt)))
        break;
    }

    Uint32 len = QueryTree::getLength(len_cnt);
    Uint32 cnt = QueryTree::getNodeCnt(len_cnt);

    {
      SectionReader treeReader(attrPtr, getSectionSegmentPool());
      SectionReader paramReader(attrPtr, getSectionSegmentPool());
      paramReader.step(len); // skip over tree to parameters

      Build_context ctx;
      ctx.m_resultRef = req->resultRef;
      ctx.m_scanPrio = ScanFragReq::getScanPrio(req->requestInfo);
      ctx.m_savepointId = req->savePointId;
      ctx.m_batch_size_rows = req->batch_size_rows;
      ctx.m_start_signal = signal;
      ctx.m_senderRef = signal->getSendersBlockRef();

      err = build(ctx, requestPtr, treeReader, paramReader);
      if (unlikely(err != 0))
        break;

      /**
       * Root TreeNode in Request takes ownership of keyPtr
       * section when build has completed.
       * We are done with attrPtr which is now released.
       */
      Ptr<TreeNode> rootNodePtr = ctx.m_node_list[0];
      if (handle.m_cnt > 1)
      {
        jam();
        const Uint32 keyPtrI = handle.m_ptr[ScanFragReq::KeyInfoSectionNum].i;
        rootNodePtr.p->m_send.m_keyInfoPtrI = keyPtrI;
      }
      release(attrPtr);
      handle.clear();
    }

    /**
     * Store request in list(s)/hash(es)
     */
    store_scan(requestPtr);

    if (ERROR_INSERTED_CLEAR(17013) ||
        unlikely(!requestPtr.p->isScan() || requestPtr.p->m_node_cnt != cnt))
    {
      jam();
      err = DbspjErr::InvalidRequest;
      break;
    }

    start(signal, requestPtr);
    return;
  } while (0);

  if (!requestPtr.isNull())
  {
    jam();
    cleanup(requestPtr);
  }
  releaseSections(handle);  // a NOOP, if we reached 'handle.clear()' above
  handle_early_scanfrag_ref(signal, req, err);
}

void
Dbspj::do_init(Request* requestP, const ScanFragReq* req, Uint32 senderRef)
{
  requestP->m_bits = 0;
  requestP->m_errCode = 0;
  requestP->m_state = Request::RS_BUILDING;
  requestP->m_node_cnt = 0;
  requestP->m_cnt_active = 0;
  requestP->m_rows = 0;
  requestP->m_active_nodes.clear();
  requestP->m_completed_nodes.clear();
  requestP->m_outstanding = 0;
  requestP->m_senderRef = senderRef;
  requestP->m_senderData = req->senderData;
  requestP->m_transId[0] = req->transId1;
  requestP->m_transId[1] = req->transId2;
  requestP->m_rootResultData = req->resultData;
  requestP->m_rootFragId = req->fragmentNoKeyLen;
  bzero(requestP->m_lookup_node_data, sizeof(requestP->m_lookup_node_data));
#ifdef SPJ_TRACE_TIME
  requestP->m_cnt_batches = 0;
  requestP->m_sum_rows = 0;
  requestP->m_sum_running = 0;
  requestP->m_sum_waiting = 0;
  requestP->m_save_time = NdbTick_getCurrentTicks();
#endif
}

void
Dbspj::store_scan(Ptr<Request> requestPtr)
{
  ndbassert(requestPtr.p->isScan());
  Ptr<Request> tmp;
  bool found = m_scan_request_hash.find(tmp, *requestPtr.p);
  ndbrequire(found == false);
  m_scan_request_hash.add(requestPtr);
}

void
Dbspj::handle_early_scanfrag_ref(Signal* signal,
                                 const ScanFragReq * _req,
                                 Uint32 err)
{
  ScanFragReq req = *_req;
  Uint32 senderRef = signal->getSendersBlockRef();

  ScanFragRef * ref = (ScanFragRef*)&signal->theData[0];
  ref->senderData = req.senderData;
  ref->transId1 = req.transId1;
  ref->transId2 = req.transId2;
  ref->errorCode = err;
  sendSignal(senderRef, GSN_SCAN_FRAGREF, signal,
             ScanFragRef::SignalLength, JBB);
}

/**
 * END - MODULE SCAN_FRAGREQ
 */

/**
 * MODULE GENERIC
 */
Uint32
Dbspj::build(Build_context& ctx,
             Ptr<Request> requestPtr,
             SectionReader & tree,
             SectionReader & param)
{
  Uint32 tmp0, tmp1;
  Uint32 err = DbspjErr::ZeroLengthQueryTree;
  ctx.m_cnt = 0;
  ctx.m_scan_cnt = 0;

  tree.getWord(&tmp0);
  Uint32 loop = QueryTree::getNodeCnt(tmp0);

  DEBUG("::build()");
  err = DbspjErr::InvalidTreeNodeCount;
  if (loop == 0 || loop > NDB_SPJ_MAX_TREE_NODES)
  {
    jam();
    goto error;
  }

  while (ctx.m_cnt < loop)
  {
    DEBUG(" - loop " << ctx.m_cnt << " pos: " << tree.getPos().currPos);
    tree.peekWord(&tmp0);
    param.peekWord(&tmp1);
    Uint32 node_op = QueryNode::getOpType(tmp0);
    Uint32 node_len = QueryNode::getLength(tmp0);
    Uint32 param_op = QueryNodeParameters::getOpType(tmp1);
    Uint32 param_len = QueryNodeParameters::getLength(tmp1);

    err = DbspjErr::QueryNodeTooBig;
    if (unlikely(node_len >= NDB_ARRAY_SIZE(m_buffer0)))
    {
      jam();
      goto error;
    }

    err = DbspjErr::QueryNodeParametersTooBig;
    if (unlikely(param_len >= NDB_ARRAY_SIZE(m_buffer1)))
    {
      jam();
      goto error;
    }

    err = DbspjErr::InvalidTreeNodeSpecification;
    if (unlikely(tree.getWords(m_buffer0, node_len) == false))
    {
      jam();
      goto error;
    }

    err = DbspjErr::InvalidTreeParametersSpecification;
    if (unlikely(param.getWords(m_buffer1, param_len) == false))
    {
      jam();
      goto error;
    }

#if defined(DEBUG_LQHKEYREQ) || defined(DEBUG_SCAN_FRAGREQ)
    printf("node: ");
    for (Uint32 i = 0; i<node_len; i++)
      printf("0x%.8x ", m_buffer0[i]);
    printf("\n");

    printf("param: ");
    for (Uint32 i = 0; i<param_len; i++)
      printf("0x%.8x ", m_buffer1[i]);
    printf("\n");
#endif

    err = DbspjErr::UnknowQueryOperation;
    if (unlikely(node_op != param_op))
    {
      jam();
      goto error;
    }
    if (ERROR_INSERTED_CLEAR(17006))
    {
      ndbout_c("Injecting UnknowQueryOperation error 17006 at line %d file %s",
               __LINE__, __FILE__);
      jam();
      goto error;
    }

    const OpInfo* info = getOpInfo(node_op);
    if (unlikely(info == 0))
    {
      jam();
      goto error;
    }

    QueryNode* qn = (QueryNode*)m_buffer0;
    QueryNodeParameters * qp = (QueryNodeParameters*)m_buffer1;
    qn->len = node_len;
    qp->len = param_len;
    err = (this->*(info->m_build))(ctx, requestPtr, qn, qp);
    if (unlikely(err != 0))
    {
      jam();
      goto error;
    }

    /**
     * only first node gets access to signal
     */
    ctx.m_start_signal = 0;

    ndbrequire(ctx.m_cnt < NDB_ARRAY_SIZE(ctx.m_node_list));
    ctx.m_cnt++;
  }
  requestPtr.p->m_node_cnt = ctx.m_cnt;

  if (ctx.m_scan_cnt > 1)
  {
    jam();
    requestPtr.p->m_bits |= Request::RT_MULTI_SCAN;
  }

  // Construct RowBuffers where required
  err = initRowBuffers(requestPtr);
  if (unlikely(err != 0))
  {
    jam();
    goto error;
  }

  return 0;

error:
  jam();
  return err;
}

/**
 * initRowBuffers will decide the row-buffering strategy, and init
 * the RowBuffers where required.
 */
Uint32
Dbspj::initRowBuffers(Ptr<Request> requestPtr)
{
  jam();
  /**
   * Execution of a scan request requires restrictions
   * on how lookup-children issue their LQHKEYREQs:
   * A large scan result with many parallel lookup
   * siblings can easily flood the job buffers with too many
   * REQs. So we set up an 'execution plan' for how a
   * scan request should be executed:
   *
   * NOTE: It could make sense to do the same for a lookup Req.
   *       However, CONF/REF for these leaf operations are not
   *       returned to SPJ. Thus, there is no way to know when
   *       the operation has completed, and other operations could
   *       be resumed.
   *
   *       As a lookup request does not have the same potential for
   *       producing lots of LQHKEYREQs, we believe/hope the risk
   *       of flooding job buffers for a lookup request can be ignored.
   */
  if (requestPtr.p->isScan())
  {
    jam();
    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
    Ptr<TreeNode> treeRootPtr;

    list.first(treeRootPtr);  // treeRootPtr is a scan
    ndbrequire(!treeRootPtr.isNull());
    buildExecPlan(requestPtr, treeRootPtr, NullTreeNodePtr);
  }

  /**
   * Init ROW_BUFFERS iff Request has to buffer any rows.
   */
  if (requestPtr.p->m_bits & Request::RT_ROW_BUFFERS)
  {
    jam();

    /**
     * Iff multi-scan is non-bushy (normal case)
     * we don't strictly need BUFFER_VAR for RT_ROW_BUFFERS
     * but could instead pop the row stack frame,
     * however this is not implemented...
     *
     * so, currently use BUFFER_VAR if 'RT_MULTI_SCAN'
     *
     * NOTE: This should easily be solvable by having a
     *       RowBuffer for each TreeNode instead
     */
    if (requestPtr.p->m_bits & Request::RT_MULTI_SCAN)
    {
      jam();
      requestPtr.p->m_rowBuffer.init(BUFFER_VAR);
    }
    else
    {
      jam();
      requestPtr.p->m_rowBuffer.init(BUFFER_STACK);
    }

    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
    Ptr<TreeNode> treeNodePtr;
    for (list.first(treeNodePtr); !treeNodePtr.isNull(); list.next(treeNodePtr))
    {
      jam();
      ndbassert(treeNodePtr.p->m_batch_size > 0);
      /**
       * Construct a List or Map RowCollection for those TreeNodes
       * requiring rows to be buffered.
       */
      if (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER_MAP)
      {
        jam();
        treeNodePtr.p->m_rows.construct(RowCollection::COLLECTION_MAP,
                                        requestPtr.p->m_rowBuffer,
                                        treeNodePtr.p->m_batch_size);
      }
      else if (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER)
      {
        jam();
        treeNodePtr.p->m_rows.construct(RowCollection::COLLECTION_LIST,
                                        requestPtr.p->m_rowBuffer,
                                        treeNodePtr.p->m_batch_size);
      }
    }
  }

  return 0;
} // Dbspj::initRowBuffers

/**
 * buildExecPlan():
 *   Decides the order/pace in which the different
 *   TreeNodes should be executed.
 *   Currently it is only used to insert sequentialization points in
 *   the execution of bushy lookup-child nodes (aka star-join).
 *   This is done in order to avoid too many LQHKEYREQ signals
 *   being sent, which could overflow the job buffers.
 *
 *   For each branch of TreeNodes starting with a scan, we identify
 *   any 'bushiness' among its lookup children. We set up a left -> right
 *   execution order among these such that:
 *   - A child lookup operation can not be REQuested before we
 *     either have executed a TRANSID_AI from the scan parent,
 *     or executed a CONF / REF from another lookup child.
 *   - When a lookup CONF or REF is executed, its TreeNode is
 *     annotated with 'resume' info which decides if/which TreeNode
 *     we should execute next.
 *
 *   This maintains a strict 1:1 fanout between incoming rows
 *   being processed, and new row REQuests being produced.
 *   Thus we avoid that a large scan result floods the job buffers
 *   with too many lookup requests.
 *
 * FUTURE:
 *   For join children where child execution now is T_EXEC_SEQUENTIAL,
 *   it should be relatively simple to extend SPJ to do 'inner join'.
 *   As we at these sequential points know whether the previously
 *   joined children didn't find any matches, we can skip REQuesting
 *   rows from other children having the same parent row.
 */
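/**
 * Illustration (hypothetical tree): a scan S with bushy lookup
 * children L0, L1, L2. The reverse-order loop at the end of this
 * function chains them left -> right, so a TRANSID_AI row from S
 * triggers only L0; a CONF/REF from L0 resumes L1, which in turn
 * resumes L2. The parent's rows are buffered so the resumed nodes can
 * re-locate the row their request is built from.
 */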
void
Dbspj::buildExecPlan(Ptr<Request>  requestPtr,
                     Ptr<TreeNode> treeNodePtr,
                     Ptr<TreeNode> nextLookup)
{
  Uint32 lookupChildren[NDB_SPJ_MAX_TREE_NODES];
  Uint32 lookupChildCnt = 0;

  /**
   * We need to iterate the lookup children in reverse order to set up
   * the 'next' operations. As this is not possible through a
   * ConstDataBufferIterator, store any lookup children in the temp
   * array lookupChildren[].
   * Scan children are parents of new 'scan -> lookup' branches.
   */
  {
    LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
    Local_dependency_map childList(pool, treeNodePtr.p->m_dependent_nodes);
    Dependency_map::ConstDataBufferIterator it;
    for (childList.first(it); !it.isNull(); childList.next(it))
    {
      jam();
      Ptr<TreeNode> childPtr;
      m_treenode_pool.getPtr(childPtr, *it.data);

      if (childPtr.p->m_info == &g_LookupOpInfo)
      {
        jam();
        lookupChildren[lookupChildCnt++] = *it.data;
      }
      else
      {
        // Build a new plan starting from this scan operation
        jam();
        buildExecPlan(requestPtr, childPtr, NullTreeNodePtr);
      }
    }
  }

  /**
   * Lookup children might have to wait for previous LQHKEYREQs to
   * complete before they are allowed to send their own requests.
   * (In order to not overfill the job buffers)
   */
  if (treeNodePtr.p->m_info == &g_LookupOpInfo &&
      !nextLookup.isNull())
  {
    jam();
    /**
     * Annotate that:
     *  - 'nextLookup' is not allowed to start immediately.
     *  - 'treeNode' restarts 'nextLookup' when it completes
     */
    nextLookup.p->m_bits |= TreeNode::T_EXEC_SEQUENTIAL;

    if (lookupChildCnt == 0)  // isLeaf() or only scan children
    {
      jam();
      treeNodePtr.p->m_resumeEvents = TreeNode::TN_RESUME_CONF |
                                      TreeNode::TN_RESUME_REF;
      DEBUG("ExecPlan: 'REF/CONF' from node " << treeNodePtr.p->m_node_no
         << " resumes node " << nextLookup.p->m_node_no);
    }
    else
    {
      /**
       * Will REQuest from one of its child lookups if CONF,
       * so we don't resume another TreeNode in addition.
       */
      jam();
      treeNodePtr.p->m_resumeEvents = TreeNode::TN_RESUME_REF;
      DEBUG("ExecPlan: 'REF' from node " << treeNodePtr.p->m_node_no
         << " resumes node " << nextLookup.p->m_node_no);
    }
    treeNodePtr.p->m_resumePtrI = nextLookup.i;

    /**
     * When we T_EXEC_SEQUENTIAL, the TreeNode will iterate its
     * parent rows in order to create new REQs as the previous
     * ones complete (CONF or REF).
     *  - Prepare RowIterator for parent rows
     *  - Buffer rows to be iterated in the parent node
     */
    {
      jam();

      ndbassert(nextLookup.p->m_parentPtrI != RNIL);
      Ptr<TreeNode> parentPtr;
      m_treenode_pool.getPtr(parentPtr, nextLookup.p->m_parentPtrI);
      parentPtr.p->m_bits |= TreeNode::T_ROW_BUFFER
                           | TreeNode::T_ROW_BUFFER_MAP;
      requestPtr.p->m_bits |= Request::RT_ROW_BUFFERS;

      DEBUG("ExecPlan: rows from node " << parentPtr.p->m_node_no
         << " are buffered");
    }
  }

  /**
   * Recursively build exec. plan for any lookup child.
   */
  for (int i = lookupChildCnt-1; i >= 0; i--)
  {
    jam();
    Ptr<TreeNode> childPtr;
    m_treenode_pool.getPtr(childPtr, lookupChildren[i]);
    ndbassert(childPtr.p->m_info == &g_LookupOpInfo);

    buildExecPlan(requestPtr, childPtr, nextLookup);
    nextLookup = childPtr;
  }
} // Dbspj::buildExecPlan

Uint32
Dbspj::createNode(Build_context& ctx, Ptr<Request> requestPtr,
                  Ptr<TreeNode> & treeNodePtr)
{
  /**
   * In the future, we can have different TreeNode-allocation strategies
   * that can be set up using the Build_context
   *
   */
  if (ERROR_INSERTED_CLEAR(17005))
  {
    ndbout_c("Injecting OutOfOperations error 17005 at line %d file %s",
             __LINE__, __FILE__);
    jam();
    return DbspjErr::OutOfOperations;
  }
  if (m_treenode_pool.seize(requestPtr.p->m_arena, treeNodePtr))
  {
    DEBUG("createNode - seize -> ptrI: " << treeNodePtr.i);
    new (treeNodePtr.p) TreeNode(requestPtr.i);
    ctx.m_node_list[ctx.m_cnt] = treeNodePtr;
    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
    list.addLast(treeNodePtr);
    treeNodePtr.p->m_node_no = ctx.m_cnt;
    return 0;
  }
  return DbspjErr::OutOfOperations;
}

void
Dbspj::start(Signal* signal,
             Ptr<Request> requestPtr)
{
  Uint32 err = 0;
  if (requestPtr.p->m_bits & Request::RT_NEED_PREPARE)
  {
    jam();
    requestPtr.p->m_outstanding = 0;
    requestPtr.p->m_state = Request::RS_PREPARING;

    Ptr<TreeNode> nodePtr;
    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
    for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
    {
      jam();
      /**
       * Verify existence of all involved tables.
       */
      err = checkTableError(nodePtr);
      if (unlikely(err))
      {
        jam();
        break;
      }
      ndbrequire(nodePtr.p->m_info != 0);
      if (nodePtr.p->m_info->m_prepare != 0)
      {
        jam();
        (this->*(nodePtr.p->m_info->m_prepare))(signal, requestPtr, nodePtr);
      }
    }

    /**
     * Preferably RT_NEED_PREPARE should only be set if blocking
     * calls are used, in which case m_outstanding should have been increased
     */
    ndbassert(err || requestPtr.p->m_outstanding);
  }
  if (unlikely(err))
  {
    jam();
    abort(signal, requestPtr, err);
    return;
  }

  checkPrepareComplete(signal, requestPtr, 0);
}

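/**
 * Account for 'cnt' prepare operations having completed. When no more
 * are outstanding, either finish off an aborting request or move the
 * request to RS_RUNNING and start execution at the root TreeNode.
 */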
void
Dbspj::checkPrepareComplete(Signal * signal, Ptr<Request> requestPtr,
                            Uint32 cnt)
{
  ndbrequire(requestPtr.p->m_outstanding >= cnt);
  requestPtr.p->m_outstanding -= cnt;

  if (requestPtr.p->m_outstanding == 0)
  {
    jam();

    if (unlikely((requestPtr.p->m_state & Request::RS_ABORTING) != 0))
    {
      jam();
      batchComplete(signal, requestPtr);
      return;
    }

    Ptr<TreeNode> nodePtr;
    {
      Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
      ndbrequire(list.first(nodePtr));
    }
    Uint32 err = checkTableError(nodePtr);
    if (unlikely(err != 0))
    {
      jam();
      abort(signal, requestPtr, err);
      return;
    }

    requestPtr.p->m_state = Request::RS_RUNNING;
    ndbrequire(nodePtr.p->m_info != 0 && nodePtr.p->m_info->m_start != 0);
    (this->*(nodePtr.p->m_info->m_start))(signal, requestPtr, nodePtr);
  }
}

/**
 * Check if all outstanding work for 'Request' has completed.
 */
void
Dbspj::checkBatchComplete(Signal * signal, Ptr<Request> requestPtr,
                          Uint32 cnt)
{
  ndbrequire(requestPtr.p->m_outstanding >= cnt);
  requestPtr.p->m_outstanding -= cnt;

  if (requestPtr.p->m_outstanding == 0)
  {
    jam();
    batchComplete(signal, requestPtr);
  }
}

/**
 * Request has completed all outstanding work.
 * Signal API about completion status and clean up
 * resources if appropriate.
 */
void
Dbspj::batchComplete(Signal* signal, Ptr<Request> requestPtr)
{
  ndbrequire(requestPtr.p->m_outstanding == 0);  // "definition" of batchComplete

  bool is_complete = requestPtr.p->m_cnt_active == 0;
  bool need_complete_phase = requestPtr.p->m_bits & Request::RT_NEED_COMPLETE;

  if (requestPtr.p->isLookup())
  {
    ndbassert(requestPtr.p->m_cnt_active == 0);
  }

  if (!is_complete || (is_complete && need_complete_phase == false))
  {
    /**
     * One batch completed, and either
     *  - the request is not yet complete
     *  - or no complete-phase is needed
     */
    jam();

    if ((requestPtr.p->m_state & Request::RS_ABORTING) != 0)
    {
      ndbassert(is_complete);
    }

    prepareNextBatch(signal, requestPtr);
    sendConf(signal, requestPtr, is_complete);
  }
  else if (is_complete && need_complete_phase)
  {
    jam();
    /**
     * run complete-phase
     */
    complete(signal, requestPtr);
    return;
  }

  if (requestPtr.p->m_cnt_active == 0)
  {
    jam();
    /**
     * Entire Request completed
     */
    cleanup(requestPtr);
  }
  else
  {
    jam();
    /**
     * Clean up the TreeNode branches getting another
     * batch of result rows.
     */
    cleanupBatch(requestPtr);
  }
}

/**
 * Locate next TreeNode(s) to retrieve more rows from.
 *
 * Calculate the set of 'm_active_nodes' we will receive from in NEXTREQ.
 * Add these TreeNodes to the cursor list to be iterated.
 */
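/**
 * Example (hypothetical): a root scan with two bushy child scans S2
 * and S3, both with rows left. With RT_REPEAT_SCAN_RESULT only the
 * last active node becomes the NEXTREQ cursor and the other child
 * scans are repeated; without it both S2 and S3 stay active, and the
 * API client joins the concurrent streams itself.
 */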
1777 void
prepareNextBatch(Signal * signal,Ptr<Request> requestPtr)1778 Dbspj::prepareNextBatch(Signal* signal, Ptr<Request> requestPtr)
1779 {
1780 requestPtr.p->m_cursor_nodes.init();
1781 requestPtr.p->m_active_nodes.clear();
1782
1783 if (requestPtr.p->m_cnt_active == 0)
1784 {
1785 jam();
1786 return;
1787 }
1788
1789 DEBUG("prepareNextBatch, request: " << requestPtr.i);
1790
1791 if (requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT)
1792 {
1793 /**
1794 * If REPEAT_SCAN_RESULT we handle bushy scans by return more *new* rows
1795 * from only one of the active child scans. If there are multiple
1796 * bushy scans not being able to return their current result set in
1797 * a single batch, result sets from the other child scans are repeated
1798 * until all rows has been returned to the API client.
1799 *
1800 * Hence, the cross joined results from the bushy scans are partly
1801 * produced within the SPJ block on a 'batchsize granularity',
1802 * and partly is the responsibility of the API-client by iterating
1803 * the result rows within the current result batches.
1804 * (Opposed to non-REPEAT_SCAN_RESULT, the client only have to care about
1805 * the current batched rows - no buffering is required)
1806 */
1807 jam();
1808 Ptr<TreeNode> nodePtr;
1809 Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
1810
1811 /**
1812 * Locate last 'TN_ACTIVE' TreeNode which is the only one choosen
1813 * to return more *new* rows.
1814 */
1815 for (list.last(nodePtr); !nodePtr.isNull(); list.prev(nodePtr))
1816 {
1817 if (nodePtr.p->m_state == TreeNode::TN_ACTIVE)
1818 {
1819 jam();
1820 DEBUG("Will fetch more from 'active' m_node_no: " << nodePtr.p->m_node_no);
1821 /**
1822 * A later NEXTREQ will request a *new* batch of rows from this TreeNode.
1823 */
1824 registerActiveCursor(requestPtr, nodePtr);
1825 break;
1826 }
1827 }
1828
1829 /**
1830 * Restart/repeat other (index scan) child batches which:
1831 * - Being 'after' nodePtr located above.
1832 * - Not being an ancestor of (depends on) any 'active' TreeNode.
1833 * (As these scans are started when rows from these parent nodes
1834 * arrives.)
1835 */
1836 if (!nodePtr.isNull())
1837 {
1838 jam();
1839 DEBUG("Calculate 'active', w/ cursor on m_node_no: " << nodePtr.p->m_node_no);
1840
1841 /* Restart any partial index-scans after this 'TN_ACTIVE' TreeNode */
1842 for (list.next(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
1843 {
1844 jam();
1845 if (!nodePtr.p->m_ancestors.overlaps (requestPtr.p->m_active_nodes))
1846 {
1847 jam();
1848 ndbrequire(nodePtr.p->m_state != TreeNode::TN_ACTIVE);
1849 ndbrequire(nodePtr.p->m_info != 0);
1850 if (nodePtr.p->m_info->m_parent_batch_repeat != 0)
1851 {
1852 jam();
1853 (this->*(nodePtr.p->m_info->m_parent_batch_repeat))(signal,
1854 requestPtr,
1855 nodePtr);
1856 }
1857 }
1858 }
1859 } // if (!nodePtr.isNull()
1860 }
1861 else // not 'RT_REPEAT_SCAN_RESULT'
1862 {
1863 /**
1864 * If not REPEAT_SCAN_RESULT multiple active TreeNodes may return their
1865 * remaining result simultaneously. In case of bushy-scans, these
1866 * concurrent result streams are cross joins of each other
1867 * in SQL terms. In order to produce the cross joined result, it is
1868 * the responsibility of the API-client to buffer these streams and
1869 * iterate them to produce the cross join.
1870 */
1871 jam();
1872 Ptr<TreeNode> nodePtr;
1873 Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
1874 TreeNodeBitMask ancestors_of_active;
1875
1876 for (list.last(nodePtr); !nodePtr.isNull(); list.prev(nodePtr))
1877 {
1878 /**
1879 * If we are active (i.e not consumed all rows originating
1880 * from parent rows) and we are not in the set of parents
1881 * for any active child:
1882 *
1883 * Then, this is a position that execSCAN_NEXTREQ should continue
1884 */
1885 if (nodePtr.p->m_state == TreeNode::TN_ACTIVE &&
1886 !ancestors_of_active.get (nodePtr.p->m_node_no))
1887 {
1888 jam();
1889 DEBUG("Add 'active' m_node_no: " << nodePtr.p->m_node_no);
1890 registerActiveCursor(requestPtr, nodePtr);
1891 ancestors_of_active.bitOR(nodePtr.p->m_ancestors);
1892 }
1893 }
1894 } // if (RT_REPEAT_SCAN_RESULT)
1895
1896 DEBUG("Calculated 'm_active_nodes': " << requestPtr.p->m_active_nodes.rep.data[0]);
1897 }
1898
1899 void
registerActiveCursor(Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr)1900 Dbspj::registerActiveCursor(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr)
1901 {
1902 Uint32 bit = treeNodePtr.p->m_node_no;
1903 ndbrequire(!requestPtr.p->m_active_nodes.get(bit));
1904 requestPtr.p->m_active_nodes.set(bit);
1905
1906 Local_TreeNodeCursor_list list(m_treenode_pool, requestPtr.p->m_cursor_nodes);
1907 #ifdef VM_TRACE
1908 {
1909 Ptr<TreeNode> nodePtr;
1910 for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
1911 {
1912 ndbrequire(nodePtr.i != treeNodePtr.i);
1913 }
1914 }
1915 #endif
1916 list.addFirst(treeNodePtr);
1917 }
1918
1919 void
1920 Dbspj::sendConf(Signal* signal, Ptr<Request> requestPtr, bool is_complete)
1921 {
1922 if (requestPtr.p->isScan())
1923 {
1924 if (unlikely((requestPtr.p->m_state & Request::RS_WAITING) != 0))
1925 {
1926 jam();
1927 /**
1928        * We aborted this request ourselves (due to node-failure?),
1929        * but TC hasn't contacted us yet... so we can't reply yet...
1930 */
1931 ndbrequire(is_complete);
1932 ndbrequire((requestPtr.p->m_state & Request::RS_ABORTING) != 0);
1933 return;
1934 }
1935
1936 if (requestPtr.p->m_errCode == 0)
1937 {
1938 jam();
1939 ScanFragConf * conf=
1940 reinterpret_cast<ScanFragConf*>(signal->getDataPtrSend());
1941 conf->senderData = requestPtr.p->m_senderData;
1942 conf->transId1 = requestPtr.p->m_transId[0];
1943 conf->transId2 = requestPtr.p->m_transId[1];
1944 conf->completedOps = requestPtr.p->m_rows;
1945 conf->fragmentCompleted = is_complete ? 1 : 0;
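      // NOTE: 'total_len' appears to be reused here to piggy-back the
      // bitmask of still-active TreeNodes to the API; it is not a
      // length in this context (see the DEBUG of 'm_active_nodes' above).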
1946 conf->total_len = requestPtr.p->m_active_nodes.rep.data[0];
1947
1948 c_Counters.incr_counter(CI_SCAN_BATCHES_RETURNED, 1);
1949 c_Counters.incr_counter(CI_SCAN_ROWS_RETURNED, requestPtr.p->m_rows);
1950
1951 #ifdef SPJ_TRACE_TIME
1952 const NDB_TICKS now = NdbTick_getCurrentTicks();
1953 const NDB_TICKS then = requestPtr.p->m_save_time;
1954 const Uint64 diff = NdbTick_Elapsed(then,now).microSec();
1955
1956 requestPtr.p->m_sum_rows += requestPtr.p->m_rows;
1957 requestPtr.p->m_sum_running += Uint32(diff);
1958 requestPtr.p->m_cnt_batches++;
1959 requestPtr.p->m_save_time = now;
1960
1961 if (is_complete)
1962 {
1963 Uint32 cnt = requestPtr.p->m_cnt_batches;
1964 ndbout_c("batches: %u avg_rows: %u avg_running: %u avg_wait: %u",
1965 cnt,
1966 (requestPtr.p->m_sum_rows / cnt),
1967 (requestPtr.p->m_sum_running / cnt),
1968 cnt == 1 ? 0 : requestPtr.p->m_sum_waiting / (cnt - 1));
1969 }
1970 #endif
1971
1972 /**
1973 * reset for next batch
1974 */
1975 requestPtr.p->m_rows = 0;
1976 if (!is_complete)
1977 {
1978 jam();
1979 requestPtr.p->m_state |= Request::RS_WAITING;
1980 }
1981 #ifdef DEBUG_SCAN_FRAGREQ
1982 ndbout_c("Dbspj::sendConf() sending SCAN_FRAGCONF ");
1983 printSCAN_FRAGCONF(stdout, signal->getDataPtrSend(),
1984 conf->total_len,
1985 DBLQH);
1986 #endif
1987 sendSignal(requestPtr.p->m_senderRef, GSN_SCAN_FRAGCONF, signal,
1988 ScanFragConf::SignalLength, JBB);
1989 }
1990 else
1991 {
1992 jam();
1993 ndbrequire(is_complete);
1994 ScanFragRef * ref=
1995 reinterpret_cast<ScanFragRef*>(signal->getDataPtrSend());
1996 ref->senderData = requestPtr.p->m_senderData;
1997 ref->transId1 = requestPtr.p->m_transId[0];
1998 ref->transId2 = requestPtr.p->m_transId[1];
1999 ref->errorCode = requestPtr.p->m_errCode;
2000
2001 sendSignal(requestPtr.p->m_senderRef, GSN_SCAN_FRAGREF, signal,
2002 ScanFragRef::SignalLength, JBB);
2003 }
2004 }
2005 else
2006 {
2007 ndbassert(is_complete);
2008 if (requestPtr.p->m_errCode)
2009 {
2010 jam();
2011 Uint32 resultRef = getResultRef(requestPtr);
2012 TcKeyRef* ref = (TcKeyRef*)signal->getDataPtr();
2013 ref->connectPtr = requestPtr.p->m_senderData;
2014 ref->transId[0] = requestPtr.p->m_transId[0];
2015 ref->transId[1] = requestPtr.p->m_transId[1];
2016 ref->errorCode = requestPtr.p->m_errCode;
2017 ref->errorData = 0;
2018
2019 sendTCKEYREF(signal, resultRef, requestPtr.p->m_senderRef);
2020 }
2021 }
2022 }
2023
2024 Uint32
2025 Dbspj::getResultRef(Ptr<Request> requestPtr)
2026 {
2027 Ptr<TreeNode> nodePtr;
2028 Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
2029 for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
2030 {
2031 if (nodePtr.p->m_info == &g_LookupOpInfo)
2032 {
2033 jam();
2034 return nodePtr.p->m_lookup_data.m_api_resultRef;
2035 }
2036 }
2037 ndbrequire(false);
2038 return 0;
2039 }
2040
2041 /**
2042 * Cleanup resources in preparation for a SCAN_NEXTREQ
2043 * requesting a new batch of rows.
2044 */
2045 void
2046 Dbspj::cleanupBatch(Ptr<Request> requestPtr)
2047 {
2048 /**
2049    * There needs to be at least 1 active TreeNode, otherwise we should
2050    * have taken the Request cleanup "path" in batchComplete.
2051 */
2052 ndbassert(requestPtr.p->m_cnt_active >= 1);
2053
2054 /**
2055 * Release any buffered rows for the TreeNode branches
2056 * getting new rows.
2057 */
2058 if ((requestPtr.p->m_bits & Request::RT_ROW_BUFFERS) != 0)
2059 {
2060 if ((requestPtr.p->m_bits & Request::RT_MULTI_SCAN) != 0)
2061 {
2062 jam();
2063 /**
2064 * A MULTI_SCAN may selectively retrieve rows from only
2065 * some of the (scan-) branches in the Request.
2066        * Selectively release from only these branches.
2067 */
2068 releaseScanBuffers(requestPtr);
2069 }
2070 else
2071 {
2072 jam();
2073 /**
2074 * if not multiple scans in request, simply release all pages allocated
2075 * for row buffers (all rows will be released anyway)
2076 */
2077       // The root node should be the one and only active node
2078 ndbassert(requestPtr.p->m_cnt_active == 1);
2079 ndbassert(requestPtr.p->m_active_nodes.get(0));
2080 releaseRequestBuffers(requestPtr);
2081 }
2082 } //RT_ROW_BUFFERS
2083
2084
2085 Ptr<TreeNode> treeNodePtr;
2086 Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
2087
2088 for (list.first(treeNodePtr); !treeNodePtr.isNull(); list.next(treeNodePtr))
2089 {
2090 /**
2091 * Re-init row buffer structures for those treeNodes getting more rows
2092      * in the following NEXTREQ, including all their children.
2093 */
2094 if (requestPtr.p->m_active_nodes.get(treeNodePtr.p->m_node_no) ||
2095 requestPtr.p->m_active_nodes.overlaps(treeNodePtr.p->m_ancestors))
2096 {
2097 jam();
2098 treeNodePtr.p->m_rows.init();
2099 }
2100
2101 /**
2102      * Do further cleanup in treeNodes having an ancestor getting more rows.
2103 * (Which excludes the restarted treeNode itself)
2104 */
2105 if (requestPtr.p->m_active_nodes.overlaps(treeNodePtr.p->m_ancestors))
2106 {
2107 jam();
2108 /**
2109 * Common TreeNode cleanup:
2110        * Release the list of deferred operations which may refer to
2111 * buffered rows released above.
2112 */
2113 LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
2114 {
2115 Local_correlation_list correlations(pool, treeNodePtr.p->m_deferred.m_correlations);
2116 correlations.release();
2117 }
2118 treeNodePtr.p->m_deferred.init();
2119
2120 /**
2121 * TreeNode-type specific cleanup.
2122 */
2123 if (treeNodePtr.p->m_info->m_parent_batch_cleanup != 0)
2124 {
2125 jam();
2126 (this->*(treeNodePtr.p->m_info->m_parent_batch_cleanup))(requestPtr,
2127 treeNodePtr);
2128 }
2129 }
2130 }
2131 }
2132
2133 void
2134 Dbspj::releaseScanBuffers(Ptr<Request> requestPtr)
2135 {
2136 Ptr<TreeNode> treeNodePtr;
2137 Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
2138
2139 for (list.first(treeNodePtr); !treeNodePtr.isNull(); list.next(treeNodePtr))
2140 {
2141 /**
2142 * Release buffered rows for all treeNodes getting more rows
2143      * in the following NEXTREQ, including all their children.
2144 */
2145 if (requestPtr.p->m_active_nodes.get(treeNodePtr.p->m_node_no) ||
2146 requestPtr.p->m_active_nodes.overlaps(treeNodePtr.p->m_ancestors))
2147 {
2148 if (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER)
2149 {
2150 jam();
2151 releaseNodeRows(requestPtr, treeNodePtr);
2152 }
2153 }
2154 }
2155 }
2156
2157 void
2158 Dbspj::releaseNodeRows(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr)
2159 {
2160 /**
2161 * Release all rows associated with tree node
2162 */
2163 DEBUG("releaseNodeRows"
2164 << ", node: " << treeNodePtr.p->m_node_no
2165 << ", request: " << requestPtr.i
2166 );
2167
2168 ndbassert(treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER);
2169
2170 Uint32 cnt = 0;
2171 RowIterator iter;
2172 for (first(treeNodePtr.p->m_rows, iter); !iter.isNull(); )
2173 {
2174 jam();
2175 RowRef pos = iter.m_base.m_ref;
2176 next(iter);
2177 releaseRow(treeNodePtr.p->m_rows, pos);
2178 cnt ++;
2179 }
2180 DEBUG("RowIterator: released " << cnt << " rows!");
2181
2182 if (treeNodePtr.p->m_rows.m_type == RowCollection::COLLECTION_MAP)
2183 {
2184 jam();
2185 // Release the (now empty) RowMap
2186 RowMap& map = treeNodePtr.p->m_rows.m_map;
2187 if (!map.isNull())
2188 {
2189 jam();
2190 RowRef ref;
2191 map.copyto(ref);
2192 releaseRow(treeNodePtr.p->m_rows, ref); // Map was allocated in row memory
2193 }
2194 }
2195 }
2196
2197 void
2198 Dbspj::releaseRow(RowCollection& collection, RowRef pos)
2199 {
2200   // Only valid for var-alloc'ed buffers, or else the stack would be
2201   // popped without consideration of individual rows
2202 ndbassert(collection.m_base.m_rowBuffer != NULL);
2203 ndbassert(collection.m_base.m_rowBuffer->m_type == BUFFER_VAR);
2204 ndbassert(pos.m_alloc_type == BUFFER_VAR);
2205
2206 RowBuffer& rowBuffer = *collection.m_base.m_rowBuffer;
2207 Ptr<RowPage> ptr;
2208 m_page_pool.getPtr(ptr, pos.m_page_id);
2209 ((Var_page*)ptr.p)->free_record(pos.m_page_pos, Var_page::CHAIN);
2210 Uint32 free_space = ((Var_page*)ptr.p)->free_space;
2211 if (free_space == Var_page::DATA_WORDS - 1)
2212 {
2213 jam();
2214 LocalDLFifoList<RowPage> list(m_page_pool,
2215 rowBuffer.m_page_list);
2216 const bool last = list.hasNext(ptr) == false;
2217 list.remove(ptr);
2218 if (list.isEmpty())
2219 {
2220 jam();
2221 /**
2222 * Don't remove last page...
2223 */
2224 list.addLast(ptr);
2225 rowBuffer.m_var.m_free = free_space;
2226 }
2227 else
2228 {
2229 jam();
2230 if (last)
2231 {
2232 jam();
2233 /**
2234 * If we were last...set m_var.m_free to free_space of newLastPtr
2235 */
2236 Ptr<RowPage> newLastPtr;
2237 ndbrequire(list.last(newLastPtr));
2238 rowBuffer.m_var.m_free = ((Var_page*)newLastPtr.p)->free_space;
2239 }
2240 releasePage(ptr);
2241 }
2242 }
2243 else if (free_space > rowBuffer.m_var.m_free)
2244 {
2245 jam();
2246 LocalDLFifoList<RowPage> list(m_page_pool,
2247 rowBuffer.m_page_list);
2248 list.remove(ptr);
2249 list.addLast(ptr);
2250 rowBuffer.m_var.m_free = free_space;
2251 }
2252 }
2253
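/**
 * Notes on ::releaseRow() above (derived from the code): allocation
 * always happens from the last page in 'm_page_list', with
 * 'm_var.m_free' caching that page's free space. Hence an emptied
 * page is returned to the free list (unless it is the only page
 * left), and a page that got more free space than the current last
 * page is moved last, so its space can be reused by the next
 * ::varAlloc().
 */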
2254 void
2255 Dbspj::releaseRequestBuffers(Ptr<Request> requestPtr)
2256 {
2257 DEBUG("releaseRequestBuffers"
2258 << ", request: " << requestPtr.i
2259 );
2260 /**
2261 * Release all pages for request
2262 */
2263 {
2264 {
2265 LocalSLList<RowPage> freelist(m_page_pool, m_free_page_list);
2266 freelist.prependList(requestPtr.p->m_rowBuffer.m_page_list);
2267 }
2268 requestPtr.p->m_rowBuffer.reset();
2269 }
2270 }
2271
2272 /**
2273 * Handle that batch for this 'TreeNode' is complete.
2274 */
2275 void
2276 Dbspj::handleTreeNodeComplete(Signal * signal, Ptr<Request> requestPtr,
2277 Ptr<TreeNode> treeNodePtr)
2278 {
2279 if ((requestPtr.p->m_state & Request::RS_ABORTING) == 0)
2280 {
2281 jam();
2282 ndbassert(!requestPtr.p->m_completed_nodes.get(treeNodePtr.p->m_node_no));
2283 requestPtr.p->m_completed_nodes.set(treeNodePtr.p->m_node_no);
2284
2285 /**
2286 * If all ancestors are complete, this has to be reported
2287 * as we might be waiting for this condition to start more
2288 * operations.
2289 */
2290 if (requestPtr.p->m_completed_nodes.contains(treeNodePtr.p->m_ancestors))
2291 {
2292 jam();
2293 reportAncestorsComplete(signal, requestPtr, treeNodePtr);
2294 }
2295 }
2296 }
2297
2298 /**
2299 * Notify any children of this 'TreeNode' that all ancestor
2300  * TreeNodes have completed their batches.
2301 */
2302 void
2303 Dbspj::reportAncestorsComplete(Signal * signal, Ptr<Request> requestPtr,
2304 Ptr<TreeNode> treeNodePtr)
2305 {
2306 if (treeNodePtr.p->m_bits & TreeNode::T_REPORT_BATCH_COMPLETE)
2307 {
2308 jam();
2309 LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
2310 Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
2311 Dependency_map::ConstDataBufferIterator it;
2312
2313 for (list.first(it); !it.isNull(); list.next(it))
2314 {
2315 jam();
2316 Ptr<TreeNode> childPtr;
2317 m_treenode_pool.getPtr(childPtr, * it.data);
2318
2319 if (requestPtr.p->m_completed_nodes.contains(childPtr.p->m_ancestors) &&
2320 childPtr.p->m_deferred.isEmpty())
2321 {
2322 jam();
2323
2324 /**
2325          * Does any child need to know when *my* batch is complete?
2326 */
2327 if (childPtr.p->m_bits & TreeNode::T_NEED_REPORT_BATCH_COMPLETED)
2328 {
2329 jam();
2330 ndbrequire(childPtr.p->m_info != 0 &&
2331 childPtr.p->m_info->m_parent_batch_complete !=0 );
2332 (this->*(childPtr.p->m_info->m_parent_batch_complete))(signal,
2333 requestPtr,
2334 childPtr);
2335 }
2336 reportAncestorsComplete(signal, requestPtr, childPtr);
2337 }
2338 }
2339 }
2340 }
2341
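/**
 * A small example of the propagation above, assuming a lookup chain
 * t1 -> t2 -> t3: when t1 completes its batch, t2 (if it set
 * T_NEED_REPORT_BATCH_COMPLETED) is notified through
 * 'm_parent_batch_complete', and the recursion then visits t3, which
 * is notified only once t2 - and thereby every ancestor of t3 - is
 * also present in 'm_completed_nodes'.
 */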
2342 void
2343 Dbspj::abort(Signal* signal, Ptr<Request> requestPtr, Uint32 errCode)
2344 {
2345 jam();
2346
2347 /**
2348    * Need to handle online upgrade as the protocol for
2349    * signaling errors for Lookup-requests changed in 7.2.5.
2350    * If the API-version is <= 7.2.4 we increase the severity
2351    * of the error to a 'NodeFailure', as this is the only
2352    * error code for which the API will stop further
2353    * 'outstanding-counting' in pre-7.2.5 versions.
2354    * (Starting from 7.2.5 we stop counting for all 'hard errors'.)
2355 */
2356 if (requestPtr.p->isLookup() &&
2357 !ndbd_fixed_lookup_query_abort(getNodeInfo(getResultRef(requestPtr)).m_version))
2358 {
2359 jam();
2360 errCode = DbspjErr::NodeFailure;
2361 }
2362
2363 if ((requestPtr.p->m_state & Request::RS_ABORTING) != 0)
2364 {
2365 jam();
2366 goto checkcomplete;
2367 }
2368
2369 requestPtr.p->m_state |= Request::RS_ABORTING;
2370 requestPtr.p->m_errCode = errCode;
2371
2372 {
2373 Ptr<TreeNode> nodePtr;
2374 Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
2375 for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
2376 {
2377 jam();
2378 ndbrequire(nodePtr.p->m_info != 0);
2379 if (nodePtr.p->m_info->m_abort != 0)
2380 {
2381 jam();
2382 (this->*(nodePtr.p->m_info->m_abort))(signal, requestPtr, nodePtr);
2383 }
2384 }
2385 }
2386
2387 checkcomplete:
2388 checkBatchComplete(signal, requestPtr, 0);
2389 }
2390
2391 Uint32
2392 Dbspj::nodeFail(Signal* signal, Ptr<Request> requestPtr,
2393 NdbNodeBitmask nodes)
2394 {
2395 Uint32 cnt = 0;
2396 Uint32 iter = 0;
2397
2398 {
2399 Ptr<TreeNode> nodePtr;
2400 Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
2401 for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
2402 {
2403 jam();
2404 ndbrequire(nodePtr.p->m_info != 0);
2405 if (nodePtr.p->m_info->m_execNODE_FAILREP != 0)
2406 {
2407 jam();
2408 iter ++;
2409 cnt += (this->*(nodePtr.p->m_info->m_execNODE_FAILREP))(signal,
2410 requestPtr,
2411 nodePtr, nodes);
2412 }
2413 }
2414 }
2415
2416 if (cnt == 0)
2417 {
2418 jam();
2419 /**
2420      * None of the operations needed any NodeFailRep "action".
2421      * Still check whether our TC has died... but that is only
2422      * needed in the scan case, not for lookups.
2423 */
2424 if (requestPtr.p->isScan() &&
2425 nodes.get(refToNode(requestPtr.p->m_senderRef)))
2426 {
2427 jam();
2428 abort(signal, requestPtr, DbspjErr::NodeFailure);
2429 }
2430 }
2431 else
2432 {
2433 jam();
2434 abort(signal, requestPtr, DbspjErr::NodeFailure);
2435 }
2436
2437 return cnt + iter;
2438 }
2439
2440 void
2441 Dbspj::complete(Signal* signal, Ptr<Request> requestPtr)
2442 {
2443 /**
2444 * we need to run complete-phase before sending last SCAN_FRAGCONF
2445 */
2446 Uint32 flags = requestPtr.p->m_state &
2447 (Request::RS_ABORTING | Request::RS_WAITING);
2448
2449 requestPtr.p->m_state = Request::RS_COMPLETING | flags;
2450
2451 // clear bit so that next batchComplete()
2452 // will continue to cleanup
2453 ndbassert((requestPtr.p->m_bits & Request::RT_NEED_COMPLETE) != 0);
2454 requestPtr.p->m_bits &= ~(Uint32)Request::RT_NEED_COMPLETE;
2455 requestPtr.p->m_outstanding = 0;
2456 {
2457 Ptr<TreeNode> nodePtr;
2458 Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
2459 for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
2460 {
2461 jam();
2462 ndbrequire(nodePtr.p->m_info != 0);
2463 if (nodePtr.p->m_info->m_complete != 0)
2464 {
2465 jam();
2466 (this->*(nodePtr.p->m_info->m_complete))(signal, requestPtr, nodePtr);
2467 }
2468 }
2469
2470 /**
2471      * Preferably RT_NEED_COMPLETE should only be set if blocking
2472      * calls are used, in which case m_outstanding should have been increased.
2473      *
2474      * BUT: scanIndex does DIH_SCAN_TAB_COMPLETE_REP, which does not send a
2475      * reply, so it is not really "blocking" -
2476      * hence the assert below is left out.
2477 */
2478 //ndbassert(requestPtr.p->m_outstanding);
2479 }
2480 checkBatchComplete(signal, requestPtr, 0);
2481 }
2482
2483 void
2484 Dbspj::cleanup(Ptr<Request> requestPtr)
2485 {
2486 ndbrequire(requestPtr.p->m_cnt_active == 0);
2487 {
2488 Ptr<TreeNode> nodePtr;
2489 Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
2490 while (list.removeFirst(nodePtr))
2491 {
2492 jam();
2493 ndbrequire(nodePtr.p->m_info != 0 && nodePtr.p->m_info->m_cleanup != 0);
2494 (this->*(nodePtr.p->m_info->m_cleanup))(requestPtr, nodePtr);
2495
2496 m_treenode_pool.release(nodePtr);
2497 }
2498 }
2499 if (requestPtr.p->isScan())
2500 {
2501 jam();
2502
2503 if (unlikely((requestPtr.p->m_state & Request::RS_WAITING) != 0))
2504 {
2505 jam();
2506 requestPtr.p->m_state = Request::RS_ABORTED;
2507 return;
2508 }
2509 m_scan_request_hash.remove(requestPtr, *requestPtr.p);
2510 }
2511 else
2512 {
2513 jam();
2514 m_lookup_request_hash.remove(requestPtr, *requestPtr.p);
2515 }
2516 releaseRequestBuffers(requestPtr);
2517 ArenaHead ah = requestPtr.p->m_arena;
2518 m_request_pool.release(requestPtr);
2519 m_arenaAllocator.release(ah);
2520 }
2521
2522 void
2523 Dbspj::cleanup_common(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr)
2524 {
2525 jam();
2526
2527 LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
2528 {
2529 Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
2530 list.release();
2531 }
2532
2533 {
2534 Local_pattern_store pattern(pool, treeNodePtr.p->m_keyPattern);
2535 pattern.release();
2536 }
2537
2538 {
2539 Local_pattern_store pattern(pool, treeNodePtr.p->m_attrParamPattern);
2540 pattern.release();
2541 }
2542
2543 {
2544 Local_correlation_list correlations(pool, treeNodePtr.p->m_deferred.m_correlations);
2545 correlations.release();
2546 }
2547
2548 if (treeNodePtr.p->m_send.m_keyInfoPtrI != RNIL)
2549 {
2550 jam();
2551 releaseSection(treeNodePtr.p->m_send.m_keyInfoPtrI);
2552 }
2553
2554 if (treeNodePtr.p->m_send.m_attrInfoPtrI != RNIL)
2555 {
2556 jam();
2557 releaseSection(treeNodePtr.p->m_send.m_attrInfoPtrI);
2558 }
2559 }
2560
2561 /**
2562 * Processing of signals from LQH
2563 */
2564 void
2565 Dbspj::execLQHKEYREF(Signal* signal)
2566 {
2567 jamEntry();
2568
2569 const LqhKeyRef* ref = reinterpret_cast<const LqhKeyRef*>(signal->getDataPtr());
2570
2571 Ptr<TreeNode> treeNodePtr;
2572 m_treenode_pool.getPtr(treeNodePtr, ref->connectPtr);
2573
2574 Ptr<Request> requestPtr;
2575 m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
2576 ndbassert(!requestPtr.p->m_completed_nodes.get(treeNodePtr.p->m_node_no));
2577
2578 DEBUG("execLQHKEYREF"
2579 << ", node: " << treeNodePtr.p->m_node_no
2580 << ", request: " << requestPtr.i
2581 << ", errorCode: " << ref->errorCode
2582 );
2583
2584 ndbrequire(treeNodePtr.p->m_info && treeNodePtr.p->m_info->m_execLQHKEYREF);
2585 (this->*(treeNodePtr.p->m_info->m_execLQHKEYREF))(signal,
2586 requestPtr,
2587 treeNodePtr);
2588 }
2589
2590 void
2591 Dbspj::execLQHKEYCONF(Signal* signal)
2592 {
2593 jamEntry();
2594
2595 const LqhKeyConf* conf = reinterpret_cast<const LqhKeyConf*>(signal->getDataPtr());
2596 Ptr<TreeNode> treeNodePtr;
2597 m_treenode_pool.getPtr(treeNodePtr, conf->opPtr);
2598
2599 Ptr<Request> requestPtr;
2600 m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
2601 ndbassert(!requestPtr.p->m_completed_nodes.get(treeNodePtr.p->m_node_no));
2602
2603 DEBUG("execLQHKEYCONF"
2604 << ", node: " << treeNodePtr.p->m_node_no
2605 << ", request: " << requestPtr.i
2606 );
2607
2608 ndbrequire(treeNodePtr.p->m_info && treeNodePtr.p->m_info->m_execLQHKEYCONF);
2609 (this->*(treeNodePtr.p->m_info->m_execLQHKEYCONF))(signal,
2610 requestPtr,
2611 treeNodePtr);
2612 }
2613
2614 void
2615 Dbspj::execSCAN_FRAGREF(Signal* signal)
2616 {
2617 jamEntry();
2618 const ScanFragRef* ref = reinterpret_cast<const ScanFragRef*>(signal->getDataPtr());
2619
2620 Ptr<ScanFragHandle> scanFragHandlePtr;
2621 m_scanfraghandle_pool.getPtr(scanFragHandlePtr, ref->senderData);
2622 Ptr<TreeNode> treeNodePtr;
2623 m_treenode_pool.getPtr(treeNodePtr, scanFragHandlePtr.p->m_treeNodePtrI);
2624 Ptr<Request> requestPtr;
2625 m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
2626 ndbassert(!requestPtr.p->m_completed_nodes.get(treeNodePtr.p->m_node_no));
2627
2628 DEBUG("execSCAN_FRAGREF"
2629 << ", node: " << treeNodePtr.p->m_node_no
2630 << ", request: " << requestPtr.i
2631 << ", errorCode: " << ref->errorCode
2632 );
2633
2634 ndbrequire(treeNodePtr.p->m_info&&treeNodePtr.p->m_info->m_execSCAN_FRAGREF);
2635 (this->*(treeNodePtr.p->m_info->m_execSCAN_FRAGREF))(signal,
2636 requestPtr,
2637 treeNodePtr,
2638 scanFragHandlePtr);
2639 }
2640
2641 void
2642 Dbspj::execSCAN_HBREP(Signal* signal)
2643 {
2644 jamEntry();
2645
2646 Uint32 senderData = signal->theData[0];
2647 //Uint32 transId[2] = { signal->theData[1], signal->theData[2] };
2648
2649 Ptr<ScanFragHandle> scanFragHandlePtr;
2650 m_scanfraghandle_pool.getPtr(scanFragHandlePtr, senderData);
2651 Ptr<TreeNode> treeNodePtr;
2652 m_treenode_pool.getPtr(treeNodePtr, scanFragHandlePtr.p->m_treeNodePtrI);
2653 Ptr<Request> requestPtr;
2654 m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
2655 DEBUG("execSCAN_HBREP"
2656 << ", node: " << treeNodePtr.p->m_node_no
2657 << ", request: " << requestPtr.i
2658 );
2659
2660 Uint32 ref = requestPtr.p->m_senderRef;
2661 signal->theData[0] = requestPtr.p->m_senderData;
2662 sendSignal(ref, GSN_SCAN_HBREP, signal, 3, JBB);
2663 }
2664
2665 void
2666 Dbspj::execSCAN_FRAGCONF(Signal* signal)
2667 {
2668 jamEntry();
2669
2670 const ScanFragConf* conf = reinterpret_cast<const ScanFragConf*>(signal->getDataPtr());
2671
2672 #ifdef DEBUG_SCAN_FRAGREQ
2673 ndbout_c("Dbspj::execSCAN_FRAGCONF() receiveing SCAN_FRAGCONF ");
2674 printSCAN_FRAGCONF(stdout, signal->getDataPtrSend(),
2675 conf->total_len,
2676 DBLQH);
2677 #endif
2678
2679 Ptr<ScanFragHandle> scanFragHandlePtr;
2680 m_scanfraghandle_pool.getPtr(scanFragHandlePtr, conf->senderData);
2681 Ptr<TreeNode> treeNodePtr;
2682 m_treenode_pool.getPtr(treeNodePtr, scanFragHandlePtr.p->m_treeNodePtrI);
2683 Ptr<Request> requestPtr;
2684 m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
2685 ndbassert(!requestPtr.p->m_completed_nodes.get(treeNodePtr.p->m_node_no) ||
2686 requestPtr.p->m_state & Request::RS_ABORTING);
2687
2688 DEBUG("execSCAN_FRAGCONF"
2689 << ", node: " << treeNodePtr.p->m_node_no
2690 << ", request: " << requestPtr.i
2691 );
2692
2693 ndbrequire(treeNodePtr.p->m_info&&treeNodePtr.p->m_info->m_execSCAN_FRAGCONF);
2694 (this->*(treeNodePtr.p->m_info->m_execSCAN_FRAGCONF))(signal,
2695 requestPtr,
2696 treeNodePtr,
2697 scanFragHandlePtr);
2698 }
2699
2700 void
2701 Dbspj::execSCAN_NEXTREQ(Signal* signal)
2702 {
2703 jamEntry();
2704 const ScanFragNextReq * req = (ScanFragNextReq*)&signal->theData[0];
2705
2706 #ifdef DEBUG_SCAN_FRAGREQ
2707 DEBUG("Incomming SCAN_NEXTREQ");
2708 printSCANFRAGNEXTREQ(stdout, &signal->theData[0],
2709 ScanFragNextReq::SignalLength, DBLQH);
2710 #endif
2711
2712 Request key;
2713 key.m_transId[0] = req->transId1;
2714 key.m_transId[1] = req->transId2;
2715 key.m_senderData = req->senderData;
2716
2717 Ptr<Request> requestPtr;
2718 if (unlikely(!m_scan_request_hash.find(requestPtr, key)))
2719 {
2720 jam();
2721 ndbrequire(ScanFragNextReq::getCloseFlag(req->requestInfo));
2722 return;
2723 }
2724 DEBUG("execSCAN_NEXTREQ, request: " << requestPtr.i);
2725
2726 #ifdef SPJ_TRACE_TIME
2727 const NDB_TICKS now = NdbTick_getCurrentTicks();
2728 const NDB_TICKS then = requestPtr.p->m_save_time;
2729 const Uint64 diff = NdbTick_Elapsed(then,now).microSec();
2730 requestPtr.p->m_sum_waiting += Uint32(diff);
2731 requestPtr.p->m_save_time = now;
2732 #endif
2733
2734 Uint32 state = requestPtr.p->m_state;
2735 requestPtr.p->m_state = state & ~Uint32(Request::RS_WAITING);
2736
2737 if (unlikely(state == Request::RS_ABORTED))
2738 {
2739 jam();
2740 batchComplete(signal, requestPtr);
2741 return;
2742 }
2743
2744 if (unlikely((state & Request::RS_ABORTING) != 0))
2745 {
2746 jam();
2747 /**
2748      * An abort is already in progress, and since RS_WAITING is now
2749      * cleared it will terminate this request.
2750 */
2751 return;
2752 }
2753
2754 if (ScanFragNextReq::getCloseFlag(req->requestInfo)) // Requested close scan
2755 {
2756 jam();
2757 abort(signal, requestPtr, 0);
2758 return;
2759 }
2760
2761 ndbrequire((state & Request::RS_WAITING) != 0);
2762 ndbrequire(requestPtr.p->m_outstanding == 0);
2763
2764 {
2765 /**
2766 * Scroll all relevant cursors...
2767 */
2768 Ptr<TreeNode> treeNodePtr;
2769 Local_TreeNodeCursor_list list(m_treenode_pool,
2770 requestPtr.p->m_cursor_nodes);
2771 Uint32 cnt_active = 0;
2772
2773 for (list.first(treeNodePtr); !treeNodePtr.isNull(); list.next(treeNodePtr))
2774 {
2775 if (treeNodePtr.p->m_state == TreeNode::TN_ACTIVE)
2776 {
2777 jam();
2778 DEBUG("SCAN_NEXTREQ on TreeNode: "
2779 << ", m_node_no: " << treeNodePtr.p->m_node_no
2780 << ", w/ m_parentPtrI: " << treeNodePtr.p->m_parentPtrI);
2781
2782 ndbrequire(treeNodePtr.p->m_info != 0 &&
2783 treeNodePtr.p->m_info->m_execSCAN_NEXTREQ != 0);
2784 (this->*(treeNodePtr.p->m_info->m_execSCAN_NEXTREQ))(signal,
2785 requestPtr,
2786 treeNodePtr);
2787 cnt_active++;
2788 }
2789 else
2790 {
2791 /**
2792 * Restart any other scans not being 'TN_ACTIVE'
2793 * (Only effective if 'RT_REPEAT_SCAN_RESULT')
2794 */
2795 jam();
2796 ndbrequire(requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT);
2797 DEBUG("Restart TreeNode "
2798 << ", m_node_no: " << treeNodePtr.p->m_node_no
2799 << ", w/ m_parentPtrI: " << treeNodePtr.p->m_parentPtrI);
2800
2801 ndbrequire(treeNodePtr.p->m_info != 0 &&
2802 treeNodePtr.p->m_info->m_parent_batch_complete !=0 );
2803 (this->*(treeNodePtr.p->m_info->m_parent_batch_complete))(signal,
2804 requestPtr,
2805 treeNodePtr);
2806 }
2807 if (unlikely((requestPtr.p->m_state & Request::RS_ABORTING) != 0))
2808 {
2809 jam();
2810 break;
2811 }
2812 }// for all treeNodes in 'm_cursor_nodes'
2813
2814 /* Expected only a single ACTIVE TreeNode among the cursors */
2815 ndbrequire(cnt_active == 1 ||
2816 !(requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT));
2817 }
2818 }
2819
2820 void
2821 Dbspj::execTRANSID_AI(Signal* signal)
2822 {
2823 jamEntry();
2824 TransIdAI * req = (TransIdAI *)signal->getDataPtr();
2825 Uint32 ptrI = req->connectPtr;
2826 //Uint32 transId[2] = { req->transId[0], req->transId[1] };
2827
2828 Ptr<TreeNode> treeNodePtr;
2829 m_treenode_pool.getPtr(treeNodePtr, ptrI);
2830 Ptr<Request> requestPtr;
2831 m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
2832 ndbassert(!requestPtr.p->m_completed_nodes.get(treeNodePtr.p->m_node_no));
2833
2834 DEBUG("execTRANSID_AI"
2835 << ", node: " << treeNodePtr.p->m_node_no
2836 << ", request: " << requestPtr.i
2837 );
2838
2839 ndbrequire(signal->getNoOfSections() != 0);
2840
2841 SegmentedSectionPtr dataPtr;
2842 {
2843 SectionHandle handle(this, signal);
2844 handle.getSection(dataPtr, 0);
2845 handle.clear();
2846 }
2847
2848 #if defined(DEBUG_LQHKEYREQ) || defined(DEBUG_SCAN_FRAGREQ)
2849 printf("execTRANSID_AI: ");
2850 print(dataPtr, stdout);
2851 #endif
2852
2853 /**
2854 * build easy-access-array for row
2855 */
2856 Uint32 tmp[2+MAX_ATTRIBUTES_IN_TABLE];
2857 RowPtr::Header* header = CAST_PTR(RowPtr::Header, &tmp[0]);
2858
2859 Uint32 cnt = buildRowHeader(header, dataPtr);
2860 ndbassert(header->m_len < NDB_ARRAY_SIZE(tmp));
2861
2862 struct RowPtr row;
2863 row.m_type = RowPtr::RT_SECTION;
2864 row.m_src_node_ptrI = treeNodePtr.i;
2865 row.m_row_data.m_section.m_header = header;
2866 row.m_row_data.m_section.m_dataPtr.assign(dataPtr);
2867
2868 getCorrelationData(row.m_row_data.m_section,
2869 cnt - 1,
2870 row.m_src_correlation);
2871
2872 if (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER)
2873 {
2874 jam();
2875 Uint32 err;
2876
2877 DEBUG("Need to storeRow"
2878 << ", node: " << treeNodePtr.p->m_node_no
2879 );
2880
2881 if (ERROR_INSERTED(17120) ||
2882 (ERROR_INSERTED(17121) && treeNodePtr.p->m_parentPtrI != RNIL))
2883 {
2884 jam();
2885 CLEAR_ERROR_INSERT_VALUE;
2886 abort(signal, requestPtr, DbspjErr::OutOfRowMemory);
2887 }
2888 else if ((err = storeRow(treeNodePtr.p->m_rows, row)) != 0)
2889 {
2890 jam();
2891 abort(signal, requestPtr, err);
2892 }
2893 }
2894
2895 ndbrequire(treeNodePtr.p->m_info&&treeNodePtr.p->m_info->m_execTRANSID_AI);
2896
2897 (this->*(treeNodePtr.p->m_info->m_execTRANSID_AI))(signal,
2898 requestPtr,
2899 treeNodePtr,
2900 row);
2901 release(dataPtr);
2902 }
2903
2904 Uint32
2905 Dbspj::storeRow(RowCollection& collection, RowPtr &row)
2906 {
2907 ndbassert(row.m_type == RowPtr::RT_SECTION);
2908 SegmentedSectionPtr dataPtr = row.m_row_data.m_section.m_dataPtr;
2909 Uint32 * headptr = (Uint32*)row.m_row_data.m_section.m_header;
2910 Uint32 headlen = 1 + row.m_row_data.m_section.m_header->m_len;
2911
2912 /**
2913 * Rows might be stored at an offset within the collection.
2914 */
2915 const Uint32 offset = collection.rowOffset();
2916
2917 Uint32 totlen = 0;
2918 totlen += dataPtr.sz;
2919 totlen += headlen;
2920 totlen += offset;
2921
2922 RowRef ref;
2923 Uint32* const dstptr = rowAlloc(*collection.m_base.m_rowBuffer, ref, totlen);
2924 if (unlikely(dstptr == 0))
2925 {
2926 jam();
2927 return DbspjErr::OutOfRowMemory;
2928 }
2929 memcpy(dstptr + offset, headptr, 4 * headlen);
2930 copy(dstptr + offset + headlen, dataPtr);
2931
2932 if (collection.m_type == RowCollection::COLLECTION_LIST)
2933 {
2934 jam();
2935 NullRowRef.copyto_link(dstptr); // Null terminate list...
2936 add_to_list(collection.m_list, ref);
2937 }
2938 else
2939 {
2940 jam();
2941 Uint32 error = add_to_map(collection.m_map, row.m_src_correlation, ref);
2942 if (unlikely(error))
2943 return error;
2944 }
2945
2946 /**
2947    * Refetch the pointer to the alloc'ed row memory before creating the
2948    * RowPtr, as add_to_xxx above may have reorganized memory, causing
2949    * the alloc'ed row to be moved.
2950 */
2951 const Uint32* const rowptr = get_row_ptr(ref);
2952 setupRowPtr(collection, row, ref, rowptr);
2953 return 0;
2954 }
2955
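/**
 * Layout of a buffered row as produced by ::storeRow() above and read
 * back by ::setupRowPtr() below (derived from the code, for
 * illustration):
 *
 *   [offset words][RowPtr::Header, 1 + m_len words][attr data]
 *
 * For a COLLECTION_LIST the leading offset words hold the link to the
 * next row (see ::add_to_list()); ::setupRowPtr() skips them when
 * reconstructing a RT_LINEAR RowPtr.
 */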
2956 void
2957 Dbspj::setupRowPtr(const RowCollection& collection,
2958 RowPtr& row, RowRef ref, const Uint32 * src)
2959 {
2960 const Uint32 offset = collection.rowOffset();
2961 const RowPtr::Header * headptr = (RowPtr::Header*)(src + offset);
2962 Uint32 headlen = 1 + headptr->m_len;
2963
2964 row.m_type = RowPtr::RT_LINEAR;
2965 row.m_row_data.m_linear.m_row_ref = ref;
2966 row.m_row_data.m_linear.m_header = headptr;
2967 row.m_row_data.m_linear.m_data = (Uint32*)headptr + headlen;
2968 }
2969
2970 void
2971 Dbspj::add_to_list(SLFifoRowList & list, RowRef rowref)
2972 {
2973 if (list.isNull())
2974 {
2975 jam();
2976 list.m_first_row_page_id = rowref.m_page_id;
2977 list.m_first_row_page_pos = rowref.m_page_pos;
2978 }
2979 else
2980 {
2981 jam();
2982 /**
2983 * add last to list
2984 */
2985 RowRef last;
2986 last.m_alloc_type = rowref.m_alloc_type;
2987 last.m_page_id = list.m_last_row_page_id;
2988 last.m_page_pos = list.m_last_row_page_pos;
2989 Uint32 * const rowptr = get_row_ptr(last);
2990 rowref.copyto_link(rowptr);
2991 }
2992
2993 list.m_last_row_page_id = rowref.m_page_id;
2994 list.m_last_row_page_pos = rowref.m_page_pos;
2995 }
2996
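/**
 * Sketch of the list linkage above: the SLFifoRowList itself only
 * records the page_id/page_pos of the first and last row, while the
 * link to the next row is stored in the leading words of each
 * buffered row (written via RowRef::copyto_link(), and
 * NullRowRef-terminated by ::storeRow()).
 */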
2997 Uint32 *
2998 Dbspj::get_row_ptr(RowRef pos)
2999 {
3000 Ptr<RowPage> ptr;
3001 m_page_pool.getPtr(ptr, pos.m_page_id);
3002 if (pos.m_alloc_type == BUFFER_STACK) // ::stackAlloc() memory
3003 {
3004 jam();
3005 return ptr.p->m_data + pos.m_page_pos;
3006 }
3007 else // ::varAlloc() memory
3008 {
3009 jam();
3010 ndbassert(pos.m_alloc_type == BUFFER_VAR);
3011 return ((Var_page*)ptr.p)->get_ptr(pos.m_page_pos);
3012 }
3013 }
3014
3015 inline
3016 bool
3017 Dbspj::first(const SLFifoRowList& list,
3018 SLFifoRowListIterator& iter)
3019 {
3020 if (list.isNull())
3021 {
3022 jam();
3023 iter.setNull();
3024 return false;
3025 }
3026
3027 // const Buffer_type allocator = list.m_rowBuffer->m_type;
3028 iter.m_ref.m_alloc_type = list.m_rowBuffer->m_type;
3029 iter.m_ref.m_page_id = list.m_first_row_page_id;
3030 iter.m_ref.m_page_pos = list.m_first_row_page_pos;
3031 iter.m_row_ptr = get_row_ptr(iter.m_ref);
3032 return true;
3033 }
3034
3035 inline
3036 bool
3037 Dbspj::next(SLFifoRowListIterator& iter)
3038 {
3039 iter.m_ref.assign_from_link(iter.m_row_ptr);
3040 if (iter.m_ref.isNull())
3041 {
3042 jam();
3043 return false;
3044 }
3045 iter.m_row_ptr = get_row_ptr(iter.m_ref);
3046 return true;
3047 }
3048
3049 Uint32
3050 Dbspj::add_to_map(RowMap& map,
3051 Uint32 corrVal, RowRef rowref)
3052 {
3053 Uint32 * mapptr;
3054 if (map.isNull())
3055 {
3056 jam();
3057 ndbassert(map.m_size > 0);
3058 ndbassert(map.m_rowBuffer != NULL);
3059
3060 Uint32 sz16 = RowMap::MAP_SIZE_PER_REF_16 * map.m_size;
3061 Uint32 sz32 = (sz16 + 1) / 2;
3062 RowRef ref;
3063 mapptr = rowAlloc(*map.m_rowBuffer, ref, sz32);
3064 if (unlikely(mapptr == 0))
3065 {
3066 jam();
3067 return DbspjErr::OutOfRowMemory;
3068 }
3069 map.assign(ref);
3070 map.m_elements = 0;
3071 map.clear(mapptr);
3072 }
3073 else
3074 {
3075 jam();
3076 RowRef ref;
3077 map.copyto(ref);
3078 mapptr = get_row_ptr(ref);
3079 }
3080
3081 Uint32 pos = corrVal & 0xFFFF;
3082 ndbrequire(pos < map.m_size);
3083 ndbrequire(map.m_elements < map.m_size);
3084
3085 if (1)
3086 {
3087 /**
3088 * Check that *pos* is empty
3089 */
3090 RowRef check;
3091 map.load(mapptr, pos, check);
3092 ndbrequire(check.m_page_pos == 0xFFFF);
3093 }
3094
3095 map.store(mapptr, pos, rowref);
3096
3097 return 0;
3098 }
3099
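/**
 * Sketch of the RowMap addressing above: the map itself is allocated
 * from row memory ('MAP_SIZE_PER_REF_16 * m_size' 16-bit units), and
 * a row's slot is derived from its correlation value:
 *
 *   pos = corrVal & 0xFFFF;  // row number within this batch
 *
 * An unoccupied slot is recognized by 'm_page_pos == 0xFFFF'.
 */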
3100 inline
3101 bool
3102 Dbspj::first(const RowMap& map,
3103 RowMapIterator & iter)
3104 {
3105 if (map.isNull())
3106 {
3107 jam();
3108 iter.setNull();
3109 return false;
3110 }
3111
3112 iter.m_map_ptr = get_row_ptr(map.m_map_ref);
3113 iter.m_size = map.m_size;
3114 iter.m_ref.m_alloc_type = map.m_rowBuffer->m_type;
3115
3116 Uint32 pos = 0;
3117   while (pos < iter.m_size && RowMap::isNull(iter.m_map_ptr, pos)) // check bounds before the slot
3118 pos++;
3119
3120 if (pos == iter.m_size)
3121 {
3122 jam();
3123 iter.setNull();
3124 return false;
3125 }
3126 else
3127 {
3128 jam();
3129 RowMap::load(iter.m_map_ptr, pos, iter.m_ref);
3130 iter.m_element_no = pos;
3131 iter.m_row_ptr = get_row_ptr(iter.m_ref);
3132 return true;
3133 }
3134 }
3135
3136 inline
3137 bool
3138 Dbspj::next(RowMapIterator & iter)
3139 {
3140 Uint32 pos = iter.m_element_no + 1;
3141   while (pos < iter.m_size && RowMap::isNull(iter.m_map_ptr, pos)) // check bounds before the slot
3142 pos++;
3143
3144 if (pos == iter.m_size)
3145 {
3146 jam();
3147 iter.setNull();
3148 return false;
3149 }
3150 else
3151 {
3152 jam();
3153 RowMap::load(iter.m_map_ptr, pos, iter.m_ref);
3154 iter.m_element_no = pos;
3155 iter.m_row_ptr = get_row_ptr(iter.m_ref);
3156 return true;
3157 }
3158 }
3159
3160 bool
3161 Dbspj::first(const RowCollection& collection,
3162 RowIterator& iter)
3163 {
3164 iter.m_type = collection.m_type;
3165 if (iter.m_type == RowCollection::COLLECTION_LIST)
3166 {
3167 jam();
3168 return first(collection.m_list, iter.m_list);
3169 }
3170 else
3171 {
3172 jam();
3173 ndbassert(iter.m_type == RowCollection::COLLECTION_MAP);
3174 return first(collection.m_map, iter.m_map);
3175 }
3176 }
3177
3178 bool
3179 Dbspj::next(RowIterator& iter)
3180 {
3181 if (iter.m_type == RowCollection::COLLECTION_LIST)
3182 {
3183 jam();
3184 return next(iter.m_list);
3185 }
3186 else
3187 {
3188 jam();
3189 ndbassert(iter.m_type == RowCollection::COLLECTION_MAP);
3190 return next(iter.m_map);
3191 }
3192 }
3193
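/**
 * Row memory is allocated from two kinds of buffers (dispatched by
 * ::rowAlloc() below):
 * - BUFFER_STACK (::stackAlloc()): rows are appended at increasing
 *   positions within a page and can only be released page-wise.
 * - BUFFER_VAR (::varAlloc()): rows are allocated from Var_page's,
 *   which also allow individual rows to be released (::releaseRow()).
 * Both allocate from the last page in the buffer's page list;
 * 'm_stack.m_pos' respectively 'm_var.m_free' cache its fill state.
 */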
3194 inline
3195 Uint32 *
3196 Dbspj::stackAlloc(RowBuffer & buffer, RowRef& dst, Uint32 sz)
3197 {
3198 Ptr<RowPage> ptr;
3199 LocalDLFifoList<RowPage> list(m_page_pool, buffer.m_page_list);
3200
3201 Uint32 pos = buffer.m_stack.m_pos;
3202 const Uint32 SIZE = RowPage::SIZE;
3203 if (list.isEmpty() || (pos + sz) > SIZE)
3204 {
3205 jam();
3206 bool ret = allocPage(ptr);
3207 if (unlikely(ret == false))
3208 {
3209 jam();
3210 return 0;
3211 }
3212
3213 pos = 0;
3214 list.addLast(ptr);
3215 }
3216 else
3217 {
3218 list.last(ptr);
3219 }
3220
3221 dst.m_page_id = ptr.i;
3222 dst.m_page_pos = pos;
3223 dst.m_alloc_type = BUFFER_STACK;
3224 buffer.m_stack.m_pos = pos + sz;
3225 return ptr.p->m_data + pos;
3226 }
3227
3228 inline
3229 Uint32 *
3230 Dbspj::varAlloc(RowBuffer & buffer, RowRef& dst, Uint32 sz)
3231 {
3232 Ptr<RowPage> ptr;
3233 LocalDLFifoList<RowPage> list(m_page_pool, buffer.m_page_list);
3234
3235 Uint32 free_space = buffer.m_var.m_free;
3236 if (list.isEmpty() || free_space < (sz + 1))
3237 {
3238 jam();
3239 bool ret = allocPage(ptr);
3240 if (unlikely(ret == false))
3241 {
3242 jam();
3243 return 0;
3244 }
3245
3246 list.addLast(ptr);
3247 ((Var_page*)ptr.p)->init();
3248 }
3249 else
3250 {
3251 jam();
3252 list.last(ptr);
3253 }
3254
3255 Var_page * vp = (Var_page*)ptr.p;
3256 Uint32 pos = vp->alloc_record(sz, (Var_page*)m_buffer0, Var_page::CHAIN);
3257
3258 dst.m_page_id = ptr.i;
3259 dst.m_page_pos = pos;
3260 dst.m_alloc_type = BUFFER_VAR;
3261 buffer.m_var.m_free = vp->free_space;
3262 return vp->get_ptr(pos);
3263 }
3264
3265 Uint32 *
3266 Dbspj::rowAlloc(RowBuffer& rowBuffer, RowRef& dst, Uint32 sz)
3267 {
3268 if (rowBuffer.m_type == BUFFER_STACK)
3269 {
3270 jam();
3271 return stackAlloc(rowBuffer, dst, sz);
3272 }
3273 else if (rowBuffer.m_type == BUFFER_VAR)
3274 {
3275 jam();
3276 return varAlloc(rowBuffer, dst, sz);
3277 }
3278 else
3279 {
3280 jam();
3281 ndbrequire(false);
3282 return NULL;
3283 }
3284 }
3285
3286 bool
3287 Dbspj::allocPage(Ptr<RowPage> & ptr)
3288 {
3289 if (m_free_page_list.isEmpty())
3290 {
3291 jam();
3292 if (ERROR_INSERTED_CLEAR(17003))
3293 {
3294 jam();
3295 ndbout_c("Injecting failed '::allocPage', error 17003 at line %d file %s",
3296 __LINE__, __FILE__);
3297 return false;
3298 }
3299 ptr.p = (RowPage*)m_ctx.m_mm.alloc_page(RT_SPJ_DATABUFFER,
3300 &ptr.i,
3301 Ndbd_mem_manager::NDB_ZONE_ANY);
3302 if (ptr.p == 0)
3303 {
3304 jam();
3305 return false;
3306 }
3307 return true;
3308 }
3309 else
3310 {
3311 jam();
3312 LocalSLList<RowPage> list(m_page_pool, m_free_page_list);
3313 bool ret = list.removeFirst(ptr);
3314 ndbrequire(ret);
3315 return ret;
3316 }
3317 }
3318
3319 void
3320 Dbspj::releasePage(Ptr<RowPage> ptr)
3321 {
3322 LocalSLList<RowPage> list(m_page_pool, m_free_page_list);
3323 list.addFirst(ptr);
3324 }
3325
3326 void
3327 Dbspj::releaseGlobal(Signal * signal)
3328 {
3329 Uint32 delay = 100;
3330 LocalSLList<RowPage> list(m_page_pool, m_free_page_list);
3331 if (list.isEmpty())
3332 {
3333 jam();
3334 delay = 300;
3335 }
3336 else
3337 {
3338 Ptr<RowPage> ptr;
3339 list.removeFirst(ptr);
3340 m_ctx.m_mm.release_page(RT_SPJ_DATABUFFER, ptr.i);
3341 }
3342
3343 signal->theData[0] = 0;
3344 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, delay, 1);
3345 }
3346
3347 Uint32
3348 Dbspj::checkTableError(Ptr<TreeNode> treeNodePtr) const
3349 {
3350 jam();
3351 if (treeNodePtr.p->m_tableOrIndexId >= c_tabrecFilesize)
3352 {
3353 jam();
3354 ndbassert(c_tabrecFilesize > 0);
3355 return DbspjErr::NoSuchTable;
3356 }
3357
3358 TableRecordPtr tablePtr;
3359 tablePtr.i = treeNodePtr.p->m_tableOrIndexId;
3360 ptrAss(tablePtr, m_tableRecord);
3361 Uint32 err = tablePtr.p->checkTableError(treeNodePtr.p->m_schemaVersion);
3362 if (unlikely(err))
3363 {
3364 DEBUG_DICT("Dbsp::checkTableError"
3365 << ", m_node_no: " << treeNodePtr.p->m_node_no
3366 << ", tableOrIndexId: " << treeNodePtr.p->m_tableOrIndexId
3367 << ", error: " << err);
3368 }
3369 if (ERROR_INSERTED(17520) ||
3370 (ERROR_INSERTED(17521) && (rand() % 7) == 0))
3371 {
3372 jam();
3373 CLEAR_ERROR_INSERT_VALUE;
3374 ndbout_c("::checkTableError, injecting NoSuchTable error at line %d file %s",
3375 __LINE__, __FILE__);
3376 return DbspjErr::NoSuchTable;
3377 }
3378 return err;
3379 }
3380
3381 /**
3382 * END - MODULE GENERIC
3383 */
3384
3385 void
3386 Dbspj::common_execTRANSID_AI(Signal* signal,
3387 Ptr<Request> requestPtr,
3388 Ptr<TreeNode> treeNodePtr,
3389 const RowPtr & rowRef)
3390 {
3391 jam();
3392
3393 if (likely((requestPtr.p->m_state & Request::RS_ABORTING) == 0))
3394 {
3395 LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
3396 Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
3397 Dependency_map::ConstDataBufferIterator it;
3398
3399 /**
3400 * Activate child operations in two steps:
3401 * 1) Any child operations requiring T_EXEC_SEQUENTIAL are
3402 * prepared for exec by appending rowRefs to the deferred
3403 * list.
3404 * 2) Start executing non-T_EXEC_SEQUENTIAL child operations.
3405 */
3406 for (list.first(it); !it.isNull(); list.next(it))
3407 {
3408 Ptr<TreeNode> childPtr;
3409 m_treenode_pool.getPtr(childPtr, * it.data);
3410
3411 if (childPtr.p->m_bits & TreeNode::T_EXEC_SEQUENTIAL)
3412 {
3413 jam();
3414 DEBUG("T_EXEC_SEQUENTIAL --> child exec deferred");
3415
3416 /**
3417 * Append correlation values of deferred child operations
3418 * to a list / fifo. Upon resume, we will then be able to
3419 * relocate all parent rows for which to resume operations.
3420 */
3421 LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
3422 Local_pattern_store correlations(pool, childPtr.p->m_deferred.m_correlations);
3423 if (!correlations.append(&rowRef.m_src_correlation, 1))
3424 {
3425 jam();
3426 abort(signal, requestPtr, DbspjErr::OutOfQueryMemory);
3427 return;
3428 }
3429
3430 // As there are pending deferred operations we are not complete
3431 requestPtr.p->m_completed_nodes.clear(childPtr.p->m_node_no);
3432 }
3433 }
3434
3435 for (list.first(it); !it.isNull(); list.next(it))
3436 {
3437 Ptr<TreeNode> childPtr;
3438 m_treenode_pool.getPtr(childPtr, * it.data);
3439 if ((childPtr.p->m_bits & TreeNode::T_EXEC_SEQUENTIAL) == 0)
3440 {
3441 jam();
3442 ndbrequire(childPtr.p->m_info!=0 && childPtr.p->m_info->m_parent_row!=0);
3443
3444 (this->*(childPtr.p->m_info->m_parent_row))(signal,
3445 requestPtr, childPtr, rowRef);
3446
3447 /* Recheck RS_ABORTING as child operation might have aborted */
3448 if (unlikely(requestPtr.p->m_state & Request::RS_ABORTING))
3449 {
3450 jam();
3451 return;
3452 }
3453 }
3454 }
3455 }
3456 }
3457
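/**
 * Sketch of the two-step activation above: assume children c1 (with
 * T_EXEC_SEQUENTIAL) and c2 (without). For each parent row, c1 only
 * gets the row's correlation value appended to its deferred list, to
 * be resumed later, while c2 is executed immediately through its
 * 'm_parent_row' handler.
 */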
3458
3459 /**
3460 * MODULE LOOKUP
3461 */
3462 const Dbspj::OpInfo
3463 Dbspj::g_LookupOpInfo =
3464 {
3465 &Dbspj::lookup_build,
3466 0, // prepare
3467 &Dbspj::lookup_start,
3468 &Dbspj::lookup_execTRANSID_AI,
3469 &Dbspj::lookup_execLQHKEYREF,
3470 &Dbspj::lookup_execLQHKEYCONF,
3471 0, // execSCAN_FRAGREF
3472 0, // execSCAN_FRAGCONF
3473 &Dbspj::lookup_parent_row,
3474 0, // Dbspj::lookup_parent_batch_complete,
3475 0, // Dbspj::lookup_parent_batch_repeat,
3476 0, // Dbspj::lookup_parent_batch_cleanup,
3477 0, // Dbspj::lookup_execSCAN_NEXTREQ
3478 0, // Dbspj::lookup_complete
3479 &Dbspj::lookup_abort,
3480 &Dbspj::lookup_execNODE_FAILREP,
3481 &Dbspj::lookup_cleanup
3482 };
3483
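/**
 * g_LookupOpInfo above is the 'OpInfo' dispatch table binding the
 * generic Request/TreeNode machinery to the lookup-specific handlers;
 * a zero entry means that the event does not apply to lookups.
 */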
3484 Uint32
3485 Dbspj::lookup_build(Build_context& ctx,
3486 Ptr<Request> requestPtr,
3487 const QueryNode* qn,
3488 const QueryNodeParameters* qp)
3489 {
3490 Uint32 err = 0;
3491 Ptr<TreeNode> treeNodePtr;
3492 const QN_LookupNode * node = (const QN_LookupNode*)qn;
3493 const QN_LookupParameters * param = (const QN_LookupParameters*)qp;
3494 do
3495 {
3496 err = DbspjErr::InvalidTreeNodeSpecification;
3497 if (unlikely(node->len < QN_LookupNode::NodeSize))
3498 {
3499 jam();
3500 break;
3501 }
3502
3503 err = DbspjErr::InvalidTreeParametersSpecification;
3504 DEBUG("param len: " << param->len);
3505 if (unlikely(param->len < QN_LookupParameters::NodeSize))
3506 {
3507 jam();
3508 break;
3509 }
3510
3511 err = createNode(ctx, requestPtr, treeNodePtr);
3512 if (unlikely(err != 0))
3513 {
3514 jam();
3515 break;
3516 }
3517
3518 treeNodePtr.p->m_tableOrIndexId = node->tableId;
3519 treeNodePtr.p->m_primaryTableId = node->tableId;
3520 treeNodePtr.p->m_schemaVersion = node->tableVersion;
3521 treeNodePtr.p->m_info = &g_LookupOpInfo;
3522 Uint32 transId1 = requestPtr.p->m_transId[0];
3523 Uint32 transId2 = requestPtr.p->m_transId[1];
3524 Uint32 savePointId = ctx.m_savepointId;
3525
3526 Uint32 treeBits = node->requestInfo;
3527 Uint32 paramBits = param->requestInfo;
3528 //ndbout_c("Dbspj::lookup_build() treeBits=%.8x paramBits=%.8x",
3529 // treeBits, paramBits);
3530 LqhKeyReq* dst = (LqhKeyReq*)treeNodePtr.p->m_lookup_data.m_lqhKeyReq;
3531 {
3532 /**
3533 * static variables
3534 */
3535 dst->tcBlockref = reference();
3536 dst->clientConnectPtr = treeNodePtr.i;
3537
3538 /**
3539        * TODO: reference()+treeNodePtr.i is passed twice;
3540        * this can likely be optimized using the requestInfo-bits.
3541        * UPDATE: This can be accomplished by *not* setting ApplicationAddressFlag
3542        * and patching LQH to then use tcBlockref/clientConnectPtr instead.
3543 */
3544 dst->transId1 = transId1;
3545 dst->transId2 = transId2;
3546 dst->savePointId = savePointId;
3547 dst->scanInfo = 0;
3548 dst->attrLen = 0;
3549       /** Initially set reply ref to client; do_send will set SPJ refs if non-LEAF */
3550 dst->variableData[0] = ctx.m_resultRef;
3551 dst->variableData[1] = param->resultData;
3552 Uint32 requestInfo = 0;
3553 LqhKeyReq::setOperation(requestInfo, ZREAD);
3554 LqhKeyReq::setApplicationAddressFlag(requestInfo, 1);
3555 LqhKeyReq::setDirtyFlag(requestInfo, 1);
3556 LqhKeyReq::setSimpleFlag(requestInfo, 1);
3557 LqhKeyReq::setNormalProtocolFlag(requestInfo, 0); // Assume T_LEAF
3558 LqhKeyReq::setCorrFactorFlag(requestInfo, 1);
3559 LqhKeyReq::setNoDiskFlag(requestInfo,
3560 (treeBits & DABits::NI_LINKED_DISK) == 0 &&
3561 (paramBits & DABits::PI_DISK_ATTR) == 0);
3562 dst->requestInfo = requestInfo;
3563 }
3564
3565 if (treeBits & QN_LookupNode::L_UNIQUE_INDEX)
3566 {
3567 jam();
3568 treeNodePtr.p->m_bits |= TreeNode::T_UNIQUE_INDEX_LOOKUP;
3569 }
3570
3571 Uint32 tableId = node->tableId;
3572 Uint32 schemaVersion = node->tableVersion;
3573
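    /**
     * Pack tableId into the lower half-word and the low 16 bits of
     * schemaVersion into the upper half (the table id is retrieved
     * again as 'tableSchemaVersion & 0xFFFF' further below).
     */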
3574 Uint32 tableSchemaVersion = tableId + ((schemaVersion << 16) & 0xFFFF0000);
3575 dst->tableSchemaVersion = tableSchemaVersion;
3576
3577 ctx.m_resultData = param->resultData;
3578 treeNodePtr.p->m_lookup_data.m_api_resultRef = ctx.m_resultRef;
3579 treeNodePtr.p->m_lookup_data.m_api_resultData = param->resultData;
3580 treeNodePtr.p->m_lookup_data.m_outstanding = 0;
3581
3582 /**
3583      * Parse the parts common to lookup and scan-frag
3584 */
3585 struct DABuffer nodeDA, paramDA;
3586 nodeDA.ptr = node->optional;
3587 nodeDA.end = nodeDA.ptr + (node->len - QN_LookupNode::NodeSize);
3588 paramDA.ptr = param->optional;
3589 paramDA.end = paramDA.ptr + (param->len - QN_LookupParameters::NodeSize);
3590 err = parseDA(ctx, requestPtr, treeNodePtr,
3591 nodeDA, treeBits, paramDA, paramBits);
3592 if (unlikely(err != 0))
3593 {
3594 jam();
3595 break;
3596 }
3597
3598 if (treeNodePtr.p->m_bits & TreeNode::T_ATTR_INTERPRETED)
3599 {
3600 jam();
3601 LqhKeyReq::setInterpretedFlag(dst->requestInfo, 1);
3602 }
3603
3604 /**
3605 * Inherit batch size from parent
3606 */
3607 treeNodePtr.p->m_batch_size = 1;
3608 if (treeNodePtr.p->m_parentPtrI != RNIL)
3609 {
3610 jam();
3611 Ptr<TreeNode> parentPtr;
3612 m_treenode_pool.getPtr(parentPtr, treeNodePtr.p->m_parentPtrI);
3613 treeNodePtr.p->m_batch_size = parentPtr.p->m_batch_size;
3614 }
3615
3616 if (ctx.m_start_signal)
3617 {
3618 jam();
3619 Signal * signal = ctx.m_start_signal;
3620 const LqhKeyReq* src = (const LqhKeyReq*)signal->getDataPtr();
3621 #if NOT_YET
3622 Uint32 instanceNo =
3623 blockToInstance(signal->header.theReceiversBlockNumber);
3624 treeNodePtr.p->m_send.m_ref = numberToRef(DBLQH,
3625 instanceNo, getOwnNodeId());
3626 #else
3627 treeNodePtr.p->m_send.m_ref =
3628 numberToRef(DBLQH, getInstanceKey(src->tableSchemaVersion & 0xFFFF,
3629 src->fragmentData & 0xFFFF),
3630 getOwnNodeId());
3631 #endif
3632
3633 Uint32 hashValue = src->hashValue;
3634 Uint32 fragId = src->fragmentData;
3635 Uint32 attrLen = src->attrLen; // fragdist-key is in here
3636
3637 /**
3638 * assertions
3639 */
3640 #ifdef VM_TRACE
3641 Uint32 requestInfo = src->requestInfo;
3642 ndbassert(LqhKeyReq::getAttrLen(attrLen) == 0); // Only long
3643 ndbassert(LqhKeyReq::getScanTakeOverFlag(attrLen) == 0);// Not supported
3644 ndbassert(LqhKeyReq::getReorgFlag(attrLen) == ScanFragReq::REORG_ALL); // Not supported
3645 ndbassert(LqhKeyReq::getOperation(requestInfo) == ZREAD);
3646 ndbassert(LqhKeyReq::getKeyLen(requestInfo) == 0); // Only long
3647 ndbassert(LqhKeyReq::getMarkerFlag(requestInfo) == 0); // Only read
3648 ndbassert(LqhKeyReq::getAIInLqhKeyReq(requestInfo) == 0);
3649 ndbassert(LqhKeyReq::getSeqNoReplica(requestInfo) == 0);
3650 ndbassert(LqhKeyReq::getLastReplicaNo(requestInfo) == 0);
3651 ndbassert(LqhKeyReq::getApplicationAddressFlag(requestInfo) != 0);
3652 ndbassert(LqhKeyReq::getSameClientAndTcFlag(requestInfo) == 0);
3653 #endif
3654
3655 #if TODO
3656 /**
3657 * Handle various lock-modes
3658 */
3659 static Uint8 getDirtyFlag(const UintR & requestInfo);
3660 static Uint8 getSimpleFlag(const UintR & requestInfo);
3661 #endif
3662
3663 #ifdef VM_TRACE
3664 Uint32 dst_requestInfo = dst->requestInfo;
3665 ndbassert(LqhKeyReq::getInterpretedFlag(requestInfo) ==
3666 LqhKeyReq::getInterpretedFlag(dst_requestInfo));
3667 ndbassert(LqhKeyReq::getNoDiskFlag(requestInfo) ==
3668 LqhKeyReq::getNoDiskFlag(dst_requestInfo));
3669 #endif
3670
3671 dst->hashValue = hashValue;
3672 dst->fragmentData = fragId;
3673 dst->attrLen = attrLen; // fragdist is in here
3674
3675 treeNodePtr.p->m_bits |= TreeNode::T_ONE_SHOT;
3676 }
3677 return 0;
3678 } while (0);
3679
3680 return err;
3681 }
3682
3683 void
3684 Dbspj::lookup_start(Signal* signal,
3685 Ptr<Request> requestPtr,
3686 Ptr<TreeNode> treeNodePtr)
3687 {
3688 lookup_send(signal, requestPtr, treeNodePtr);
3689 }
3690
3691 void
3692 Dbspj::lookup_send(Signal* signal,
3693 Ptr<Request> requestPtr,
3694 Ptr<TreeNode> treeNodePtr)
3695 {
3696 jam();
3697 if (!ERROR_INSERTED(17521)) // Avoid emulated rnd errors
3698 {
3699 // ::checkTableError() should be handled before we reach this far
3700 ndbassert(checkTableError(treeNodePtr) == 0);
3701 }
3702
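  /**
   * 'cnt' below counts the replies SPJ itself expects for this
   * LQHKEYREQ: normally both a TRANSID_AI (the row) and a
   * LQHKEYCONF/REF, i.e. 2. For a leaf in a lookup-request both go
   * directly to the API-client (cnt == 0); for a leaf in a
   * scan-request the row goes to the API while the CONF/REF still
   * arrives here (cnt == 1).
   */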
3703 Uint32 cnt = 2;
3704 if (treeNodePtr.p->isLeaf())
3705 {
3706 jam();
3707 if (requestPtr.p->isLookup())
3708 {
3709 jam();
3710 cnt = 0;
3711 }
3712 else
3713 {
3714 jam();
3715 cnt = 1;
3716 }
3717 }
3718
3719 LqhKeyReq* req = reinterpret_cast<LqhKeyReq*>(signal->getDataPtrSend());
3720
3721 memcpy(req, treeNodePtr.p->m_lookup_data.m_lqhKeyReq,
3722 sizeof(treeNodePtr.p->m_lookup_data.m_lqhKeyReq));
3723 req->variableData[2] = treeNodePtr.p->m_send.m_correlation;
3724 req->variableData[3] = requestPtr.p->m_rootResultData;
3725
3726 if (!(requestPtr.p->isLookup() && treeNodePtr.p->isLeaf()))
3727 {
3728 // Non-LEAF want reply to SPJ instead of ApiClient.
3729 LqhKeyReq::setNormalProtocolFlag(req->requestInfo, 1);
3730 req->variableData[0] = reference();
3731 req->variableData[1] = treeNodePtr.i;
3732 }
3733 else
3734 {
3735 jam();
3736 /**
3737 * Fake that TC sent this request,
3738      * so that it can route a possible TCKEYREF.
3739 */
3740 req->tcBlockref = requestPtr.p->m_senderRef;
3741 }
3742
3743 SectionHandle handle(this);
3744
3745 Uint32 ref = treeNodePtr.p->m_send.m_ref;
3746 Uint32 keyInfoPtrI = treeNodePtr.p->m_send.m_keyInfoPtrI;
3747 Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;
3748
3749 Uint32 err = 0;
3750
3751 do
3752 {
3753 if (treeNodePtr.p->m_bits & TreeNode::T_ONE_SHOT)
3754 {
3755 jam();
3756 /**
3757 * Pass sections to send
3758 */
3759 treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL;
3760 treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL;
3761 }
3762 else
3763 {
3764 if ((treeNodePtr.p->m_bits & TreeNode::T_KEYINFO_CONSTRUCTED) == 0)
3765 {
3766 jam();
3767 Uint32 tmp = RNIL;
3768 if (!dupSection(tmp, keyInfoPtrI))
3769 {
3770 jam();
3771 ndbassert(tmp == RNIL); // Guard for memleak
3772 err = DbspjErr::OutOfSectionMemory;
3773 break;
3774 }
3775
3776 keyInfoPtrI = tmp;
3777 }
3778 else
3779 {
3780 jam();
3781 treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL;
3782 }
3783
3784 if ((treeNodePtr.p->m_bits & TreeNode::T_ATTRINFO_CONSTRUCTED) == 0)
3785 {
3786 jam();
3787 Uint32 tmp = RNIL;
3788
3789 /**
3790 * Test execution terminated due to 'OutOfSectionMemory' which
3791 * may happen for different treeNodes in the request:
3792 * - 17070: Fail on any lookup_send()
3793 * - 17071: Fail on lookup_send() if 'isLeaf'
3794 * - 17072: Fail on lookup_send() if treeNode not root
3795 */
3796
3797 if (ERROR_INSERTED(17070) ||
3798 (ERROR_INSERTED(17071) && treeNodePtr.p->isLeaf()) ||
3799 (ERROR_INSERTED(17072) && treeNodePtr.p->m_parentPtrI != RNIL))
3800 {
3801 jam();
3802 CLEAR_ERROR_INSERT_VALUE;
3803 ndbout_c("Injecting OutOfSectionMemory error at line %d file %s",
3804 __LINE__, __FILE__);
3805 releaseSection(keyInfoPtrI);
3806 err = DbspjErr::OutOfSectionMemory;
3807 break;
3808 }
3809
3810 if (!dupSection(tmp, attrInfoPtrI))
3811 {
3812 jam();
3813 ndbassert(tmp == RNIL); // Guard for memleak
3814 releaseSection(keyInfoPtrI);
3815 err = DbspjErr::OutOfSectionMemory;
3816 break;
3817 }
3818
3819 attrInfoPtrI = tmp;
3820 }
3821 else
3822 {
3823 jam();
3824 treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL;
3825 }
3826 }
3827
3828 getSection(handle.m_ptr[0], keyInfoPtrI);
3829 getSection(handle.m_ptr[1], attrInfoPtrI);
3830 handle.m_cnt = 2;
3831
3832 /**
3833 * Inject error to test LQHKEYREF handling:
3834 * Tampering with tableSchemaVersion such that LQH will
3835 * return LQHKEYREF('1227: Invalid schema version')
3836 * May happen for different treeNodes in the request:
3837 * - 17030: Fail on any lookup_send()
3838 * - 17031: Fail on lookup_send() if 'isLeaf'
3839 * - 17032: Fail on lookup_send() if treeNode not root
3840 */
3841 if (ERROR_INSERTED(17030) ||
3842 (ERROR_INSERTED(17031) && treeNodePtr.p->isLeaf()) ||
3843 (ERROR_INSERTED(17032) && treeNodePtr.p->m_parentPtrI != RNIL))
3844 {
3845 jam();
3846 CLEAR_ERROR_INSERT_VALUE;
3847 req->tableSchemaVersion += (1 << 16); // Provoke 'Invalid schema version'
3848 }
3849
3850 #if defined DEBUG_LQHKEYREQ
3851 ndbout_c("LQHKEYREQ to %x", ref);
3852 printLQHKEYREQ(stdout, signal->getDataPtrSend(),
3853 NDB_ARRAY_SIZE(treeNodePtr.p->m_lookup_data.m_lqhKeyReq),
3854 DBLQH);
3855 printf("KEYINFO: ");
3856 print(handle.m_ptr[0], stdout);
3857 printf("ATTRINFO: ");
3858 print(handle.m_ptr[1], stdout);
3859 #endif
3860
3861 Uint32 Tnode = refToNode(ref);
3862 if (Tnode == getOwnNodeId())
3863 {
3864 c_Counters.incr_counter(CI_LOCAL_READS_SENT, 1);
3865 }
3866 else
3867 {
3868 c_Counters.incr_counter(CI_REMOTE_READS_SENT, 1);
3869 }
3870
3871 /**
3872 * Test execution terminated due to 'NodeFailure' which
3873 * may happen for different treeNodes in the request:
3874 * - 17020: Fail on any lookup_send()
3875 * - 17021: Fail on lookup_send() if 'isLeaf'
3876 * - 17022: Fail on lookup_send() if treeNode not root
3877 */
3878 if (ERROR_INSERTED(17020) ||
3879 (ERROR_INSERTED(17021) && treeNodePtr.p->isLeaf()) ||
3880 (ERROR_INSERTED(17022) && treeNodePtr.p->m_parentPtrI != RNIL))
3881 {
3882 jam();
3883 CLEAR_ERROR_INSERT_VALUE;
3884 releaseSections(handle);
3885 err = DbspjErr::NodeFailure;
3886 break;
3887 }
3888 // Test for online downgrade.
3889 if (unlikely(!ndb_join_pushdown(getNodeInfo(Tnode).m_version)))
3890 {
3891 jam();
3892 releaseSections(handle);
3893 err = 4003; // Function not implemented.
3894 break;
3895 }
3896
3897 if (unlikely(!c_alive_nodes.get(Tnode)))
3898 {
3899 jam();
3900 releaseSections(handle);
3901 err = DbspjErr::NodeFailure;
3902 break;
3903 }
3904 else if (! (treeNodePtr.p->isLeaf() && requestPtr.p->isLookup()))
3905 {
3906 jam();
3907 ndbassert(Tnode < NDB_ARRAY_SIZE(requestPtr.p->m_lookup_node_data));
3908 requestPtr.p->m_completed_nodes.clear(treeNodePtr.p->m_node_no);
3909 requestPtr.p->m_outstanding += cnt;
3910 requestPtr.p->m_lookup_node_data[Tnode] += cnt;
3911 // number wrapped
3912 ndbrequire(! (requestPtr.p->m_lookup_node_data[Tnode] == 0));
3913 }
3914
3915 sendSignal(ref, GSN_LQHKEYREQ, signal,
3916 NDB_ARRAY_SIZE(treeNodePtr.p->m_lookup_data.m_lqhKeyReq),
3917 JBB, &handle);
3918
3919 treeNodePtr.p->m_lookup_data.m_outstanding += cnt;
3920 if (requestPtr.p->isLookup() && treeNodePtr.p->isLeaf())
3921 {
3922 jam();
3923 /**
3924 * Send TCKEYCONF with DirtyReadBit + Tnode,
3925 * so that API can discover if Tnode died while waiting for result
3926 */
3927 lookup_sendLeafCONF(signal, requestPtr, treeNodePtr, Tnode);
3928 }
3929 return;
3930 }
3931 while (0);
3932
3933 ndbrequire(err);
3934 jam();
3935 abort(signal, requestPtr, err);
3936 } //Dbspj::lookup_send
3937
3938 void
lookup_execTRANSID_AI(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr,const RowPtr & rowRef)3939 Dbspj::lookup_execTRANSID_AI(Signal* signal,
3940 Ptr<Request> requestPtr,
3941 Ptr<TreeNode> treeNodePtr,
3942 const RowPtr & rowRef)
3943 {
3944 jam();
3945
3946 Uint32 Tnode = refToNode(signal->getSendersBlockRef());
3947 ndbrequire(!(requestPtr.p->isLookup() && treeNodePtr.p->isLeaf()));
3948
3949 common_execTRANSID_AI(signal, requestPtr, treeNodePtr, rowRef);
3950
3951 ndbassert(requestPtr.p->m_lookup_node_data[Tnode] >= 1);
3952 requestPtr.p->m_lookup_node_data[Tnode] -= 1;
3953
3954 treeNodePtr.p->m_lookup_data.m_outstanding--;
3955
3956 if (treeNodePtr.p->m_lookup_data.m_outstanding == 0
3957 && treeNodePtr.p->m_deferred.isEmpty())
3958 {
3959 jam();
3960 // We have received all rows for this treeNode in this batch.
3961 handleTreeNodeComplete(signal, requestPtr, treeNodePtr);
3962 }
3963
3964 checkBatchComplete(signal, requestPtr, 1);
3965 }
3966
3967 void
lookup_execLQHKEYREF(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr)3968 Dbspj::lookup_execLQHKEYREF(Signal* signal,
3969 Ptr<Request> requestPtr,
3970 Ptr<TreeNode> treeNodePtr)
3971 {
3972 const LqhKeyRef * rep = (LqhKeyRef*)signal->getDataPtr();
3973 Uint32 errCode = rep->errorCode;
3974 Uint32 Tnode = refToNode(signal->getSendersBlockRef());
3975
3976 c_Counters.incr_counter(CI_READS_NOT_FOUND, 1);
3977
3978 DEBUG("lookup_execLQHKEYREF, errorCode:" << errCode);
3979
3980 /**
3981 * If Request is still actively running: API need to
3982 * be informed about error.
3983 * Error code may either indicate a 'hard error' which should
3984 * terminate the query execution, or a 'soft error' which
3985 * should be signaled NDBAPI, and execution continued.
3986 */
3987 if (likely((requestPtr.p->m_state & Request::RS_ABORTING) == 0))
3988 {
3989 switch(errCode){
3990 case 626: // 'Soft error' : Row not found
3991 case 899: // 'Soft error' : Interpreter_exit_nok
3992
3993 jam();
3994 /**
3995 * Only Lookup-request need to send TCKEYREF...
3996 */
3997 if (requestPtr.p->isLookup())
3998 {
3999 jam();
4000 lookup_stop_branch(signal, requestPtr, treeNodePtr, errCode);
4001 }
4002 break;
4003
4004 default: // 'Hard error' : abort query
4005 jam();
4006 abort(signal, requestPtr, errCode);
4007 }
4008 }
4009
4010 Uint32 cnt = (treeNodePtr.p->isLeaf()) ? 1 : 2;
4011 ndbassert(requestPtr.p->m_lookup_node_data[Tnode] >= cnt);
4012 requestPtr.p->m_lookup_node_data[Tnode] -= cnt;
4013
4014 treeNodePtr.p->m_lookup_data.m_outstanding -= cnt;
4015
4016 /**
4017 * Another TreeNode awaited for completion of this request
4018 * before it could resume its operation.
4019 */
4020 if (treeNodePtr.p->m_resumeEvents & TreeNode::TN_RESUME_REF)
4021 {
4022 jam();
4023 ndbassert(treeNodePtr.p->m_resumePtrI != RNIL);
4024 Ptr<TreeNode> resumeTreeNodePtr;
4025 m_treenode_pool.getPtr(resumeTreeNodePtr, treeNodePtr.p->m_resumePtrI);
4026 lookup_resume(signal, requestPtr, resumeTreeNodePtr);
4027 }
4028
4029 if (treeNodePtr.p->m_lookup_data.m_outstanding == 0
4030 && treeNodePtr.p->m_deferred.isEmpty())
4031 {
4032 jam();
4033 // We have received all rows for this treeNode in this batch.
4034 handleTreeNodeComplete(signal, requestPtr, treeNodePtr);
4035 }
4036
4037 checkBatchComplete(signal, requestPtr, cnt);
4038 }
4039
4040 /**
4041 * lookup_stop_branch() will send required signals to the API
4042 * to inform that the query branch starting with 'treeNodePtr'
4043 * will not be executed due to 'errCode'.
4044 *
4045 * NOTE: 'errCode'is expected to be a 'soft error', like
4046 * 'row not found', and is *not* intended to abort
4047 * entire query.
4048 */
4049 void
lookup_stop_branch(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr,Uint32 errCode)4050 Dbspj::lookup_stop_branch(Signal* signal,
4051 Ptr<Request> requestPtr,
4052 Ptr<TreeNode> treeNodePtr,
4053 Uint32 errCode)
4054 {
4055 ndbassert(requestPtr.p->isLookup());
4056 DEBUG("::lookup_stop_branch"
4057 << ", node: " << treeNodePtr.p->m_node_no
4058 );
4059
4060 /**
4061 * If this is a "leaf" node, either on its own, or
4062 * indirectly through an unique index lookup:
4063 * Ordinary operation would have emited extra TCKEYCONF
4064 * required for nodefail handling.
4065 * (In case of nodefails during final leaf REQs).
4066 * As API cant, or at least does not try to, tell whether
4067 * leaf operation is REFed by SPJ or LQH, we still have to
4068 * send this extra CONF as required by protocoll.
4069 */
4070 if (treeNodePtr.p->isLeaf())
4071 {
4072 jam();
4073 DEBUG(" Leaf-lookup: sending extra 'CONF' for nodefail handling");
4074 lookup_sendLeafCONF(signal, requestPtr, treeNodePtr, getOwnNodeId());
4075 }
4076
4077 else if (treeNodePtr.p->m_bits & TreeNode::T_UNIQUE_INDEX_LOOKUP)
4078 {
4079 /**
4080 * UNIQUE_INDEX lookups are represented with an additional
4081 * child which does the lookup from UQ-index into the table
4082 * itself. Has to check this child for being 'leaf'.
4083 */
4084 LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
4085 Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
4086 Dependency_map::ConstDataBufferIterator it;
4087 ndbrequire(list.first(it));
4088 ndbrequire(list.getSize() == 1); // should only be 1 child
4089 Ptr<TreeNode> childPtr;
4090 m_treenode_pool.getPtr(childPtr, * it.data);
4091 if (childPtr.p->m_bits & TreeNode::T_LEAF)
4092 {
4093 jam();
4094 DEBUG(" UNUQUE_INDEX-Leaf-lookup: sending extra 'CONF' "
4095 "for nodefail handling");
4096 lookup_sendLeafCONF(signal, requestPtr, childPtr, getOwnNodeId());
4097 }
4098 }
4099
4100 /**
4101 * Then produce the REF(errCode) which terminates this
4102 * tree branch.
4103 */
4104 Uint32 resultRef = treeNodePtr.p->m_lookup_data.m_api_resultRef;
4105 Uint32 resultData = treeNodePtr.p->m_lookup_data.m_api_resultData;
4106 TcKeyRef* ref = (TcKeyRef*)signal->getDataPtr();
4107 ref->connectPtr = resultData;
4108 ref->transId[0] = requestPtr.p->m_transId[0];
4109 ref->transId[1] = requestPtr.p->m_transId[1];
4110 ref->errorCode = errCode;
4111 ref->errorData = 0;
4112
4113 DEBUG(" send TCKEYREF");
4114 sendTCKEYREF(signal, resultRef, requestPtr.p->m_senderRef);
4115 }
4116
4117 /**
4118 * Lookup leafs in lookup requests will not receive CONF/REF
4119 * back to SPJ when LQH request has completed. Instead we
4120 * will cleanup() the request when the last leafnode KEYREQ
4121 * has been sent. If any of the REQuested datanodes fails
4122 * after this, SPJ will not detect this and be able to
4123 * send appropriate signals to the API to awake it from the
4124 * 'wait' state.
4125 * To get around this, we instead send an extra CONF
4126 * to the API which inform is about which 'node' it should
4127 * expect a result from. API can then discover if this
4128 * 'node' died while waiting for results.
4129 */
4130 void
lookup_sendLeafCONF(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr,Uint32 node)4131 Dbspj::lookup_sendLeafCONF(Signal* signal,
4132 Ptr<Request> requestPtr,
4133 Ptr<TreeNode> treeNodePtr,
4134 Uint32 node)
4135 {
4136 ndbassert(treeNodePtr.p->isLeaf());
4137
4138 const Uint32 resultRef = treeNodePtr.p->m_lookup_data.m_api_resultRef;
4139 const Uint32 resultData = treeNodePtr.p->m_lookup_data.m_api_resultData;
4140 TcKeyConf* const conf = (TcKeyConf*)signal->getDataPtr();
4141 conf->apiConnectPtr = RNIL;
4142 conf->confInfo = 0;
4143 conf->gci_hi = 0;
4144 TcKeyConf::setNoOfOperations(conf->confInfo, 1);
4145 conf->transId1 = requestPtr.p->m_transId[0];
4146 conf->transId2 = requestPtr.p->m_transId[1];
4147 conf->operations[0].apiOperationPtr = resultData;
4148 conf->operations[0].attrInfoLen =
4149 TcKeyConf::DirtyReadBit | node;
4150 const Uint32 sigLen = TcKeyConf::StaticLength + TcKeyConf::OperationLength;
4151 sendTCKEYCONF(signal, sigLen, resultRef, requestPtr.p->m_senderRef);
4152 }
4153
4154
4155 void
lookup_execLQHKEYCONF(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr)4156 Dbspj::lookup_execLQHKEYCONF(Signal* signal,
4157 Ptr<Request> requestPtr,
4158 Ptr<TreeNode> treeNodePtr)
4159 {
4160 ndbrequire(!(requestPtr.p->isLookup() && treeNodePtr.p->isLeaf()));
4161
4162 Uint32 Tnode = refToNode(signal->getSendersBlockRef());
4163
4164 if (treeNodePtr.p->m_bits & TreeNode::T_USER_PROJECTION)
4165 {
4166 jam();
4167 requestPtr.p->m_rows++;
4168 }
4169
4170 ndbassert(requestPtr.p->m_lookup_node_data[Tnode] >= 1);
4171 requestPtr.p->m_lookup_node_data[Tnode] -= 1;
4172
4173 treeNodePtr.p->m_lookup_data.m_outstanding--;
4174
4175 /**
4176 * Another TreeNode awaited for completion of this request
4177 * before it could resume its operation.
4178 */
4179 if (treeNodePtr.p->m_resumeEvents & TreeNode::TN_RESUME_CONF)
4180 {
4181 jam();
4182 ndbassert(treeNodePtr.p->m_resumePtrI != RNIL);
4183 Ptr<TreeNode> resumeTreeNodePtr;
4184 m_treenode_pool.getPtr(resumeTreeNodePtr, treeNodePtr.p->m_resumePtrI);
4185 lookup_resume(signal, requestPtr, resumeTreeNodePtr);
4186 }
4187
4188 if (treeNodePtr.p->m_lookup_data.m_outstanding == 0
4189 && treeNodePtr.p->m_deferred.isEmpty())
4190 {
4191 jam();
4192 // We have received all rows for this treeNode in this batch.
4193 handleTreeNodeComplete(signal, requestPtr, treeNodePtr);
4194 }
4195
4196 checkBatchComplete(signal, requestPtr, 1);
4197 }
4198
4199 void
lookup_parent_row(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr,const RowPtr & rowRef)4200 Dbspj::lookup_parent_row(Signal* signal,
4201 Ptr<Request> requestPtr,
4202 Ptr<TreeNode> treeNodePtr,
4203 const RowPtr & rowRef)
4204 {
4205 jam();
4206
4207 DEBUG("::lookup_parent_row"
4208 << ", node: " << treeNodePtr.p->m_node_no);
4209
4210 ndbassert((treeNodePtr.p->m_bits & TreeNode::T_EXEC_SEQUENTIAL) == 0);
4211 lookup_row(signal, requestPtr, treeNodePtr, rowRef);
4212 } // Dbspj::lookup_parent_row()
4213
4214 /**
4215 * lookup_resume() is a delayed lookup_parent_row.
4216 * It will locate the next parent row now allowed to execute,
4217 * and create a child lookup request for that row.
4218 */
4219 void
lookup_resume(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr)4220 Dbspj::lookup_resume(Signal* signal,
4221 Ptr<Request> requestPtr,
4222 Ptr<TreeNode> treeNodePtr)
4223 {
4224 jam();
4225 DEBUG("::lookup_resume"
4226 << ", node: " << treeNodePtr.p->m_node_no
4227 );
4228
4229 ndbassert(treeNodePtr.p->m_bits & TreeNode::T_EXEC_SEQUENTIAL);
4230 ndbassert(treeNodePtr.p->m_parentPtrI != RNIL);
4231 ndbassert(!treeNodePtr.p->m_deferred.isEmpty());
4232 ndbassert(!requestPtr.p->m_completed_nodes.get(treeNodePtr.p->m_node_no));
4233
4234 if (unlikely(requestPtr.p->m_state & Request::RS_ABORTING))
4235 {
4236 jam();
4237 return;
4238 }
4239
4240 Uint32 corrVal;
4241 {
4242 LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
4243 Local_pattern_store correlations(pool, treeNodePtr.p->m_deferred.m_correlations);
4244
4245 Local_pattern_store::DataBufferIterator it;
4246 const bool valid = correlations.position(it, (Uint32)(treeNodePtr.p->m_deferred.m_pos++));
4247 (void)valid; ndbassert(valid);
4248 corrVal = *it.data;
4249 }
4250
4251 Ptr<TreeNode> parentPtr;
4252 m_treenode_pool.getPtr(parentPtr, treeNodePtr.p->m_parentPtrI);
4253
4254 // Set up RowPtr & RowRef for this parent row
4255 RowPtr row;
4256 row.m_src_node_ptrI = parentPtr.i;
4257 row.m_src_correlation = corrVal;
4258
4259 ndbassert(parentPtr.p->m_rows.m_type == RowCollection::COLLECTION_MAP);
4260 RowRef ref;
4261 parentPtr.p->m_rows.m_map.copyto(ref);
4262 const Uint32* const mapptr = get_row_ptr(ref);
4263
4264 // Relocate parent row from correlation value.
4265 const Uint32 rowId = (corrVal & 0xFFFF);
4266 parentPtr.p->m_rows.m_map.load(mapptr, rowId, ref);
4267
4268 const Uint32* const rowptr = get_row_ptr(ref);
4269 setupRowPtr(parentPtr.p->m_rows, row, ref, rowptr);
4270
4271 lookup_row(signal, requestPtr, treeNodePtr, row);
4272 } // Dbspj::lookup_resume()
4273
4274 void
lookup_row(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr,const RowPtr & rowRef)4275 Dbspj::lookup_row(Signal* signal,
4276 Ptr<Request> requestPtr,
4277 Ptr<TreeNode> treeNodePtr,
4278 const RowPtr & rowRef)
4279 {
4280 jam();
4281
4282 /**
4283 * Here we need to...
4284 * 1) construct a key
4285 * 2) compute hash (normally TC)
4286 * 3) get node for row (normally TC)
4287 */
4288 Uint32 err = 0;
4289 const Uint32 tableId = treeNodePtr.p->m_tableOrIndexId;
4290 const Uint32 corrVal = rowRef.m_src_correlation;
4291
4292 DEBUG("::lookup_row"
4293 << ", node: " << treeNodePtr.p->m_node_no);
4294
4295 do
4296 {
4297 err = checkTableError(treeNodePtr);
4298 if (unlikely(err != 0))
4299 {
4300 jam();
4301 break;
4302 }
4303
4304 /**
4305 * Test execution terminated due to 'OutOfQueryMemory' which
4306 * may happen multiple places below:
4307 * - 17040: Fail on any lookup_parent_row()
4308 * - 17041: Fail on lookup_parent_row() if 'isLeaf'
4309 * - 17042: Fail on lookup_parent_row() if treeNode not root
4310 */
4311 if (ERROR_INSERTED(17040) ||
4312 (ERROR_INSERTED(17041) && treeNodePtr.p->isLeaf()) ||
4313 (ERROR_INSERTED(17042) && treeNodePtr.p->m_parentPtrI != RNIL))
4314 {
4315 jam();
4316 CLEAR_ERROR_INSERT_VALUE;
4317 err = DbspjErr::OutOfQueryMemory;
4318 break;
4319 }
4320
4321 Uint32 ptrI = RNIL;
4322 if (treeNodePtr.p->m_bits & TreeNode::T_KEYINFO_CONSTRUCTED)
4323 {
4324 jam();
4325 DEBUG("parent_row w/ T_KEYINFO_CONSTRUCTED");
4326 /**
4327 * Get key-pattern
4328 */
4329 LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
4330 Local_pattern_store pattern(pool, treeNodePtr.p->m_keyPattern);
4331
4332 bool keyIsNull;
4333 err = expand(ptrI, pattern, rowRef, keyIsNull);
4334 if (unlikely(err != 0))
4335 {
4336 jam();
4337 releaseSection(ptrI);
4338 break;
4339 }
4340
4341 if (keyIsNull)
4342 {
4343 /**
4344 * When the key contains NULL values, an EQ-match is impossible!
4345 * Entire lookup request can therefore be eliminate as it is known
4346 * to be REFused with errorCode = 626 (Row not found).
4347 *
4348 * Scan requests can simply ignore these child LQHKEYREQs
4349 * as REFs are not needed, either by the API protocoll,
4350 * or in order to handle TN_RESUME_REF.
4351 *
4352 * Lookup requests has to send the same KEYREFs as would have
4353 * been produced by LQH.
4354 */
4355 jam();
4356 DEBUG("Key contain NULL values: Ignore impossible KEYREQ");
4357 releaseSection(ptrI);
4358 ptrI = RNIL;
4359
4360 /* Send KEYREF(errCode=626) as required by lookup request protocol */
4361 if (requestPtr.p->isLookup())
4362 {
4363 jam();
4364 lookup_stop_branch(signal, requestPtr, treeNodePtr, 626);
4365 }
4366
4367 /**
4368 * Another TreeNode awaited completion of this treeNode
4369 * or sub-branch before it could resume its operation.
4370 */
4371 if ((treeNodePtr.p->m_resumeEvents & TreeNode::TN_RESUME_REF))
4372 {
4373 jam();
4374 DEBUG("handling TN_RESUME_REF");
4375 ndbassert(treeNodePtr.p->m_resumePtrI != RNIL);
4376 Ptr<TreeNode> resumeTreeNodePtr;
4377 m_treenode_pool.getPtr(resumeTreeNodePtr, treeNodePtr.p->m_resumePtrI);
4378 lookup_resume(signal, requestPtr, resumeTreeNodePtr);
4379 }
4380
4381 return; // Bailout, KEYREQ would have returned KEYREF(626) anyway
4382 } // keyIsNull
4383
4384 ndbassert(ptrI != RNIL);
4385 treeNodePtr.p->m_send.m_keyInfoPtrI = ptrI;
4386 } //T_KEYINFO_CONSTRUCTED
4387
4388 BuildKeyReq tmp;
4389 err = computeHash(signal, tmp, tableId, treeNodePtr.p->m_send.m_keyInfoPtrI);
4390 if (unlikely(err != 0))
4391 break;
4392
4393 err = getNodes(signal, tmp, tableId);
4394 if (unlikely(err != 0))
4395 break;
4396
4397 Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;
4398 if (treeNodePtr.p->m_bits & TreeNode::T_ATTRINFO_CONSTRUCTED)
4399 {
4400 jam();
4401 Uint32 tmp = RNIL;
4402
4403 /**
4404 * Test execution terminated due to 'OutOfSectionMemory' which
4405 * may happen for different treeNodes in the request:
4406 * - 17080: Fail on lookup_parent_row
4407 * - 17081: Fail on lookup_parent_row: if 'isLeaf'
4408 * - 17082: Fail on lookup_parent_row: if treeNode not root
4409 */
4410
4411 if (ERROR_INSERTED(17080) ||
4412 (ERROR_INSERTED(17081) && treeNodePtr.p->isLeaf()) ||
4413 (ERROR_INSERTED(17082) && treeNodePtr.p->m_parentPtrI != RNIL))
4414 {
4415 jam();
4416 CLEAR_ERROR_INSERT_VALUE;
4417 ndbout_c("Injecting OutOfSectionMemory error at line %d file %s",
4418 __LINE__, __FILE__);
4419 err = DbspjErr::OutOfSectionMemory;
4420 break;
4421 }
4422
4423 if (!dupSection(tmp, attrInfoPtrI))
4424 {
4425 jam();
4426 ndbassert(tmp == RNIL); // Guard for memleak
4427 err = DbspjErr::OutOfSectionMemory;
4428 break;
4429 }
4430
4431 Uint32 org_size;
4432 {
4433 SegmentedSectionPtr ptr;
4434 getSection(ptr, tmp);
4435 org_size = ptr.sz;
4436 }
4437
4438 bool hasNull;
4439 LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
4440 Local_pattern_store pattern(pool, treeNodePtr.p->m_attrParamPattern);
4441 err = expand(tmp, pattern, rowRef, hasNull);
4442 if (unlikely(err != 0))
4443 {
4444 jam();
4445 releaseSection(tmp);
4446 break;
4447 }
4448 // ndbrequire(!hasNull);
4449
4450 /**
4451 * Update size of subsrouting section, which contains arguments
4452 */
4453 SegmentedSectionPtr ptr;
4454 getSection(ptr, tmp);
4455 Uint32 new_size = ptr.sz;
4456 Uint32 * sectionptrs = ptr.p->theData;
4457 sectionptrs[4] = new_size - org_size;
4458
4459 treeNodePtr.p->m_send.m_attrInfoPtrI = tmp;
4460 }
4461
4462 /**
4463 * Now send...
4464 */
4465
4466 /**
4467 * TODO merge better with lookup_start (refactor)
4468 */
4469 {
4470 /* We set the upper half word of m_correlation to the tuple ID
4471 * of the parent, such that the API can match this tuple with its
4472 * parent.
4473 * Then we re-use the tuple ID of the parent as the
4474 * tuple ID for this tuple also. Since the tuple ID
4475 * is unique within this batch and SPJ block for the parent operation,
4476 * it must also be unique for this operation.
4477 * This ensures that lookup operations with no user projection will
4478 * work, since such operations will have the same tuple ID as their
4479 * parents. The API will then be able to match a tuple with its
4480 * grandparent, even if it gets no tuple for the parent operation.*/
4481 treeNodePtr.p->m_send.m_correlation =
4482 (corrVal << 16) + (corrVal & 0xffff);
4483
4484 treeNodePtr.p->m_send.m_ref = tmp.receiverRef;
4485 LqhKeyReq * dst = (LqhKeyReq*)treeNodePtr.p->m_lookup_data.m_lqhKeyReq;
4486 dst->hashValue = tmp.hashInfo[0];
4487 dst->fragmentData = tmp.fragId;
4488 Uint32 attrLen = 0;
4489 LqhKeyReq::setDistributionKey(attrLen, tmp.fragDistKey);
4490 dst->attrLen = attrLen;
4491 lookup_send(signal, requestPtr, treeNodePtr);
4492
4493 if (treeNodePtr.p->m_bits & TreeNode::T_ATTRINFO_CONSTRUCTED)
4494 {
4495 jam();
4496 // restore
4497 treeNodePtr.p->m_send.m_attrInfoPtrI = attrInfoPtrI;
4498 }
4499 }
4500 return;
4501 } while (0);
4502
4503 // If we fail it will always be a 'hard error' -> abort
4504 ndbrequire(err);
4505 jam();
4506 abort(signal, requestPtr, err);
4507 }
4508
4509 void
lookup_abort(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr)4510 Dbspj::lookup_abort(Signal* signal,
4511 Ptr<Request> requestPtr,
4512 Ptr<TreeNode> treeNodePtr)
4513 {
4514 jam();
4515 }
4516
4517 Uint32
lookup_execNODE_FAILREP(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr,NdbNodeBitmask mask)4518 Dbspj::lookup_execNODE_FAILREP(Signal* signal,
4519 Ptr<Request> requestPtr,
4520 Ptr<TreeNode> treeNodePtr,
4521 NdbNodeBitmask mask)
4522 {
4523 jam();
4524 Uint32 node = 0;
4525 Uint32 sum = 0;
4526 while (requestPtr.p->m_outstanding &&
4527 ((node = mask.find(node + 1)) != NdbNodeBitmask::NotFound))
4528 {
4529 Uint32 cnt = requestPtr.p->m_lookup_node_data[node];
4530 sum += cnt;
4531 requestPtr.p->m_lookup_node_data[node] = 0;
4532 }
4533
4534 if (sum)
4535 {
4536 jam();
4537 ndbrequire(requestPtr.p->m_outstanding >= sum);
4538 requestPtr.p->m_outstanding -= sum;
4539 }
4540
4541 return sum;
4542 }
4543
4544 void
lookup_cleanup(Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr)4545 Dbspj::lookup_cleanup(Ptr<Request> requestPtr,
4546 Ptr<TreeNode> treeNodePtr)
4547 {
4548 cleanup_common(requestPtr, treeNodePtr);
4549 }
4550
4551
4552 Uint32
handle_special_hash(Uint32 tableId,Uint32 dstHash[4],const Uint64 * src,Uint32 srcLen,const KeyDescriptor * desc)4553 Dbspj::handle_special_hash(Uint32 tableId, Uint32 dstHash[4],
4554 const Uint64* src,
4555 Uint32 srcLen, // Len in #32bit words
4556 const KeyDescriptor* desc)
4557 {
4558 const Uint32 MAX_KEY_SIZE_IN_LONG_WORDS=
4559 (MAX_KEY_SIZE_IN_WORDS + 1) / 2;
4560 Uint64 alignedWorkspace[MAX_KEY_SIZE_IN_LONG_WORDS * MAX_XFRM_MULTIPLY];
4561 const bool hasVarKeys = desc->noOfVarKeys > 0;
4562 const bool hasCharAttr = desc->hasCharAttr;
4563 const bool compute_distkey = desc->noOfDistrKeys > 0;
4564
4565 const Uint64 *hashInput = 0;
4566 Uint32 inputLen = 0;
4567 Uint32 keyPartLen[MAX_ATTRIBUTES_IN_INDEX];
4568 Uint32 * keyPartLenPtr;
4569
4570 /* Normalise KeyInfo into workspace if necessary */
4571 if (hasCharAttr || (compute_distkey && hasVarKeys))
4572 {
4573 hashInput = alignedWorkspace;
4574 keyPartLenPtr = keyPartLen;
4575 inputLen = xfrm_key(tableId,
4576 (Uint32*)src,
4577 (Uint32*)alignedWorkspace,
4578 sizeof(alignedWorkspace) >> 2,
4579 keyPartLenPtr);
4580 if (unlikely(inputLen == 0))
4581 {
4582 return 290; // 'Corrupt key in TC, unable to xfrm'
4583 }
4584 }
4585 else
4586 {
4587 /* Keyinfo already suitable for hash */
4588 hashInput = src;
4589 inputLen = srcLen;
4590 keyPartLenPtr = 0;
4591 }
4592
4593 /* Calculate primary key hash */
4594 md5_hash(dstHash, hashInput, inputLen);
4595
4596 /* If the distribution key != primary key then we have to
4597 * form a distribution key from the primary key and calculate
4598 * a separate distribution hash based on this
4599 */
4600 if (compute_distkey)
4601 {
4602 jam();
4603
4604 Uint32 distrKeyHash[4];
4605 /* Reshuffle primary key columns to get just distribution key */
4606 Uint32 len = create_distr_key(tableId, (Uint32*)hashInput, (Uint32*)alignedWorkspace, keyPartLenPtr);
4607 /* Calculate distribution key hash */
4608 md5_hash(distrKeyHash, alignedWorkspace, len);
4609
4610 /* Just one word used for distribution */
4611 dstHash[1] = distrKeyHash[1];
4612 }
4613 return 0;
4614 }
4615
4616 Uint32
computeHash(Signal * signal,BuildKeyReq & dst,Uint32 tableId,Uint32 ptrI)4617 Dbspj::computeHash(Signal* signal,
4618 BuildKeyReq& dst, Uint32 tableId, Uint32 ptrI)
4619 {
4620 /**
4621 * Essentially the same code as in Dbtc::hash().
4622 * The code for user defined partitioning has been removed though.
4623 */
4624 SegmentedSectionPtr ptr;
4625 getSection(ptr, ptrI);
4626
4627 /* NOTE: md5_hash below require 64-bit alignment
4628 */
4629 const Uint32 MAX_KEY_SIZE_IN_LONG_WORDS=
4630 (MAX_KEY_SIZE_IN_WORDS + 1) / 2;
4631 Uint64 tmp64[MAX_KEY_SIZE_IN_LONG_WORDS];
4632 Uint32 *tmp32 = (Uint32*)tmp64;
4633 ndbassert(ptr.sz <= MAX_KEY_SIZE_IN_WORDS);
4634 copy(tmp32, ptr);
4635
4636 const KeyDescriptor* desc = g_key_descriptor_pool.getPtr(tableId);
4637 ndbrequire(desc != NULL);
4638
4639 bool need_special_hash = desc->hasCharAttr | (desc->noOfDistrKeys > 0);
4640 if (need_special_hash)
4641 {
4642 jam();
4643 return handle_special_hash(tableId, dst.hashInfo, tmp64, ptr.sz, desc);
4644 }
4645 else
4646 {
4647 jam();
4648 md5_hash(dst.hashInfo, tmp64, ptr.sz);
4649 return 0;
4650 }
4651 }
4652
4653 /**
4654 * This function differs from computeHash in that *ptrI*
4655 * only contains partition key (packed) and not full primary key
4656 */
4657 Uint32
computePartitionHash(Signal * signal,BuildKeyReq & dst,Uint32 tableId,Uint32 ptrI)4658 Dbspj::computePartitionHash(Signal* signal,
4659 BuildKeyReq& dst, Uint32 tableId, Uint32 ptrI)
4660 {
4661 SegmentedSectionPtr ptr;
4662 getSection(ptr, ptrI);
4663
4664 /* NOTE: md5_hash below require 64-bit alignment
4665 */
4666 const Uint32 MAX_KEY_SIZE_IN_LONG_WORDS=
4667 (MAX_KEY_SIZE_IN_WORDS + 1) / 2;
4668 Uint64 _space[MAX_KEY_SIZE_IN_LONG_WORDS];
4669 Uint64 *tmp64 = _space;
4670 Uint32 *tmp32 = (Uint32*)tmp64;
4671 Uint32 sz = ptr.sz;
4672 ndbassert(ptr.sz <= MAX_KEY_SIZE_IN_WORDS);
4673 copy(tmp32, ptr);
4674
4675 const KeyDescriptor* desc = g_key_descriptor_pool.getPtr(tableId);
4676 ndbrequire(desc != NULL);
4677
4678 bool need_xfrm = desc->hasCharAttr || desc->noOfVarKeys;
4679 if (need_xfrm)
4680 {
4681 jam();
4682 /**
4683 * xfrm distribution key
4684 */
4685 Uint32 srcPos = 0;
4686 Uint32 dstPos = 0;
4687 Uint32 * src = tmp32;
4688 Uint32 * dst = signal->theData+24;
4689 for (Uint32 i = 0; i < desc->noOfKeyAttr; i++)
4690 {
4691 const KeyDescriptor::KeyAttr& keyAttr = desc->keyAttr[i];
4692 if (AttributeDescriptor::getDKey(keyAttr.attributeDescriptor))
4693 {
4694 Uint32 attrLen =
4695 xfrm_attr(keyAttr.attributeDescriptor, keyAttr.charsetInfo,
4696 src, srcPos, dst, dstPos,
4697 NDB_ARRAY_SIZE(signal->theData) - 24);
4698 if (unlikely(attrLen == 0))
4699 {
4700 DEBUG_CRASH();
4701 return 290; // 'Corrupt key in TC, unable to xfrm'
4702 }
4703 }
4704 }
4705 tmp64 = (Uint64*)dst;
4706 sz = dstPos;
4707 }
4708
4709 md5_hash(dst.hashInfo, tmp64, sz);
4710 return 0;
4711 }
4712
4713 Uint32
getNodes(Signal * signal,BuildKeyReq & dst,Uint32 tableId)4714 Dbspj::getNodes(Signal* signal, BuildKeyReq& dst, Uint32 tableId)
4715 {
4716 DiGetNodesReq * req = (DiGetNodesReq *)&signal->theData[0];
4717 req->tableId = tableId;
4718 req->hashValue = dst.hashInfo[1];
4719 req->distr_key_indicator = 0; // userDefinedPartitioning not supported!
4720 req->jamBufferPtr = jamBuffer();
4721
4722 #if 1
4723 EXECUTE_DIRECT(DBDIH, GSN_DIGETNODESREQ, signal,
4724 DiGetNodesReq::SignalLength, 0);
4725 #else
4726 sendSignal(DBDIH_REF, GSN_DIGETNODESREQ, signal,
4727 DiGetNodesReq::SignalLength, JBB);
4728 jamEntry();
4729
4730 #endif
4731
4732 DiGetNodesConf * conf = (DiGetNodesConf *)&signal->theData[0];
4733 const Uint32 err = signal->theData[0] ? signal->theData[1] : 0;
4734 Uint32 Tdata2 = conf->reqinfo;
4735 Uint32 nodeId = conf->nodes[0];
4736 Uint32 instanceKey = (Tdata2 >> 24) & 127;
4737
4738 DEBUG("HASH to nodeId:" << nodeId << ", instanceKey:" << instanceKey);
4739
4740 jamEntry();
4741 if (unlikely(err != 0))
4742 {
4743 jam();
4744 goto error;
4745 }
4746 dst.fragId = conf->fragId;
4747 dst.fragDistKey = (Tdata2 >> 16) & 255;
4748 dst.receiverRef = numberToRef(DBLQH, instanceKey, nodeId);
4749
4750 return 0;
4751
4752 error:
4753 return err;
4754 }
4755
4756 /**
4757 * END - MODULE LOOKUP
4758 */
4759
4760 /**
4761 * MODULE SCAN FRAG
4762 *
4763 * NOTE: This may only be root node
4764 */
4765 const Dbspj::OpInfo
4766 Dbspj::g_ScanFragOpInfo =
4767 {
4768 &Dbspj::scanFrag_build,
4769 0, // prepare
4770 &Dbspj::scanFrag_start,
4771 &Dbspj::scanFrag_execTRANSID_AI,
4772 0, // execLQHKEYREF
4773 0, // execLQHKEYCONF
4774 &Dbspj::scanFrag_execSCAN_FRAGREF,
4775 &Dbspj::scanFrag_execSCAN_FRAGCONF,
4776 0, // parent row
4777 0, // parent batch complete
4778 0, // parent batch repeat
4779 0, // Dbspj::scanFrag_parent_batch_cleanup,
4780 &Dbspj::scanFrag_execSCAN_NEXTREQ,
4781 0, // Dbspj::scanFrag_complete
4782 &Dbspj::scanFrag_abort,
4783 0, // execNODE_FAILREP,
4784 &Dbspj::scanFrag_cleanup
4785 };
4786
4787 Uint32
scanFrag_build(Build_context & ctx,Ptr<Request> requestPtr,const QueryNode * qn,const QueryNodeParameters * qp)4788 Dbspj::scanFrag_build(Build_context& ctx,
4789 Ptr<Request> requestPtr,
4790 const QueryNode* qn,
4791 const QueryNodeParameters* qp)
4792 {
4793 Uint32 err = 0;
4794 Ptr<TreeNode> treeNodePtr;
4795 const QN_ScanFragNode * node = (const QN_ScanFragNode*)qn;
4796 const QN_ScanFragParameters * param = (const QN_ScanFragParameters*)qp;
4797
4798 do
4799 {
4800 err = DbspjErr::InvalidTreeNodeSpecification;
4801 DEBUG("scanFrag_build: len=" << node->len);
4802 if (unlikely(node->len < QN_ScanFragNode::NodeSize))
4803 {
4804 jam();
4805 break;
4806 }
4807
4808 err = DbspjErr::InvalidTreeParametersSpecification;
4809 DEBUG("param len: " << param->len);
4810 if (unlikely(param->len < QN_ScanFragParameters::NodeSize))
4811 {
4812 jam();
4813 break;
4814 }
4815
4816 err = createNode(ctx, requestPtr, treeNodePtr);
4817 if (unlikely(err != 0))
4818 {
4819 jam();
4820 break;
4821 }
4822
4823 treeNodePtr.p->m_info = &g_ScanFragOpInfo;
4824 treeNodePtr.p->m_tableOrIndexId = node->tableId;
4825 treeNodePtr.p->m_primaryTableId = node->tableId;
4826 treeNodePtr.p->m_schemaVersion = node->tableVersion;
4827 treeNodePtr.p->m_scanfrag_data.m_scanFragHandlePtrI = RNIL;
4828 Ptr<ScanFragHandle> scanFragHandlePtr;
4829 if (ERROR_INSERTED_CLEAR(17004))
4830 {
4831 jam();
4832 ndbout_c("Injecting OutOfQueryMemory error 17004 at line %d file %s",
4833 __LINE__, __FILE__);
4834 err = DbspjErr::OutOfQueryMemory;
4835 break;
4836 }
4837 if (unlikely(m_scanfraghandle_pool.seize(requestPtr.p->m_arena,
4838 scanFragHandlePtr) != true))
4839 {
4840 err = DbspjErr::OutOfQueryMemory;
4841 jam();
4842 break;
4843 }
4844
4845 scanFragHandlePtr.p->m_treeNodePtrI = treeNodePtr.i;
4846 scanFragHandlePtr.p->m_state = ScanFragHandle::SFH_NOT_STARTED;
4847 treeNodePtr.p->m_scanfrag_data.m_scanFragHandlePtrI = scanFragHandlePtr.i;
4848
4849 requestPtr.p->m_bits |= Request::RT_SCAN;
4850 treeNodePtr.p->m_bits |= TreeNode::T_ATTR_INTERPRETED;
4851 treeNodePtr.p->m_batch_size = ctx.m_batch_size_rows;
4852
4853 ScanFragReq*dst=(ScanFragReq*)treeNodePtr.p->m_scanfrag_data.m_scanFragReq;
4854 dst->senderData = scanFragHandlePtr.i;
4855 dst->resultRef = reference();
4856 dst->resultData = treeNodePtr.i;
4857 dst->savePointId = ctx.m_savepointId;
4858
4859 Uint32 transId1 = requestPtr.p->m_transId[0];
4860 Uint32 transId2 = requestPtr.p->m_transId[1];
4861 dst->transId1 = transId1;
4862 dst->transId2 = transId2;
4863
4864 Uint32 treeBits = node->requestInfo;
4865 Uint32 paramBits = param->requestInfo;
4866 //ndbout_c("Dbspj::scanFrag_build() treeBits=%.8x paramBits=%.8x",
4867 // treeBits, paramBits);
4868 Uint32 requestInfo = 0;
4869 ScanFragReq::setReadCommittedFlag(requestInfo, 1);
4870 ScanFragReq::setScanPrio(requestInfo, ctx.m_scanPrio);
4871 ScanFragReq::setCorrFactorFlag(requestInfo, 1);
4872 ScanFragReq::setNoDiskFlag(requestInfo,
4873 (treeBits & DABits::NI_LINKED_DISK) == 0 &&
4874 (paramBits & DABits::PI_DISK_ATTR) == 0);
4875 dst->requestInfo = requestInfo;
4876 dst->tableId = node->tableId;
4877 dst->schemaVersion = node->tableVersion;
4878
4879 ctx.m_resultData = param->resultData;
4880
4881 /**
4882 * Parse stuff common lookup/scan-frag
4883 */
4884 struct DABuffer nodeDA, paramDA;
4885 nodeDA.ptr = node->optional;
4886 nodeDA.end = nodeDA.ptr + (node->len - QN_ScanFragNode::NodeSize);
4887 paramDA.ptr = param->optional;
4888 paramDA.end = paramDA.ptr + (param->len - QN_ScanFragParameters::NodeSize);
4889 err = parseDA(ctx, requestPtr, treeNodePtr,
4890 nodeDA, treeBits, paramDA, paramBits);
4891 if (unlikely(err != 0))
4892 {
4893 jam();
4894 break;
4895 }
4896
4897 ctx.m_scan_cnt++;
4898 ctx.m_scans.set(treeNodePtr.p->m_node_no);
4899
4900 if (ctx.m_start_signal)
4901 {
4902 jam();
4903 Signal* signal = ctx.m_start_signal;
4904 const ScanFragReq* src = (const ScanFragReq*)(signal->getDataPtr());
4905
4906 #if NOT_YET
4907 Uint32 instanceNo =
4908 blockToInstance(signal->header.theReceiversBlockNumber);
4909 treeNodePtr.p->m_send.m_ref = numberToRef(DBLQH,
4910 instanceNo, getOwnNodeId());
4911 #else
4912 treeNodePtr.p->m_send.m_ref =
4913 numberToRef(DBLQH, getInstanceKey(src->tableId,
4914 src->fragmentNoKeyLen),
4915 getOwnNodeId());
4916 #endif
4917
4918 Uint32 fragId = src->fragmentNoKeyLen;
4919 Uint32 requestInfo = src->requestInfo;
4920 Uint32 batch_size_bytes = src->batch_size_bytes;
4921 Uint32 batch_size_rows = src->batch_size_rows;
4922
4923 #ifdef VM_TRACE
4924 Uint32 savePointId = src->savePointId;
4925 Uint32 tableId = src->tableId;
4926 Uint32 schemaVersion = src->schemaVersion;
4927 Uint32 transId1 = src->transId1;
4928 Uint32 transId2 = src->transId2;
4929 #endif
4930 ndbassert(ScanFragReq::getLockMode(requestInfo) == 0);
4931 ndbassert(ScanFragReq::getHoldLockFlag(requestInfo) == 0);
4932 ndbassert(ScanFragReq::getKeyinfoFlag(requestInfo) == 0);
4933 ndbassert(ScanFragReq::getReadCommittedFlag(requestInfo) == 1);
4934 ndbassert(ScanFragReq::getLcpScanFlag(requestInfo) == 0);
4935 //ScanFragReq::getAttrLen(requestInfo); // ignore
4936 ndbassert(ScanFragReq::getReorgFlag(requestInfo) == ScanFragReq::REORG_ALL);
4937
4938 Uint32 tupScanFlag = ScanFragReq::getTupScanFlag(requestInfo);
4939 Uint32 rangeScanFlag = ScanFragReq::getRangeScanFlag(requestInfo);
4940 Uint32 descendingFlag = ScanFragReq::getDescendingFlag(requestInfo);
4941 Uint32 scanPrio = ScanFragReq::getScanPrio(requestInfo);
4942
4943 Uint32 dst_requestInfo = dst->requestInfo;
4944
4945 ScanFragReq::setTupScanFlag(dst_requestInfo,tupScanFlag);
4946 ScanFragReq::setRangeScanFlag(dst_requestInfo,rangeScanFlag);
4947 ScanFragReq::setDescendingFlag(dst_requestInfo,descendingFlag);
4948 ScanFragReq::setScanPrio(dst_requestInfo,scanPrio);
4949
4950 /**
4951 * 'NoDiskFlag' should agree with information in treeNode
4952 */
4953 ndbassert(ScanFragReq::getNoDiskFlag(requestInfo) ==
4954 ScanFragReq::getNoDiskFlag(dst_requestInfo));
4955
4956 dst->fragmentNoKeyLen = fragId;
4957 dst->requestInfo = dst_requestInfo;
4958 dst->batch_size_bytes = batch_size_bytes;
4959 dst->batch_size_rows = batch_size_rows;
4960
4961 #ifdef VM_TRACE
4962 ndbassert(dst->savePointId == savePointId);
4963 ndbassert(dst->tableId == tableId);
4964 ndbassert(dst->schemaVersion == schemaVersion);
4965 ndbassert(dst->transId1 == transId1);
4966 ndbassert(dst->transId2 == transId2);
4967 #endif
4968
4969 treeNodePtr.p->m_bits |= TreeNode::T_ONE_SHOT;
4970
4971 if (rangeScanFlag)
4972 {
4973 c_Counters.incr_counter(CI_RANGE_SCANS_RECEIVED, 1);
4974 }
4975 else
4976 {
4977 c_Counters.incr_counter(CI_TABLE_SCANS_RECEIVED, 1);
4978 }
4979 }
4980 else
4981 {
4982 ndbrequire(false);
4983 }
4984
4985 return 0;
4986 } while (0);
4987
4988 return err;
4989 }
4990
4991 void
scanFrag_start(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr)4992 Dbspj::scanFrag_start(Signal* signal,
4993 Ptr<Request> requestPtr,
4994 Ptr<TreeNode> treeNodePtr)
4995 {
4996 scanFrag_send(signal, requestPtr, treeNodePtr);
4997 }
4998
4999 void
scanFrag_send(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr)5000 Dbspj::scanFrag_send(Signal* signal,
5001 Ptr<Request> requestPtr,
5002 Ptr<TreeNode> treeNodePtr)
5003 {
5004 jam();
5005 if (!ERROR_INSERTED(17521)) // Avoid emulated rnd errors
5006 {
5007 // ::checkTableError() should be handled before we reach this far
5008 ndbassert(checkTableError(treeNodePtr) == 0);
5009 }
5010
5011 Ptr<ScanFragHandle> scanFragHandlePtr;
5012 m_scanfraghandle_pool.getPtr(scanFragHandlePtr, treeNodePtr.p->
5013 m_scanfrag_data.m_scanFragHandlePtrI);
5014
5015 ScanFragReq* req = reinterpret_cast<ScanFragReq*>(signal->getDataPtrSend());
5016
5017 memcpy(req, treeNodePtr.p->m_scanfrag_data.m_scanFragReq,
5018 sizeof(treeNodePtr.p->m_scanfrag_data.m_scanFragReq));
5019 req->variableData[0] = treeNodePtr.p->m_send.m_correlation;
5020 req->variableData[1] = requestPtr.p->m_rootResultData;
5021
5022 SectionHandle handle(this);
5023
5024 Uint32 ref = treeNodePtr.p->m_send.m_ref;
5025 Uint32 keyInfoPtrI = treeNodePtr.p->m_send.m_keyInfoPtrI;
5026 Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;
5027
5028 /**
5029 * ScanFrag may only be used as root-node, i.e T_ONE_SHOT
5030 */
5031 ndbrequire(treeNodePtr.p->m_bits & TreeNode::T_ONE_SHOT);
5032
5033 /**
5034 * Pass sections to send
5035 */
5036 treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL;
5037 treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL;
5038
5039 getSection(handle.m_ptr[0], attrInfoPtrI);
5040 handle.m_cnt = 1;
5041
5042 if (keyInfoPtrI != RNIL)
5043 {
5044 jam();
5045 getSection(handle.m_ptr[1], keyInfoPtrI);
5046 handle.m_cnt = 2;
5047 }
5048
5049 #ifdef DEBUG_SCAN_FRAGREQ
5050 ndbout_c("SCAN_FRAGREQ to %x", ref);
5051 printSCAN_FRAGREQ(stdout, signal->getDataPtrSend(),
5052 NDB_ARRAY_SIZE(treeNodePtr.p->m_scanfrag_data.m_scanFragReq),
5053 DBLQH);
5054 printf("ATTRINFO: ");
5055 print(handle.m_ptr[0], stdout);
5056 if (handle.m_cnt > 1)
5057 {
5058 printf("KEYINFO: ");
5059 print(handle.m_ptr[1], stdout);
5060 }
5061 #endif
5062
5063 if (ScanFragReq::getRangeScanFlag(req->requestInfo))
5064 {
5065 c_Counters.incr_counter(CI_LOCAL_RANGE_SCANS_SENT, 1);
5066 }
5067 else
5068 {
5069 c_Counters.incr_counter(CI_LOCAL_TABLE_SCANS_SENT, 1);
5070 }
5071
5072 if (ERROR_INSERTED_CLEAR(17100))
5073 {
5074 jam();
5075 ndbout_c("Injecting invalid schema version error at line %d file %s",
5076 __LINE__, __FILE__);
5077 // Provoke 'Invalid schema version' in order to receive SCAN_FRAGREF
5078 req->schemaVersion++;
5079 }
5080
5081 ndbrequire(refToNode(ref) == getOwnNodeId());
5082 {
5083 FragmentSendInfo fragSendInfo;
5084 // See comment about fragmented send pattern in ::scanIndex_send()
5085 sendFirstFragment(fragSendInfo,
5086 ref,
5087 GSN_SCAN_FRAGREQ,
5088 signal,
5089 NDB_ARRAY_SIZE(treeNodePtr.p->m_scanfrag_data.m_scanFragReq),
5090 JBB,
5091 &handle, false);
5092
5093 while (fragSendInfo.m_status != FragmentSendInfo::SendComplete) // SendNotComplete
5094 {
5095 jam();
5096 // Send remaining fragments
5097 sendNextSegmentedFragment(signal, fragSendInfo);
5098 }
5099 }
5100
5101 requestPtr.p->m_completed_nodes.clear(treeNodePtr.p->m_node_no);
5102 requestPtr.p->m_outstanding++;
5103 requestPtr.p->m_cnt_active++;
5104 treeNodePtr.p->m_state = TreeNode::TN_ACTIVE;
5105
5106 scanFragHandlePtr.p->m_state = ScanFragHandle::SFH_SCANNING;
5107 treeNodePtr.p->m_scanfrag_data.m_rows_received = 0;
5108 treeNodePtr.p->m_scanfrag_data.m_rows_expecting = ~Uint32(0);
5109 }
5110
5111 void
scanFrag_execTRANSID_AI(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr,const RowPtr & rowRef)5112 Dbspj::scanFrag_execTRANSID_AI(Signal* signal,
5113 Ptr<Request> requestPtr,
5114 Ptr<TreeNode> treeNodePtr,
5115 const RowPtr & rowRef)
5116 {
5117 jam();
5118 treeNodePtr.p->m_scanfrag_data.m_rows_received++;
5119
5120 common_execTRANSID_AI(signal, requestPtr, treeNodePtr, rowRef);
5121
5122 ndbassert(treeNodePtr.p->m_resumePtrI == RNIL);
5123
5124 if (treeNodePtr.p->m_scanfrag_data.m_rows_received ==
5125 treeNodePtr.p->m_scanfrag_data.m_rows_expecting)
5126 {
5127 jam();
5128 handleTreeNodeComplete(signal, requestPtr, treeNodePtr);
5129
5130 checkBatchComplete(signal, requestPtr, 1);
5131 return;
5132 }
5133 }
5134
5135 void
scanFrag_execSCAN_FRAGREF(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr,Ptr<ScanFragHandle> scanFragHandlePtr)5136 Dbspj::scanFrag_execSCAN_FRAGREF(Signal* signal,
5137 Ptr<Request> requestPtr,
5138 Ptr<TreeNode> treeNodePtr,
5139 Ptr<ScanFragHandle> scanFragHandlePtr)
5140 {
5141 jam();
5142
5143 const ScanFragRef* rep =
5144 reinterpret_cast<const ScanFragRef*>(signal->getDataPtr());
5145 Uint32 errCode = rep->errorCode;
5146
5147 DEBUG("scanFrag_execSCAN_FRAGREF, rep->senderData:" << rep->senderData
5148 << ", requestPtr.p->m_senderData:" << requestPtr.p->m_senderData);
5149 scanFragHandlePtr.p->m_state = ScanFragHandle::SFH_COMPLETE;
5150 ndbrequire(treeNodePtr.p->m_state == TreeNode::TN_ACTIVE);
5151 ndbrequire(requestPtr.p->m_cnt_active);
5152 requestPtr.p->m_cnt_active--;
5153 ndbrequire(requestPtr.p->m_outstanding);
5154 requestPtr.p->m_outstanding--;
5155 treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
5156
5157 abort(signal, requestPtr, errCode);
5158 }
5159
5160
5161 void
scanFrag_execSCAN_FRAGCONF(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr,Ptr<ScanFragHandle> scanFragHandlePtr)5162 Dbspj::scanFrag_execSCAN_FRAGCONF(Signal* signal,
5163 Ptr<Request> requestPtr,
5164 Ptr<TreeNode> treeNodePtr,
5165 Ptr<ScanFragHandle> scanFragHandlePtr)
5166 {
5167 const ScanFragConf * conf =
5168 reinterpret_cast<const ScanFragConf*>(signal->getDataPtr());
5169 Uint32 rows = conf->completedOps;
5170 Uint32 done = conf->fragmentCompleted;
5171
5172 Uint32 state = scanFragHandlePtr.p->m_state;
5173 if (state == ScanFragHandle::SFH_WAIT_CLOSE && done == 0)
5174 {
5175 jam();
5176 /**
5177 * We sent an explicit close request...ignore this...a close will come later
5178 */
5179 return;
5180 }
5181
5182 ndbrequire(done <= 2); // 0, 1, 2 (=ZSCAN_FRAG_CLOSED)
5183
5184 ndbassert(treeNodePtr.p->m_scanfrag_data.m_rows_expecting == ~Uint32(0));
5185 treeNodePtr.p->m_scanfrag_data.m_rows_expecting = rows;
5186 if (treeNodePtr.p->isLeaf())
5187 {
5188 /**
5189 * If this is a leaf node, then no rows will be sent to the SPJ block,
5190 * as there are no child operations to instantiate.
5191 */
5192 treeNodePtr.p->m_scanfrag_data.m_rows_received = rows;
5193 }
5194
5195 requestPtr.p->m_rows += rows;
5196 if (done)
5197 {
5198 jam();
5199
5200 ndbrequire(requestPtr.p->m_cnt_active);
5201 requestPtr.p->m_cnt_active--;
5202 treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
5203 scanFragHandlePtr.p->m_state = ScanFragHandle::SFH_COMPLETE;
5204 }
5205 else
5206 {
5207 jam();
5208 scanFragHandlePtr.p->m_state = ScanFragHandle::SFH_WAIT_NEXTREQ;
5209 }
5210
5211 if (treeNodePtr.p->m_scanfrag_data.m_rows_expecting ==
5212 treeNodePtr.p->m_scanfrag_data.m_rows_received ||
5213 (state == ScanFragHandle::SFH_WAIT_CLOSE))
5214 {
5215 jam();
5216 handleTreeNodeComplete(signal, requestPtr, treeNodePtr);
5217
5218 checkBatchComplete(signal, requestPtr, 1);
5219 return;
5220 }
5221 }
5222
5223 void
scanFrag_execSCAN_NEXTREQ(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr)5224 Dbspj::scanFrag_execSCAN_NEXTREQ(Signal* signal,
5225 Ptr<Request> requestPtr,
5226 Ptr<TreeNode> treeNodePtr)
5227 {
5228 jam();
5229 Uint32 err = checkTableError(treeNodePtr);
5230 if (unlikely(err))
5231 {
5232 jam();
5233 abort(signal, requestPtr, err);
5234 return;
5235 }
5236
5237 Ptr<ScanFragHandle> scanFragHandlePtr;
5238 m_scanfraghandle_pool.getPtr(scanFragHandlePtr, treeNodePtr.p->
5239 m_scanfrag_data.m_scanFragHandlePtrI);
5240
5241 const ScanFragReq * org =
5242 (ScanFragReq*)treeNodePtr.p->m_scanfrag_data.m_scanFragReq;
5243
5244 ScanFragNextReq* req =
5245 reinterpret_cast<ScanFragNextReq*>(signal->getDataPtrSend());
5246 req->senderData = treeNodePtr.p->m_scanfrag_data.m_scanFragHandlePtrI;
5247 req->requestInfo = 0;
5248 req->transId1 = requestPtr.p->m_transId[0];
5249 req->transId2 = requestPtr.p->m_transId[1];
5250 req->batch_size_rows = org->batch_size_rows;
5251 req->batch_size_bytes = org->batch_size_bytes;
5252
5253 DEBUG("scanFrag_execSCAN_NEXTREQ to: " << hex << treeNodePtr.p->m_send.m_ref
5254 << ", senderData: " << req->senderData);
5255 #ifdef DEBUG_SCAN_FRAGREQ
5256 printSCANFRAGNEXTREQ(stdout, &signal->theData[0],
5257 ScanFragNextReq::SignalLength, DBLQH);
5258 #endif
5259
5260 sendSignal(treeNodePtr.p->m_send.m_ref,
5261 GSN_SCAN_NEXTREQ,
5262 signal,
5263 ScanFragNextReq::SignalLength,
5264 JBB);
5265
5266 treeNodePtr.p->m_scanfrag_data.m_rows_received = 0;
5267 treeNodePtr.p->m_scanfrag_data.m_rows_expecting = ~Uint32(0);
5268 requestPtr.p->m_outstanding++;
5269 requestPtr.p->m_completed_nodes.clear(treeNodePtr.p->m_node_no);
5270 scanFragHandlePtr.p->m_state = ScanFragHandle::SFH_SCANNING;
5271 }//Dbspj::scanFrag_execSCAN_NEXTREQ()
5272
5273 void
scanFrag_abort(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr)5274 Dbspj::scanFrag_abort(Signal* signal,
5275 Ptr<Request> requestPtr,
5276 Ptr<TreeNode> treeNodePtr)
5277 {
5278 jam();
5279
5280 if (treeNodePtr.p->m_state == TreeNode::TN_ACTIVE)
5281 {
5282 jam();
5283 Ptr<ScanFragHandle> scanFragHandlePtr;
5284 m_scanfraghandle_pool.getPtr(scanFragHandlePtr, treeNodePtr.p->
5285 m_scanfrag_data.m_scanFragHandlePtrI);
5286
5287 switch(scanFragHandlePtr.p->m_state){
5288 case ScanFragHandle::SFH_NOT_STARTED:
5289 case ScanFragHandle::SFH_COMPLETE:
5290 ndbrequire(false); // we shouldnt be TN_ACTIVE then...
5291
5292 case ScanFragHandle::SFH_WAIT_CLOSE:
5293 jam();
5294 // close already sent
5295 return;
5296 case ScanFragHandle::SFH_WAIT_NEXTREQ:
5297 jam();
5298 // we were idle
5299 requestPtr.p->m_outstanding++;
5300 break;
5301 case ScanFragHandle::SFH_SCANNING:
5302 jam();
5303 break;
5304 }
5305
5306 treeNodePtr.p->m_scanfrag_data.m_rows_expecting = ~Uint32(0);
5307 scanFragHandlePtr.p->m_state = ScanFragHandle::SFH_WAIT_CLOSE;
5308
5309 ScanFragNextReq* req =
5310 reinterpret_cast<ScanFragNextReq*>(signal->getDataPtrSend());
5311 req->senderData = treeNodePtr.p->m_scanfrag_data.m_scanFragHandlePtrI;
5312 req->requestInfo = 0;
5313 ScanFragNextReq::setCloseFlag(req->requestInfo, 1);
5314 req->transId1 = requestPtr.p->m_transId[0];
5315 req->transId2 = requestPtr.p->m_transId[1];
5316 req->batch_size_rows = 0;
5317 req->batch_size_bytes = 0;
5318
5319 sendSignal(treeNodePtr.p->m_send.m_ref,
5320 GSN_SCAN_NEXTREQ,
5321 signal,
5322 ScanFragNextReq::SignalLength,
5323 JBB);
5324 }
5325 }
5326
5327
5328 void
scanFrag_cleanup(Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr)5329 Dbspj::scanFrag_cleanup(Ptr<Request> requestPtr,
5330 Ptr<TreeNode> treeNodePtr)
5331 {
5332 Uint32 ptrI = treeNodePtr.p->m_scanfrag_data.m_scanFragHandlePtrI;
5333 if (ptrI != RNIL)
5334 {
5335 m_scanfraghandle_pool.release(ptrI);
5336 }
5337 cleanup_common(requestPtr, treeNodePtr);
5338 }
5339
5340 /**
5341 * END - MODULE SCAN FRAG
5342 */
5343
5344 /**
5345 * MODULE SCAN INDEX
5346 *
5347 * NOTE: This may not be root-node
5348 */
5349 const Dbspj::OpInfo
5350 Dbspj::g_ScanIndexOpInfo =
5351 {
5352 &Dbspj::scanIndex_build,
5353 &Dbspj::scanIndex_prepare,
5354 0, // start
5355 &Dbspj::scanIndex_execTRANSID_AI,
5356 0, // execLQHKEYREF
5357 0, // execLQHKEYCONF
5358 &Dbspj::scanIndex_execSCAN_FRAGREF,
5359 &Dbspj::scanIndex_execSCAN_FRAGCONF,
5360 &Dbspj::scanIndex_parent_row,
5361 &Dbspj::scanIndex_parent_batch_complete,
5362 &Dbspj::scanIndex_parent_batch_repeat,
5363 &Dbspj::scanIndex_parent_batch_cleanup,
5364 &Dbspj::scanIndex_execSCAN_NEXTREQ,
5365 &Dbspj::scanIndex_complete,
5366 &Dbspj::scanIndex_abort,
5367 &Dbspj::scanIndex_execNODE_FAILREP,
5368 &Dbspj::scanIndex_cleanup
5369 };
5370
5371 Uint32
scanIndex_build(Build_context & ctx,Ptr<Request> requestPtr,const QueryNode * qn,const QueryNodeParameters * qp)5372 Dbspj::scanIndex_build(Build_context& ctx,
5373 Ptr<Request> requestPtr,
5374 const QueryNode* qn,
5375 const QueryNodeParameters* qp)
5376 {
5377 Uint32 err = 0;
5378 Ptr<TreeNode> treeNodePtr;
5379 const QN_ScanIndexNode * node = (const QN_ScanIndexNode*)qn;
5380 const QN_ScanIndexParameters * param = (const QN_ScanIndexParameters*)qp;
5381
5382 do
5383 {
5384 err = DbspjErr::InvalidTreeNodeSpecification;
5385 DEBUG("scanIndex_build: len=" << node->len);
5386 if (unlikely(node->len < QN_ScanIndexNode::NodeSize))
5387 {
5388 jam();
5389 break;
5390 }
5391
5392 err = DbspjErr::InvalidTreeParametersSpecification;
5393 DEBUG("param len: " << param->len);
5394 if (unlikely(param->len < QN_ScanIndexParameters::NodeSize))
5395 {
5396 jam();
5397 break;
5398 }
5399
5400 err = createNode(ctx, requestPtr, treeNodePtr);
5401 if (unlikely(err != 0))
5402 {
5403 jam();
5404 break;
5405 }
5406
5407 Uint32 batchSize = param->batchSize;
5408
5409 requestPtr.p->m_bits |= Request::RT_SCAN;
5410 requestPtr.p->m_bits |= Request::RT_NEED_PREPARE;
5411 requestPtr.p->m_bits |= Request::RT_NEED_COMPLETE;
5412
5413 Uint32 indexId = node->tableId;
5414 Uint32 tableId = g_key_descriptor_pool.getPtr(indexId)->primaryTableId;
5415
5416 treeNodePtr.p->m_info = &g_ScanIndexOpInfo;
5417 treeNodePtr.p->m_tableOrIndexId = indexId;
5418 treeNodePtr.p->m_primaryTableId = tableId;
5419 treeNodePtr.p->m_schemaVersion = node->tableVersion;
5420 treeNodePtr.p->m_bits |= TreeNode::T_ATTR_INTERPRETED;
5421 treeNodePtr.p->m_bits |= TreeNode::T_NEED_REPORT_BATCH_COMPLETED;
5422 treeNodePtr.p->m_batch_size =
5423 batchSize & ~(0xFFFFFFFF << QN_ScanIndexParameters::BatchRowBits);
5424
5425 ScanFragReq*dst=(ScanFragReq*)treeNodePtr.p->m_scanindex_data.m_scanFragReq;
5426 dst->senderData = treeNodePtr.i;
5427 dst->resultRef = reference();
5428 dst->resultData = treeNodePtr.i;
5429 dst->savePointId = ctx.m_savepointId;
5430 dst->batch_size_rows =
5431 batchSize & ~(0xFFFFFFFF << QN_ScanIndexParameters::BatchRowBits);
5432 dst->batch_size_bytes = batchSize >> QN_ScanIndexParameters::BatchRowBits;
5433
5434 Uint32 transId1 = requestPtr.p->m_transId[0];
5435 Uint32 transId2 = requestPtr.p->m_transId[1];
5436 dst->transId1 = transId1;
5437 dst->transId2 = transId2;
5438
5439 Uint32 treeBits = node->requestInfo;
5440 Uint32 paramBits = param->requestInfo;
5441 Uint32 requestInfo = 0;
5442 ScanFragReq::setRangeScanFlag(requestInfo, 1);
5443 ScanFragReq::setReadCommittedFlag(requestInfo, 1);
5444 ScanFragReq::setScanPrio(requestInfo, ctx.m_scanPrio);
5445 ScanFragReq::setNoDiskFlag(requestInfo,
5446 (treeBits & DABits::NI_LINKED_DISK) == 0 &&
5447 (paramBits & DABits::PI_DISK_ATTR) == 0);
5448 ScanFragReq::setCorrFactorFlag(requestInfo, 1);
5449 dst->requestInfo = requestInfo;
5450 dst->tableId = node->tableId;
5451 dst->schemaVersion = node->tableVersion;
5452
5453 ctx.m_resultData = param->resultData;
5454
5455 /**
5456 * Parse stuff
5457 */
5458 struct DABuffer nodeDA, paramDA;
5459 nodeDA.ptr = node->optional;
5460 nodeDA.end = nodeDA.ptr + (node->len - QN_ScanIndexNode::NodeSize);
5461 paramDA.ptr = param->optional;
5462 paramDA.end = paramDA.ptr + (param->len - QN_ScanIndexParameters::NodeSize);
5463
5464 err = parseScanIndex(ctx, requestPtr, treeNodePtr,
5465 nodeDA, treeBits, paramDA, paramBits);
5466
5467 if (unlikely(err != 0))
5468 {
5469 jam();
5470 break;
5471 }
5472
5473 /**
5474 * Since we T_NEED_REPORT_BATCH_COMPLETED, all ancestors
5475 * have to T_REPORT_BATCH_COMPLETE to its siblings
5476 */
5477 Ptr<TreeNode> nodePtr;
5478 nodePtr.i = treeNodePtr.p->m_parentPtrI;
5479 while (nodePtr.i != RNIL)
5480 {
5481 jam();
5482 m_treenode_pool.getPtr(nodePtr);
5483 nodePtr.p->m_bits |= TreeNode::T_REPORT_BATCH_COMPLETE;
5484 nodePtr.i = nodePtr.p->m_parentPtrI;
5485 }
5486
5487 /**
5488 * If there exists other scan TreeNodes not being among
5489 * my ancestors, results from this scanIndex may be repeated
5490 * as part of an X-scan.
5491 *
5492 * NOTE: The scan nodes being along the left deep ancestor chain
5493 * are not 'repeatable' as they are driving the
5494 * repeated X-scan and are thus not repeated themself.
5495 */
5496 if (requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT &&
5497 !treeNodePtr.p->m_ancestors.contains(ctx.m_scans))
5498 {
5499 treeNodePtr.p->m_bits |= TreeNode::T_SCAN_REPEATABLE;
5500 }
5501
5502 ctx.m_scan_cnt++;
5503 ctx.m_scans.set(treeNodePtr.p->m_node_no);
5504
5505 return 0;
5506 } while (0);
5507
5508 return err;
5509 }
5510
5511 Uint32
parseScanIndex(Build_context & ctx,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr,DABuffer tree,Uint32 treeBits,DABuffer param,Uint32 paramBits)5512 Dbspj::parseScanIndex(Build_context& ctx,
5513 Ptr<Request> requestPtr,
5514 Ptr<TreeNode> treeNodePtr,
5515 DABuffer tree, Uint32 treeBits,
5516 DABuffer param, Uint32 paramBits)
5517 {
5518 Uint32 err = 0;
5519
5520 typedef QN_ScanIndexNode Node;
5521 typedef QN_ScanIndexParameters Params;
5522
5523 do
5524 {
5525 jam();
5526
5527 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
5528 data.m_fragments.init();
5529 data.m_frags_outstanding = 0;
5530 data.m_frags_complete = 0;
5531 data.m_frags_not_started = 0;
5532 data.m_parallelismStat.init();
5533 data.m_firstExecution = true;
5534 data.m_batch_chunks = 0;
5535
5536 /**
5537 * We will need to look at the parameters again if the scan is pruned and the prune
5538 * key uses parameter values. Therefore, we keep a reference to the start of the
5539 * parameter buffer.
5540 */
5541 DABuffer origParam = param;
5542 err = parseDA(ctx, requestPtr, treeNodePtr,
5543 tree, treeBits, param, paramBits);
5544 if (unlikely(err != 0))
5545 break;
5546
5547 if (treeBits & Node::SI_PRUNE_PATTERN)
5548 {
5549 Uint32 len_cnt = * tree.ptr ++;
5550 Uint32 len = len_cnt & 0xFFFF; // length of pattern in words
5551 Uint32 cnt = len_cnt >> 16; // no of parameters
5552
5553 LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
5554 ndbrequire((cnt==0) == ((treeBits & Node::SI_PRUNE_PARAMS) ==0));
5555 ndbrequire((cnt==0) == ((paramBits & Params::SIP_PRUNE_PARAMS)==0));
5556
5557 if (treeBits & Node::SI_PRUNE_LINKED)
5558 {
5559 jam();
5560 DEBUG("LINKED-PRUNE PATTERN w/ " << cnt << " PARAM values");
5561
5562 data.m_prunePattern.init();
5563 Local_pattern_store pattern(pool, data.m_prunePattern);
5564
5565 /**
5566 * Expand pattern into a new pattern (with linked values)
5567 */
5568 err = expand(pattern, treeNodePtr, tree, len, origParam, cnt);
5569 if (unlikely(err != 0))
5570 {
5571 jam();
5572 break;
5573 }
5574 treeNodePtr.p->m_bits |= TreeNode::T_PRUNE_PATTERN;
5575 c_Counters.incr_counter(CI_PRUNED_RANGE_SCANS_RECEIVED, 1);
5576 }
5577 else
5578 {
5579 jam();
5580 DEBUG("FIXED-PRUNE w/ " << cnt << " PARAM values");
5581
5582 /**
5583 * Expand pattern directly into
5584 * This means a "fixed" pruning from here on
5585 * i.e guaranteed single partition
5586 */
5587 Uint32 prunePtrI = RNIL;
5588 bool hasNull;
5589 err = expand(prunePtrI, tree, len, origParam, cnt, hasNull);
5590 if (unlikely(err != 0))
5591 {
5592 jam();
5593 releaseSection(prunePtrI);
5594 break;
5595 }
5596
5597 if (unlikely(hasNull))
5598 {
5599 /* API should have elliminated requests w/ const-NULL keys */
5600 jam();
5601 DEBUG("BEWARE: T_CONST_PRUNE-key contain NULL values");
5602 releaseSection(prunePtrI);
5603 // treeNodePtr.p->m_bits |= TreeNode::T_NULL_PRUNE;
5604 // break;
5605 ndbrequire(false);
5606 }
5607 ndbrequire(prunePtrI != RNIL); /* todo: can we allow / take advantage of NULLs in range scan? */
5608 data.m_constPrunePtrI = prunePtrI;
5609
5610 /**
5611 * We may not compute the partition for the hash-key here
5612 * as we have not yet opened a read-view
5613 */
5614 treeNodePtr.p->m_bits |= TreeNode::T_CONST_PRUNE;
5615 c_Counters.incr_counter(CI_CONST_PRUNED_RANGE_SCANS_RECEIVED, 1);
5616 }
5617 } //SI_PRUNE_PATTERN
5618
5619 if ((treeNodePtr.p->m_bits & TreeNode::T_CONST_PRUNE) == 0 &&
5620 ((treeBits & Node::SI_PARALLEL) ||
5621 ((paramBits & Params::SIP_PARALLEL))))
5622 {
5623 jam();
5624 treeNodePtr.p->m_bits |= TreeNode::T_SCAN_PARALLEL;
5625 }
5626
5627 return 0;
5628 } while(0);
5629
5630 jam();
5631 return err;
5632 }
5633
5634 void
5635 Dbspj::scanIndex_prepare(Signal * signal,
5636 Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr)
5637 {
5638 jam();
5639
5640 if (!ERROR_INSERTED(17521)) // Avoid emulated rnd errors
5641 {
5642 // ::checkTableError() should be handled before we reach this far
5643 ndbassert(checkTableError(treeNodePtr) == 0); //Handled in Dbspj::start
5644 }
5645 treeNodePtr.p->m_state = TreeNode::TN_PREPARING;
5646
5647 DihScanTabReq * req = (DihScanTabReq*)signal->getDataPtrSend();
5648 req->senderRef = reference();
5649 req->senderData = treeNodePtr.i;
5650 req->tableId = treeNodePtr.p->m_tableOrIndexId;
5651 req->schemaTransId = 0;
5652 sendSignal(DBDIH_REF, GSN_DIH_SCAN_TAB_REQ, signal,
5653 DihScanTabReq::SignalLength, JBB);
5654
5655 requestPtr.p->m_outstanding++;
5656 }
5657
5658 void
5659 Dbspj::execDIH_SCAN_TAB_REF(Signal* signal)
5660 {
5661 jamEntry();
5662 ndbrequire(false);
5663 }
5664
5665 void
5666 Dbspj::execDIH_SCAN_TAB_CONF(Signal* signal)
5667 {
5668 jamEntry();
5669 DihScanTabConf * conf = (DihScanTabConf*)signal->getDataPtr();
5670
5671 Ptr<TreeNode> treeNodePtr;
5672 m_treenode_pool.getPtr(treeNodePtr, conf->senderData);
5673 ndbrequire(treeNodePtr.p->m_info == &g_ScanIndexOpInfo);
5674
5675 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
5676
5677 Uint32 cookie = conf->scanCookie;
5678 Uint32 fragCount = conf->fragmentCount;
5679
5680 if (conf->reorgFlag)
5681 {
5682 jam();
5683 ScanFragReq * dst = (ScanFragReq*)data.m_scanFragReq;
5684 ScanFragReq::setReorgFlag(dst->requestInfo, ScanFragReq::REORG_NOT_MOVED);
5685 }
5686 if (treeNodePtr.p->m_bits & TreeNode::T_CONST_PRUNE)
5687 {
5688 jam();
5689 fragCount = 1;
5690 }
5691 data.m_fragCount = fragCount;
5692 data.m_scanCookie = cookie;
5693
5694 const Uint32 prunemask = TreeNode::T_PRUNE_PATTERN | TreeNode::T_CONST_PRUNE;
5695 bool pruned = (treeNodePtr.p->m_bits & prunemask) != 0;
5696
5697 Ptr<Request> requestPtr;
5698 m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
5699
5700   // Add a skew in the fragment lists such that we don't scan
5701   // the same subset of frags from all SPJ requests in case
5702   // the scan is not 'T_SCAN_PARALLEL'.
5703 Uint16 fragNoOffs = requestPtr.p->m_rootFragId % fragCount;
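  // Worked example of the skew (illustration only): with fragCount == 8 and
  // m_rootFragId == 3, fragNoOffs == 3, so the fragment descriptors below
  // are created in the order 3,4,5,6,7,0,1,2 via '(fragNoOffs + i) % fragCount'.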
5704 Uint32 err = 0;
5705
5706 do
5707 {
5708 Ptr<ScanFragHandle> fragPtr;
5709
5710     /** Allocate & init all 'fragCount' fragment descriptors */
5711 {
5712 Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
5713
5714 err = checkTableError(treeNodePtr);
5715 if (unlikely(err != 0))
5716 {
5717 jam();
5718 break;
5719 }
5720 for (Uint32 i = 0; i<fragCount; i++)
5721 {
5722 jam();
5723 Ptr<ScanFragHandle> fragPtr;
5724 Uint16 fragNo = (fragNoOffs+i) % fragCount;
5725
5726 if (!ERROR_INSERTED_CLEAR(17012) &&
5727 likely(m_scanfraghandle_pool.seize(requestPtr.p->m_arena, fragPtr)))
5728 {
5729 jam();
5730 fragPtr.p->init(fragNo);
5731 fragPtr.p->m_treeNodePtrI = treeNodePtr.i;
5732 list.addLast(fragPtr);
5733 }
5734 else
5735 {
5736 jam();
5737 err = DbspjErr::OutOfQueryMemory;
5738 goto error;
5739 }
5740 }
5741 list.first(fragPtr); // Needed if T_CONST_PRUNE
5742 } // end 'Alloc scope'
5743
5744 if (treeNodePtr.p->m_bits & TreeNode::T_CONST_PRUNE)
5745 {
5746 jam();
5747
5748 // TODO we need a different variant of computeHash here,
5749 // since m_constPrunePtrI does not contain full primary key
5750       // but only the distribution-key part
5751
5752 BuildKeyReq tmp;
5753 Uint32 tableId = treeNodePtr.p->m_primaryTableId;
5754 err = computePartitionHash(signal, tmp, tableId, data.m_constPrunePtrI);
5755 if (unlikely(err != 0))
5756 {
5757 jam();
5758 break;
5759 }
5760
5761 releaseSection(data.m_constPrunePtrI);
5762 data.m_constPrunePtrI = RNIL;
5763
5764 err = getNodes(signal, tmp, tableId);
5765 if (unlikely(err != 0))
5766 {
5767 jam();
5768 break;
5769 }
5770
5771 fragPtr.p->m_fragId = tmp.fragId;
5772 fragPtr.p->m_ref = tmp.receiverRef;
5773 ndbassert(data.m_fragCount == 1);
5774 }
5775 else if (fragCount == 1)
5776 {
5777 jam();
5778 /**
5779       * This is roughly equivalent to T_CONST_PRUNE;
5780       * pretend that it is const-pruned.
5781 */
5782 if (treeNodePtr.p->m_bits & TreeNode::T_PRUNE_PATTERN)
5783 {
5784 jam();
5785 LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
5786 Local_pattern_store pattern(pool, data.m_prunePattern);
5787 pattern.release();
5788 }
5789 data.m_constPrunePtrI = RNIL;
5790 Uint32 clear = TreeNode::T_PRUNE_PATTERN | TreeNode::T_SCAN_PARALLEL;
5791 treeNodePtr.p->m_bits &= ~clear;
5792 treeNodePtr.p->m_bits |= TreeNode::T_CONST_PRUNE;
5793
5794 /**
5795 * We must get fragPtr.p->m_ref...so set pruned=false
5796 */
5797 pruned = false;
5798 }
5799 data.m_frags_complete = data.m_fragCount;
5800
5801 if (!pruned)
5802 {
5803 /** Start requesting node info from DIH */
5804 jam();
5805 err = scanindex_sendDihGetNodesReq(signal, requestPtr, treeNodePtr);
5806 if (unlikely(err != 0))
5807 {
5808 jam();
5809 break;
5810 }
5811 requestPtr.p->m_outstanding++;
5812 }
5813 else
5814 {
5815 jam();
5816 treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
5817 }
5818 } while (0);
5819
5820 if (likely(err==0))
5821 {
5822 jam();
5823 checkPrepareComplete(signal, requestPtr, 1);
5824 return;
5825 }
5826 error:
5827 ndbrequire(requestPtr.p->isScan());
5828 ndbrequire(requestPtr.p->m_outstanding >= 1);
5829 requestPtr.p->m_outstanding -= 1;
5830 abort(signal, requestPtr, err);
5831 }
5832
5833 /**
5834  * Will check the fragment list for fragments which need to
5835  * get node info from DIH to construct 'fragPtr.p->m_ref'.
5836  *
5837  * In order to avoid CPU starvation, or an unmanageably huge FragItem[],
5838  * at most MAX_DIH_FRAG_REQS fragments are requested in a single signal.
5839  * If there are more fragments, we have to call this function repeatedly
5840  * when the CONF for the previous fragment set is received.
5841 */
5842 Uint32
5843 Dbspj::scanindex_sendDihGetNodesReq(Signal* signal,
5844 Ptr<Request> requestPtr,
5845 Ptr<TreeNode> treeNodePtr)
5846 {
5847 jam();
5848 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
5849 Ptr<ScanFragHandle> fragPtr;
5850 Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
5851
5852 DihScanGetNodesReq * req = (DihScanGetNodesReq*)signal->getDataPtrSend();
5853 Uint32 fragCnt = 0;
5854 for (list.first(fragPtr);
5855 !fragPtr.isNull() && fragCnt < DihScanGetNodesReq::MAX_DIH_FRAG_REQS;
5856 list.next(fragPtr))
5857 {
5858 jam();
5859 if (fragPtr.p->m_ref == 0) // Need GSN_DIH_SCAN_GET_NODES_REQ
5860 {
5861 jam();
5862 req->fragItem[fragCnt].senderData = fragPtr.i;
5863 req->fragItem[fragCnt].fragId = fragPtr.p->m_fragId;
5864 fragCnt++;
5865 }
5866 }
5867
5868 if (fragCnt > 0)
5869 {
5870 jam();
5871 Uint32 tableId = treeNodePtr.p->m_tableOrIndexId;
5872 req->senderRef = reference();
5873 req->tableId = tableId;
5874 req->scanCookie = data.m_scanCookie;
5875 req->fragCnt = fragCnt;
5876
5877     /** Always send as a long signal, even if a short signal would
5878      * have been sufficient in the (rare) case of 'fragCnt==1'.
5879 */
5880 Ptr<SectionSegment> fragReq;
5881 Uint32 len = fragCnt*DihScanGetNodesReq::FragItem::Length;
5882 if (ERROR_INSERTED_CLEAR(17130) ||
5883 unlikely(!import(fragReq, (Uint32*)req->fragItem, len)))
5884 {
5885 jam();
5886 return DbspjErr::OutOfSectionMemory;
5887 }
5888
5889 SectionHandle handle(this, fragReq.i);
5890 sendSignal(DBDIH_REF, GSN_DIH_SCAN_GET_NODES_REQ, signal,
5891 DihScanGetNodesReq::FixedSignalLength,
5892 JBB, &handle);
5893
5894 data.m_frags_outstanding += fragCnt;
5895 }
5896 return 0;
5897 } //Dbspj::scanindex_sendDihGetNodesReq
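
/**
 * A sketch of the resulting REQ/CONF ping-pong, assuming a hypothetical
 * MAX_DIH_FRAG_REQS of 16 (the real limit is defined by DihScanGetNodesReq):
 * if 40 fragments still have 'm_ref == 0', the first call requests node info
 * for 16 of them, execDIH_SCAN_GET_NODES_CONF calls back here for the next
 * 16, and a final round handles the last 8. Only fragments with 'm_ref == 0'
 * are requested, so the iteration terminates once every ScanFragHandle has
 * its receiver reference.
 */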
5898
5899 void
5900 Dbspj::execDIH_SCAN_GET_NODES_REF(Signal* signal)
5901 {
5902 jamEntry();
5903 const DihScanGetNodesRef* ref = (DihScanGetNodesRef*)signal->getDataPtr();
5904 //const Uint32 tableId = ref->tableId;
5905 const Uint32 fragCnt = ref->fragCnt;
5906 const Uint32 errCode = ref->errCode;
5907 ndbassert(errCode != 0);
5908
5909 if (signal->getNoOfSections() > 0)
5910 {
5911 // Long signal: FragItems listed in first section
5912 jam();
5913 SectionHandle handle(this, signal);
5914 ndbassert(handle.m_cnt==1);
5915 SegmentedSectionPtr fragRefSection;
5916 ndbrequire(handle.getSection(fragRefSection,0));
5917 ndbassert(fragRefSection.p->m_sz == (fragCnt*DihScanGetNodesRef::FragItem::Length));
5918 ndbassert(fragCnt <= DihScanGetNodesReq::MAX_DIH_FRAG_REQS);
5919 copy((Uint32*)ref->fragItem, fragRefSection);
5920 releaseSections(handle);
5921 }
5922 else // Short signal, single frag in ref->fragItem[0]
5923 {
5924 ndbassert(fragCnt == 1);
5925 ndbassert(signal->getLength()
5926 == DihScanGetNodesRef::FixedSignalLength + DihScanGetNodesRef::FragItem::Length);
5927 }
5928
5929 UintR treeNodePtrI = RNIL;
5930 for (Uint32 i=0; i < fragCnt; i++)
5931 {
5932 jam();
5933 const Uint32 senderData = ref->fragItem[i].senderData;
5934
5935 Ptr<ScanFragHandle> fragPtr;
5936 m_scanfraghandle_pool.getPtr(fragPtr, senderData);
5937
5938 // All fragItem[] should be for same TreeNode
5939 ndbassert (treeNodePtrI == RNIL || treeNodePtrI == fragPtr.p->m_treeNodePtrI);
5940 treeNodePtrI = fragPtr.p->m_treeNodePtrI;
5941 } //for
5942
5943 ndbassert(treeNodePtrI != RNIL); // fragCnt > 0 above
5944 Ptr<TreeNode> treeNodePtr;
5945 m_treenode_pool.getPtr(treeNodePtr, treeNodePtrI);
5946
5947 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
5948 ndbassert(data.m_frags_outstanding == fragCnt);
5949 data.m_frags_outstanding -= fragCnt;
5950
5951 Ptr<Request> requestPtr;
5952 m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
5953 abort(signal, requestPtr, errCode);
5954
5955 if (data.m_frags_outstanding == 0)
5956 {
5957 jam();
5958 treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
5959 checkPrepareComplete(signal, requestPtr, 1);
5960 }
5961 }//Dbspj::execDIH_SCAN_GET_NODES_REF
5962
5963 void
5964 Dbspj::execDIH_SCAN_GET_NODES_CONF(Signal* signal)
5965 {
5966 jamEntry();
5967 const DihScanGetNodesConf * conf = (DihScanGetNodesConf*)signal->getDataPtr();
5968 const Uint32 fragCnt = conf->fragCnt;
5969
5970 if (signal->getNoOfSections() > 0)
5971 {
5972 // Unpack long signal
5973 jam();
5974 SectionHandle handle(this, signal);
5975 SegmentedSectionPtr fragConfSection;
5976 ndbrequire(handle.getSection(fragConfSection,0));
5977 ndbassert(fragConfSection.p->m_sz == (fragCnt*DihScanGetNodesConf::FragItem::Length));
5978 copy((Uint32*)conf->fragItem, fragConfSection);
5979 releaseSections(handle);
5980 }
5981 else // Short signal, with single FragItem
5982 {
5983 jam();
5984 ndbassert(fragCnt == 1);
5985 ndbassert(signal->getLength()
5986 == DihScanGetNodesConf::FixedSignalLength + DihScanGetNodesConf::FragItem::Length);
5987 }
5988
5989 UintR treeNodePtrI = RNIL;
5990 for (Uint32 i=0; i < fragCnt; i++)
5991 {
5992 jam();
5993 const Uint32 senderData = conf->fragItem[i].senderData;
5994 const Uint32 node = conf->fragItem[i].nodes[0];
5995 const Uint32 instanceKey = conf->fragItem[i].instanceKey;
5996
5997 Ptr<ScanFragHandle> fragPtr;
5998 m_scanfraghandle_pool.getPtr(fragPtr, senderData);
5999
6000 // All fragItem[] should be for same TreeNode
6001 ndbassert (treeNodePtrI == RNIL || treeNodePtrI == fragPtr.p->m_treeNodePtrI);
6002 treeNodePtrI = fragPtr.p->m_treeNodePtrI;
6003
6004 fragPtr.p->m_ref = numberToRef(DBLQH, instanceKey, node);
6005 } //for
6006
6007 ndbassert(treeNodePtrI != RNIL); // fragCnt > 0 above
6008 Ptr<TreeNode> treeNodePtr;
6009 m_treenode_pool.getPtr(treeNodePtr, treeNodePtrI);
6010
6011 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
6012 ndbassert(data.m_frags_outstanding == fragCnt);
6013 data.m_frags_outstanding -= fragCnt;
6014
6015 Ptr<Request> requestPtr;
6016 m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
6017
6018 /** Check if we need to send more GSN_DIH_SCAN_GET_NODES_REQ */
6019 Uint32 err = scanindex_sendDihGetNodesReq(signal, requestPtr, treeNodePtr);
6020 if (unlikely(err != 0))
6021 {
6022 jam();
6023 abort(signal, requestPtr, err);
6024 }
6025
6026 if (data.m_frags_outstanding == 0)
6027 {
6028 jam();
6029 treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
6030 checkPrepareComplete(signal, requestPtr, 1);
6031 }
6032 }//Dbspj::execDIH_SCAN_GET_NODES_CONF
6033
6034 Uint32
6035 Dbspj::scanIndex_findFrag(Local_ScanFragHandle_list & list,
6036 Ptr<ScanFragHandle> & fragPtr, Uint32 fragId)
6037 {
6038 for (list.first(fragPtr); !fragPtr.isNull(); list.next(fragPtr))
6039 {
6040 jam();
6041 if (fragPtr.p->m_fragId == fragId)
6042 {
6043 jam();
6044 return 0;
6045 }
6046 }
6047
6048 return DbspjErr::IndexFragNotFound;
6049 }
6050
6051 void
6052 Dbspj::scanIndex_parent_row(Signal* signal,
6053 Ptr<Request> requestPtr,
6054 Ptr<TreeNode> treeNodePtr,
6055 const RowPtr & rowRef)
6056 {
6057 jam();
6058 DEBUG("::scanIndex_parent_row"
6059 << ", node: " << treeNodePtr.p->m_node_no);
6060
6061 Uint32 err;
6062 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
6063
6064 /**
6065    * Construct the range definition, and if a prune
6066    * pattern is enabled, attach it to the correct
6067    * scan-index fragment.
6068 */
6069 do
6070 {
6071 Ptr<ScanFragHandle> fragPtr;
6072 Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
6073 LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
6074
6075 err = checkTableError(treeNodePtr);
6076 if (unlikely(err != 0))
6077 {
6078 jam();
6079 break;
6080 }
6081
6082 if (treeNodePtr.p->m_bits & TreeNode::T_PRUNE_PATTERN)
6083 {
6084 jam();
6085
6086 /**
6087        * TODO: Expand into linear memory directly, instead
6088        * of expanding into sections and then copying the
6089        * section into linear memory.
6090 */
6091 Local_pattern_store pattern(pool, data.m_prunePattern);
6092 Uint32 pruneKeyPtrI = RNIL;
6093 bool hasNull;
6094 err = expand(pruneKeyPtrI, pattern, rowRef, hasNull);
6095 if (unlikely(err != 0))
6096 {
6097 jam();
6098 releaseSection(pruneKeyPtrI);
6099 break;
6100 }
6101
6102 if (unlikely(hasNull))
6103 {
6104 jam();
6105 DEBUG("T_PRUNE_PATTERN-key contain NULL values");
6106
6107 // Ignore this request as 'NULL == <column>' will never give a match
6108 releaseSection(pruneKeyPtrI);
6109 return; // Bailout, SCANREQ would have returned 0 rows anyway
6110 }
6111
6112 BuildKeyReq tmp;
6113 Uint32 tableId = treeNodePtr.p->m_primaryTableId;
6114 err = computePartitionHash(signal, tmp, tableId, pruneKeyPtrI);
6115 releaseSection(pruneKeyPtrI);
6116 if (unlikely(err != 0))
6117 {
6118 jam();
6119 break;
6120 }
6121
6122 err = getNodes(signal, tmp, tableId);
6123 if (unlikely(err != 0))
6124 {
6125 jam();
6126 break;
6127 }
6128
6129 err = scanIndex_findFrag(list, fragPtr, tmp.fragId);
6130 if (unlikely(err != 0))
6131 {
6132 DEBUG_CRASH();
6133 break;
6134 }
6135
6136 /**
6137 * NOTE: We can get different receiverRef's here
6138       * for different keys. E.g. during node recovery where
6139       * the primary fragment is switched.
6140       *
6141       * Use the latest that we receive.
6142 *
6143 * TODO: Also double check table-reorg
6144 */
6145 fragPtr.p->m_ref = tmp.receiverRef;
6146 }
6147 else
6148 {
6149 jam();
6150 /**
6151       * If const-pruned or not pruned, store on the first fragment,
6152       * and send to one or all fragments, respectively.
6153 */
6154 list.first(fragPtr);
6155 }
6156
6157 bool hasNull = false;
6158 if (treeNodePtr.p->m_bits & TreeNode::T_KEYINFO_CONSTRUCTED)
6159 {
6160 jam();
6161 Local_pattern_store pattern(pool, treeNodePtr.p->m_keyPattern);
6162
6163 /**
6164 * Test execution terminated due to 'OutOfSectionMemory':
6165 * - 17060: Fail on scanIndex_parent_row at first call
6166 * - 17061: Fail on scanIndex_parent_row if 'isLeaf'
6167 * - 17062: Fail on scanIndex_parent_row if treeNode not root
6168 * - 17063: Fail on scanIndex_parent_row at a random node of the query tree
6169 */
6170 if (ERROR_INSERTED(17060) ||
6171 (ERROR_INSERTED(17061) && (treeNodePtr.p->isLeaf())) ||
6172 (ERROR_INSERTED(17062) && (treeNodePtr.p->m_parentPtrI != RNIL)) ||
6173 (ERROR_INSERTED(17063) && (rand() % 7) == 0))
6174 {
6175 jam();
6176 CLEAR_ERROR_INSERT_VALUE;
6177 ndbout_c("Injecting OutOfSectionMemory error at line %d file %s",
6178 __LINE__, __FILE__);
6179 err = DbspjErr::OutOfSectionMemory;
6180 break;
6181 }
6182
6183 err = expand(fragPtr.p->m_rangePtrI, pattern, rowRef, hasNull);
6184 if (unlikely(err != 0))
6185 {
6186 jam();
6187 break;
6188 }
6189 }
6190 else
6191 {
6192 jam();
6193 // Fixed key...fix later...
6194 ndbrequire(false);
6195 }
6196 // ndbrequire(!hasNull); // FIXME, can't ignore request as we already added it to keyPattern
6197 scanIndex_fixupBound(fragPtr, fragPtr.p->m_rangePtrI, rowRef.m_src_correlation);
6198
6199 if (treeNodePtr.p->m_bits & TreeNode::T_ONE_SHOT)
6200 {
6201 jam();
6202 /**
6203        * Being a T_ONE_SHOT node means that we are called with
6204        * parent_row only once, i.e. the batch is complete.
6205 */
6206 scanIndex_parent_batch_complete(signal, requestPtr, treeNodePtr);
6207 }
6208
6209 return;
6210 } while (0);
6211
6212 ndbrequire(err);
6213 jam();
6214 abort(signal, requestPtr, err);
6215 }
6216
6217
6218 void
6219 Dbspj::scanIndex_fixupBound(Ptr<ScanFragHandle> fragPtr,
6220 Uint32 ptrI, Uint32 corrVal)
6221 {
6222 /**
6223    * Index bounds... need special tender loving care...
6224    *
6225    * 1) Set the bound number and bound size, and renumber attributes
6226 */
6227 SectionReader r0(ptrI, getSectionSegmentPool());
6228 ndbrequire(r0.step(fragPtr.p->m_range_builder.m_range_size));
6229 Uint32 boundsz = r0.getSize() - fragPtr.p->m_range_builder.m_range_size;
6230 Uint32 boundno = fragPtr.p->m_range_builder.m_range_cnt + 1;
6231
6232 Uint32 tmp;
6233 ndbrequire(r0.peekWord(&tmp));
6234 tmp |= (boundsz << 16) | ((corrVal & 0xFFF) << 4);
6235 ndbrequire(r0.updateWord(tmp));
6236 ndbrequire(r0.step(1)); // Skip first BoundType
6237
6238    // TODO: The renumbering below assumes there are only EQ-bounds !!
6239 Uint32 id = 0;
6240 Uint32 len32;
6241 do
6242 {
6243 ndbrequire(r0.peekWord(&tmp));
6244 AttributeHeader ah(tmp);
6245 Uint32 len = ah.getByteSize();
6246 AttributeHeader::init(&tmp, id++, len);
6247 ndbrequire(r0.updateWord(tmp));
6248 len32 = (len + 3) >> 2;
6249 } while (r0.step(2 + len32)); // Skip AttributeHeader(1) + Attribute(len32) + next BoundType(1)
6250
6251 fragPtr.p->m_range_builder.m_range_cnt = boundno;
6252 fragPtr.p->m_range_builder.m_range_size = r0.getSize();
6253 }
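
/**
 * A sketch of the first word of each bound after ::scanIndex_fixupBound,
 * as implied by the OR-mask above (the BoundType value 4 for an EQ-bound
 * is an assumption for the example; the enum is defined elsewhere):
 *
 *   bits  0.. 3 : BoundType (kept from the original word)
 *   bits  4..15 : low 12 bits of the parent row's correlation value
 *   bits 16..31 : size of this bound in words
 *
 * e.g. boundsz == 3, corrVal == 0x25, BoundType == 4 gives
 *   tmp = 4 | (0x25 << 4) | (3 << 16) = 0x00030254
 */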
6254
6255 void
6256 Dbspj::scanIndex_parent_batch_complete(Signal* signal,
6257 Ptr<Request> requestPtr,
6258 Ptr<TreeNode> treeNodePtr)
6259 {
6260 jam();
6261
6262 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
6263 data.m_rows_received = 0;
6264 data.m_rows_expecting = 0;
6265 ndbassert(data.m_frags_outstanding == 0);
6266 ndbassert(data.m_frags_complete == data.m_fragCount);
6267 data.m_frags_complete = 0;
6268
6269 Ptr<ScanFragHandle> fragPtr;
6270 {
6271 Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
6272 list.first(fragPtr);
6273
6274 if ((treeNodePtr.p->m_bits & TreeNode::T_PRUNE_PATTERN) == 0)
6275 {
6276 if (fragPtr.p->m_rangePtrI == RNIL)
6277 {
6278 // No keys found
6279 jam();
6280 data.m_frags_complete = data.m_fragCount;
6281 }
6282 }
6283 else
6284 {
6285 while(!fragPtr.isNull())
6286 {
6287 if (fragPtr.p->m_rangePtrI == RNIL)
6288 {
6289 jam();
6290 /**
6291                * This is a pruned scan, so we need only scan those
6292                * fragments that some distribution key hashed to.
6293 */
6294 fragPtr.p->m_state = ScanFragHandle::SFH_COMPLETE;
6295 data.m_frags_complete++;
6296 }
6297 list.next(fragPtr);
6298 }
6299 }
6300 }
6301 data.m_frags_not_started = data.m_fragCount - data.m_frags_complete;
6302
6303 if (data.m_frags_complete == data.m_fragCount)
6304 {
6305 jam();
6306 /**
6307      * No keys were produced...
6308 */
6309 return;
6310 }
6311
6312 /**
6313 * When parent's batch is complete, we send our batch
6314 */
6315 const ScanFragReq * org = (const ScanFragReq*)data.m_scanFragReq;
6316 ndbrequire(org->batch_size_rows > 0);
6317
6318 data.m_firstBatch = true;
6319 if (treeNodePtr.p->m_bits & TreeNode::T_SCAN_PARALLEL)
6320 {
6321 jam();
6322 data.m_parallelism = MIN(data.m_fragCount - data.m_frags_complete,
6323 org->batch_size_rows);
6324 }
6325 else if (data.m_firstExecution)
6326 {
6327 /**
6328 * Having a high parallelism would allow us to fetch data from many
6329 * fragments in parallel and thus reduce the number of round trips.
6330 * On the other hand, we should set parallelism so low that we can fetch
6331 * all data from a fragment in one batch if possible.
6332 * Since this is the first execution, we do not know how many rows or bytes
6333 * this operation is likely to return. Therefore we set parallelism to 1,
6334 * since this gives the lowest penalty if our guess is wrong.
6335 */
6336 jam();
6337 data.m_parallelism = 1;
6338 }
6339 else
6340 {
6341 jam();
6342 /**
6343 * Use statistics from earlier runs of this operation to estimate the
6344 * initial parallelism. We use the mean minus two times the standard
6345      * deviation to have a low risk of setting parallelism too high (as erring
6346 * in the other direction is more costly).
6347 */
6348 Int32 parallelism =
6349 static_cast<Int32>(MIN(data.m_parallelismStat.getMean()
6350 // Add 0.5 to get proper rounding.
6351 - 2 * data.m_parallelismStat.getStdDev() + 0.5,
6352 org->batch_size_rows));
6353
6354 if (parallelism < 1)
6355 {
6356 jam();
6357 parallelism = 1;
6358 }
6359 else if ((data.m_fragCount - data.m_frags_complete) % parallelism != 0)
6360 {
6361 jam();
6362 /**
6363 * Set parallelism such that we can expect to have similar
6364 * parallelism in each batch. For example if there are 8 remaining
6365        * fragments, then we should fetch 2 times 4 fragments rather than
6366 * 7+1.
6367 */
6368 const Int32 roundTrips =
6369 1 + (data.m_fragCount - data.m_frags_complete) / parallelism;
6370 parallelism = (data.m_fragCount - data.m_frags_complete) / roundTrips;
6371 }
6372
6373 ndbassert(parallelism >= 1);
6374 ndbassert((Uint32)parallelism + data.m_frags_complete <= data.m_fragCount);
6375 data.m_parallelism = static_cast<Uint32>(parallelism);
6376
6377 #ifdef DEBUG_SCAN_FRAGREQ
6378 DEBUG("::scanIndex_parent_batch_complete() starting index scan with parallelism="
6379 << data.m_parallelism);
6380 #endif
6381 }
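
    /**
     * Worked example of the two adjustments above (illustration only):
     * with a parallelismStat mean of 6.3 and stddev of 1.0 the estimate
     * is 6.3 - 2*1.0 + 0.5 = 4.8, which the Int32 cast truncates to 4.
     * With 8 remaining fragments, 8 % 4 == 0, so no smoothing is needed;
     * had the estimate been 7, roundTrips = 1 + 8/7 = 2 and parallelism
     * would be lowered to 8/2 = 4, giving two even rounds instead of 7+1.
     */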
6382 ndbrequire(data.m_parallelism > 0);
6383
6384 const Uint32 bs_rows = org->batch_size_rows/ data.m_parallelism;
6385 const Uint32 bs_bytes = org->batch_size_bytes / data.m_parallelism;
6386 ndbassert(bs_rows > 0);
6387 ndbassert(bs_bytes > 0);
6388
6389 data.m_largestBatchRows = 0;
6390 data.m_largestBatchBytes = 0;
6391 data.m_totalRows = 0;
6392 data.m_totalBytes = 0;
6393
6394 {
6395 Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
6396 Ptr<ScanFragHandle> fragPtr;
6397 list.first(fragPtr);
6398
6399 while(!fragPtr.isNull())
6400 {
6401 ndbassert(fragPtr.p->m_state == ScanFragHandle::SFH_NOT_STARTED ||
6402 fragPtr.p->m_state == ScanFragHandle::SFH_COMPLETE);
6403 fragPtr.p->m_state = ScanFragHandle::SFH_NOT_STARTED;
6404 list.next(fragPtr);
6405 }
6406 }
6407
6408 Uint32 batchRange = 0;
6409 Uint32 frags_started =
6410 scanIndex_send(signal,
6411 requestPtr,
6412 treeNodePtr,
6413 data.m_parallelism,
6414 bs_bytes,
6415 bs_rows,
6416 batchRange);
6417
6418 /**
6419    * scanIndex_send() might fail to send (e.g. due to errors):
6420 * Check that we really did send something before
6421 * updating outstanding & active.
6422 */
6423 if (likely(frags_started > 0))
6424 {
6425 jam();
6426 data.m_firstExecution = false;
6427
6428 ndbrequire(static_cast<Uint32>(data.m_frags_outstanding +
6429 data.m_frags_complete) <=
6430 data.m_fragCount);
6431
6432 data.m_batch_chunks = 1;
6433 requestPtr.p->m_cnt_active++;
6434 requestPtr.p->m_outstanding++;
6435 requestPtr.p->m_completed_nodes.clear(treeNodePtr.p->m_node_no);
6436 treeNodePtr.p->m_state = TreeNode::TN_ACTIVE;
6437 }
6438 }
6439
6440 void
6441 Dbspj::scanIndex_parent_batch_repeat(Signal* signal,
6442 Ptr<Request> requestPtr,
6443 Ptr<TreeNode> treeNodePtr)
6444 {
6445 jam();
6446 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
6447
6448 DEBUG("scanIndex_parent_batch_repeat(), m_node_no: " << treeNodePtr.p->m_node_no
6449 << ", m_batch_chunks: " << data.m_batch_chunks);
6450
6451 ndbassert(treeNodePtr.p->m_bits & TreeNode::T_SCAN_REPEATABLE);
6452
6453 /**
6454 * Register index-scans to be restarted if we didn't get all
6455  * previously fetched parent-related child rows in a single batch.
6456 */
6457 if (data.m_batch_chunks > 1)
6458 {
6459 jam();
6460 DEBUG("Register TreeNode for restart, m_node_no: " << treeNodePtr.p->m_node_no);
6461 ndbrequire(treeNodePtr.p->m_state != TreeNode::TN_ACTIVE);
6462 registerActiveCursor(requestPtr, treeNodePtr);
6463 data.m_batch_chunks = 0;
6464 }
6465 }
6466
6467 /**
6468 * Ask for the first batch for a number of fragments.
6469 *
6470  * Returns the number of fragments from which we requested
6471  * the 'first batch'. (<= noOfFrags)
6472 */
6473 Uint32
6474 Dbspj::scanIndex_send(Signal* signal,
6475 Ptr<Request> requestPtr,
6476 Ptr<TreeNode> treeNodePtr,
6477 Uint32 noOfFrags,
6478 Uint32 bs_bytes,
6479 Uint32 bs_rows,
6480 Uint32& batchRange)
6481 {
6482 jam();
6483 ndbassert(bs_bytes > 0);
6484 ndbassert(bs_rows > 0);
6485 ndbassert(bs_rows <= bs_bytes);
6486 /**
6487 * if (m_bits & prunemask):
6488 * - Range keys sliced out to each ScanFragHandle
6489 * - Else, range keys kept on first (and only) ScanFragHandle
6490 */
6491 const bool prune = treeNodePtr.p->m_bits &
6492 (TreeNode::T_PRUNE_PATTERN | TreeNode::T_CONST_PRUNE);
6493
6494 /**
6495    * If the scan is repeatable, we must make sure not to release range keys,
6496    * so that we can use them again in the next repetition.
6497 */
6498 const bool repeatable =
6499 (treeNodePtr.p->m_bits & TreeNode::T_SCAN_REPEATABLE) != 0;
6500
6501 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
6502 ndbassert(noOfFrags > 0);
6503 ndbassert(data.m_frags_not_started >= noOfFrags);
6504 ScanFragReq* const req =
6505 reinterpret_cast<ScanFragReq*>(signal->getDataPtrSend());
6506 const ScanFragReq * const org
6507 = reinterpret_cast<ScanFragReq*>(data.m_scanFragReq);
6508 memcpy(req, org, sizeof(data.m_scanFragReq));
6509 // req->variableData[0] // set below
6510 req->variableData[1] = requestPtr.p->m_rootResultData;
6511 req->batch_size_bytes = bs_bytes;
6512 req->batch_size_rows = bs_rows;
6513
6514 Uint32 requestsSent = 0;
6515 Uint32 err = checkTableError(treeNodePtr);
6516 if (likely(err == 0))
6517 {
6518 Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
6519 Ptr<ScanFragHandle> fragPtr;
6520 list.first(fragPtr);
6521 Uint32 keyInfoPtrI = fragPtr.p->m_rangePtrI;
6522 ndbrequire(prune || keyInfoPtrI != RNIL);
6523 /**
6524 * Iterate over the list of fragments until we have sent as many
6525 * SCAN_FRAGREQs as we should.
6526 */
6527 while (requestsSent < noOfFrags)
6528 {
6529 jam();
6530 ndbassert(!fragPtr.isNull());
6531
6532 if (fragPtr.p->m_state != ScanFragHandle::SFH_NOT_STARTED)
6533 {
6534 // Skip forward to the frags that we should send.
6535 jam();
6536 list.next(fragPtr);
6537 continue;
6538 }
6539
6540 const Uint32 ref = fragPtr.p->m_ref;
6541
6542 if (noOfFrags==1 && !prune &&
6543 data.m_frags_not_started == data.m_fragCount &&
6544 refToNode(ref) != getOwnNodeId() &&
6545 list.hasNext(fragPtr))
6546 {
6547 /**
6548 * If we are doing a scan with adaptive parallelism and start with
6549 * parallelism=1 then it makes sense to fetch a batch from a fragment on
6550 * the local data node. The reason for this is that if that fragment
6551 * contains few rows, we may be able to read from several fragments in
6552 * parallel. Then we minimize the total number of round trips (to remote
6553 * data nodes) if we fetch the first fragment batch locally.
6554 */
6555 jam();
6556 list.next(fragPtr);
6557 continue;
6558 }
6559
6560 Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;
6561
6562 /**
6563 * Set data specific for this fragment
6564 */
6565 req->senderData = fragPtr.i;
6566 req->fragmentNoKeyLen = fragPtr.p->m_fragId;
6567 req->variableData[0] = batchRange;
6568
6569 // Test for online downgrade.
6570 if (unlikely(ref != 0 &&
6571 !ndb_join_pushdown(getNodeInfo(refToNode(ref)).m_version)))
6572 {
6573 jam();
6574 err = 4003; // Function not implemented.
6575 break;
6576 }
6577
6578 if (prune)
6579 {
6580 jam();
6581 keyInfoPtrI = fragPtr.p->m_rangePtrI;
6582 if (keyInfoPtrI == RNIL)
6583 {
6584 /**
6585 * Since we use pruning, we can see that no parent rows would hash
6586 * to this fragment.
6587 */
6588 jam();
6589 fragPtr.p->m_state = ScanFragHandle::SFH_COMPLETE;
6590 list.next(fragPtr);
6591 continue;
6592 }
6593
6594 if (!repeatable)
6595 {
6596              * Since we'll use sendSignal(), which releases the sections, and
6597              * the attrInfo must be sent several times, we must copy it first.
6598              * (For repeatable or unpruned scans we use sendSignalNoRelease(), so no copy is needed.)
6599 * we use sendSignalNoRelease(), so then we do not need to copy.)
6600 */
6601 jam();
6602 Uint32 tmp = RNIL;
6603
6604 /**
6605 * Test execution terminated due to 'OutOfSectionMemory' which
6606 * may happen for different treeNodes in the request:
6607 * - 17090: Fail on any scanIndex_send()
6608 * - 17091: Fail after sending SCAN_FRAGREQ to some fragments
6609 * - 17092: Fail on scanIndex_send() if 'isLeaf'
6610 * - 17093: Fail on scanIndex_send() if treeNode not root
6611 */
6612
6613 if (ERROR_INSERTED(17090) ||
6614 (ERROR_INSERTED(17091) && requestsSent > 1) ||
6615 (ERROR_INSERTED(17092) && treeNodePtr.p->isLeaf()) ||
6616 (ERROR_INSERTED(17093) && treeNodePtr.p->m_parentPtrI != RNIL))
6617 {
6618 jam();
6619 CLEAR_ERROR_INSERT_VALUE;
6620 ndbout_c("Injecting OutOfSectionMemory error at line %d file %s",
6621 __LINE__, __FILE__);
6622 err = DbspjErr::OutOfSectionMemory;
6623 break;
6624 }
6625
6626 if (!dupSection(tmp, attrInfoPtrI))
6627 {
6628 jam();
6629 ndbassert(tmp == RNIL); // Guard for memleak
6630 err = DbspjErr::OutOfSectionMemory;
6631 break;
6632 }
6633
6634 attrInfoPtrI = tmp;
6635 }
6636 }
6637
6638 SectionHandle handle(this);
6639 getSection(handle.m_ptr[0], attrInfoPtrI);
6640 getSection(handle.m_ptr[1], keyInfoPtrI);
6641 handle.m_cnt = 2;
6642
6643 #if defined DEBUG_SCAN_FRAGREQ
6644 ndbout_c("SCAN_FRAGREQ to %x", ref);
6645 printSCAN_FRAGREQ(stdout, signal->getDataPtrSend(),
6646 NDB_ARRAY_SIZE(treeNodePtr.p->m_scanfrag_data.m_scanFragReq),
6647 DBLQH);
6648 printf("ATTRINFO: ");
6649 print(handle.m_ptr[0], stdout);
6650 printf("KEYINFO: ");
6651 print(handle.m_ptr[1], stdout);
6652 #endif
6653
6654 if (refToNode(ref) == getOwnNodeId())
6655 {
6656 c_Counters.incr_counter(CI_LOCAL_RANGE_SCANS_SENT, 1);
6657 }
6658 else
6659 {
6660 c_Counters.incr_counter(CI_REMOTE_RANGE_SCANS_SENT, 1);
6661 }
6662
6663 /**
6664 * For a non-repeatable pruned scan, key info is unique for each
6665 * fragment and therefore cannot be reused, so we release key info
6666 * right away.
6667 */
6668
6669 if (ERROR_INSERTED(17110) ||
6670 (ERROR_INSERTED(17111) && treeNodePtr.p->isLeaf()) ||
6671 (ERROR_INSERTED(17112) && treeNodePtr.p->m_parentPtrI != RNIL))
6672 {
6673 jam();
6674 CLEAR_ERROR_INSERT_VALUE;
6675 ndbout_c("Injecting invalid schema version error at line %d file %s",
6676 __LINE__, __FILE__);
6677 // Provoke 'Invalid schema version' in order to receive SCAN_FRAGREF
6678 req->schemaVersion++;
6679 }
6680
6681 /**
6682 * To reduce the copy burden we want to keep hold of the
6683 * AttrInfo and KeyInfo sections after sending them to
6684 * LQH. To do this we perform the fragmented send inline,
6685 * so that all fragments are sent *now*. This avoids any
6686 * problems with the fragmented send CONTINUE 'thread' using
6687 * the section while we hold or even release it. The
6688 * signal receiver can still take realtime breaks when
6689 * receiving.
6690 *
6691 * Indicate to sendFirstFragment that we want to keep the
6692 * fragments, so it must not free them, unless this is the
6693 * last request in which case they can be freed. If the
6694 * last request is a local send then a copy is avoided.
6695 */
6696 const bool release = prune && !repeatable;
6697 {
6698 FragmentSendInfo fragSendInfo;
6699 sendFirstFragment(fragSendInfo,
6700 ref,
6701 GSN_SCAN_FRAGREQ,
6702 signal,
6703 NDB_ARRAY_SIZE(data.m_scanFragReq),
6704 JBB,
6705 &handle,
6706 !release); // Keep sent sections unless
6707 // last send
6708
6709 while (fragSendInfo.m_status != FragmentSendInfo::SendComplete)
6710 {
6711 jam();
6712 // Send remaining fragments
6713 sendNextSegmentedFragment(signal, fragSendInfo);
6714 }
6715 }
6716
6717 if (release)
6718 {
6719 jam();
6720 fragPtr.p->m_rangePtrI = RNIL;
6721 fragPtr.p->reset_ranges();
6722 }
6723 handle.clear();
6724
6725 fragPtr.p->m_state = ScanFragHandle::SFH_SCANNING; // running
6726 data.m_frags_outstanding++;
6727 data.m_frags_not_started--;
6728 batchRange += bs_rows;
6729 requestsSent++;
6730 list.next(fragPtr);
6731 } // while (requestsSent < noOfFrags)
6732 }
6733 if (err)
6734 {
6735 jam();
6736 abort(signal, requestPtr, err);
6737 }
6738
6739 return requestsSent;
6740 }
6741
6742 void
6743 Dbspj::scanIndex_execTRANSID_AI(Signal* signal,
6744 Ptr<Request> requestPtr,
6745 Ptr<TreeNode> treeNodePtr,
6746 const RowPtr & rowRef)
6747 {
6748 jam();
6749
6750 common_execTRANSID_AI(signal, requestPtr, treeNodePtr, rowRef);
6751
6752 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
6753 data.m_rows_received++;
6754 ndbassert(treeNodePtr.p->m_resumePtrI == RNIL);
6755
6756 if (data.m_frags_outstanding == 0 &&
6757 data.m_rows_received == data.m_rows_expecting)
6758 {
6759 jam();
6760 handleTreeNodeComplete(signal, requestPtr, treeNodePtr);
6761
6762 checkBatchComplete(signal, requestPtr, 1);
6763 return;
6764 }
6765 }
6766
6767 void
6768 Dbspj::scanIndex_execSCAN_FRAGCONF(Signal* signal,
6769 Ptr<Request> requestPtr,
6770 Ptr<TreeNode> treeNodePtr,
6771 Ptr<ScanFragHandle> fragPtr)
6772 {
6773 jam();
6774
6775 const ScanFragConf * conf = (const ScanFragConf*)(signal->getDataPtr());
6776
6777 Uint32 rows = conf->completedOps;
6778 Uint32 done = conf->fragmentCompleted;
6779 Uint32 bytes = conf->total_len * sizeof(Uint32);
6780
6781 Uint32 state = fragPtr.p->m_state;
6782 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
6783
6784 if (state == ScanFragHandle::SFH_WAIT_CLOSE && done == 0)
6785 {
6786 jam();
6787 /**
6788 * We sent an explicit close request...ignore this...a close will come later
6789 */
6790 return;
6791 }
6792
6793 requestPtr.p->m_rows += rows;
6794 data.m_totalRows += rows;
6795 data.m_totalBytes += bytes;
6796 data.m_largestBatchRows = MAX(data.m_largestBatchRows, rows);
6797 data.m_largestBatchBytes = MAX(data.m_largestBatchBytes, bytes);
6798
6799 if (!treeNodePtr.p->isLeaf())
6800 {
6801 jam();
6802 data.m_rows_expecting += rows;
6803 }
6804 ndbrequire(data.m_frags_outstanding);
6805 ndbrequire(state == ScanFragHandle::SFH_SCANNING ||
6806 state == ScanFragHandle::SFH_WAIT_CLOSE);
6807
6808 data.m_frags_outstanding--;
6809 fragPtr.p->m_state = ScanFragHandle::SFH_WAIT_NEXTREQ;
6810
6811 if (done)
6812 {
6813 jam();
6814 fragPtr.p->m_state = ScanFragHandle::SFH_COMPLETE;
6815 ndbrequire(data.m_frags_complete < data.m_fragCount);
6816 data.m_frags_complete++;
6817
6818 if (data.m_frags_complete == data.m_fragCount ||
6819 ((requestPtr.p->m_state & Request::RS_ABORTING) != 0 &&
6820 data.m_fragCount == (data.m_frags_complete + data.m_frags_not_started)))
6821 {
6822 jam();
6823 ndbrequire(requestPtr.p->m_cnt_active);
6824 requestPtr.p->m_cnt_active--;
6825 treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
6826 }
6827 }
6828
6829
6830 if (data.m_frags_outstanding == 0)
6831 {
6832 const bool isFirstBatch = data.m_firstBatch;
6833 data.m_firstBatch = false;
6834
6835 const ScanFragReq * const org
6836 = reinterpret_cast<const ScanFragReq*>(data.m_scanFragReq);
6837
6838 if (data.m_frags_complete == data.m_fragCount)
6839 {
6840 jam();
6841 /**
6842 * Calculate what would have been the optimal parallelism for the
6843 * scan instance that we have just completed, and update
6844       * 'parallelismStat' with this value. We then use this statistic to set
6845 * the initial parallelism for the next instance of this operation.
6846 */
6847 double parallelism = data.m_fragCount;
6848 if (data.m_totalRows > 0)
6849 {
6850 parallelism = MIN(parallelism,
6851 double(org->batch_size_rows) * data.m_fragCount
6852 / data.m_totalRows);
6853 }
6854 if (data.m_totalBytes > 0)
6855 {
6856 parallelism = MIN(parallelism,
6857 double(org->batch_size_bytes) * data.m_fragCount
6858 / data.m_totalBytes);
6859 }
6860 data.m_parallelismStat.update(parallelism);
6861 }
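
    /**
     * Worked example of the estimate above (illustration only): with
     * fragCount == 8, batch_size_rows == 100 and m_totalRows == 400, the
     * row-limited parallelism is 100 * 8 / 400 = 2, i.e. scanning two
     * fragments at a time would just have filled each batch. The byte
     * bound is computed the same way, and the minimum of the two is fed
     * into m_parallelismStat for the next execution of this operation.
     */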
6862
6863 /**
6864 * Don't 'handleTreeNodeComplete' if we're aborting...
6865 */
6866 if (state == ScanFragHandle::SFH_WAIT_CLOSE)
6867 {
6868 jam();
6869 ndbrequire((requestPtr.p->m_state & Request::RS_ABORTING) != 0);
6870 checkBatchComplete(signal, requestPtr, 1);
6871 return;
6872 }
6873
6874 if (isFirstBatch && data.m_frags_not_started > 0)
6875 {
6876 /**
6877 * Check if we can expect to be able to fetch the entire result set by
6878 * asking for more fragments within the same batch. This may improve
6879 * performance for bushy scans, as subsequent bushy branches must be
6880 * re-executed for each batch of this scan.
6881 */
6882
6883 /**
6884 * Find the maximal correlation value that we may have seen so far.
6885 * Correlation value must be unique within batch and smaller than
6886 * org->batch_size_rows.
6887 */
6888 const Uint32 maxCorrVal = (data.m_totalRows) == 0 ? 0 :
6889 org->batch_size_rows / data.m_parallelism * (data.m_parallelism - 1)
6890 + data.m_totalRows;
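
      /**
       * Worked example (illustration only): with batch_size_rows == 64,
       * m_parallelism == 4 and m_totalRows == 10, each started fragment
       * owns a correlation sub-range of 64/4 == 16 values, so the largest
       * value possibly handed out is 16 * (4-1) + 10 == 58, leaving
       * 64 - 58 == 6 rows of correlation space for the fragments that
       * have not been started yet.
       */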
6891
6892 // Number of rows & bytes that we can still fetch in this batch.
6893 const Int32 remainingRows
6894 = static_cast<Int32>(org->batch_size_rows - maxCorrVal);
6895 const Int32 remainingBytes
6896 = static_cast<Int32>(org->batch_size_bytes - data.m_totalBytes);
6897
6898 if (remainingRows >= data.m_frags_not_started &&
6899 remainingBytes >= data.m_frags_not_started &&
6900 /**
6901              * Check that (remaining row capacity)/(remaining fragments) is
6902              * greater than or equal to (rows read so far)/(finished fragments).
6903 */
6904 remainingRows * static_cast<Int32>(data.m_parallelism) >=
6905 static_cast<Int32>(data.m_totalRows * data.m_frags_not_started) &&
6906 remainingBytes * static_cast<Int32>(data.m_parallelism) >=
6907 static_cast<Int32>(data.m_totalBytes * data.m_frags_not_started))
6908 {
6909 jam();
6910 Uint32 batchRange = maxCorrVal;
6911 Uint32 bs_rows = remainingRows / data.m_frags_not_started;
6912 Uint32 bs_bytes = remainingBytes / data.m_frags_not_started;
6913
6914 DEBUG("::scanIndex_execSCAN_FRAGCONF() first batch was not full."
6915 " Asking for new batches from " << data.m_frags_not_started <<
6916 " fragments with " <<
6917 bs_rows <<" rows and " <<
6918 bs_bytes << " bytes.");
6919
6920 if (unlikely(bs_rows > bs_bytes))
6921 bs_rows = bs_bytes;
6922
6923 Uint32 frags_started =
6924 scanIndex_send(signal,
6925 requestPtr,
6926 treeNodePtr,
6927 data.m_frags_not_started,
6928 bs_bytes,
6929 bs_rows,
6930 batchRange);
6931
6932 if (likely(frags_started > 0))
6933 return;
6934
6935 // Else: scanIndex_send() didn't send anything for some reason.
6936 // Need to continue into 'completion detection' below.
6937 jam();
6938 }
6939      } // if (isFirstBatch && data.m_frags_not_started > 0)
6940
6941 if (data.m_rows_received != data.m_rows_expecting)
6942 {
6943 jam();
6944 return;
6945 }
6946
6947 handleTreeNodeComplete(signal, requestPtr, treeNodePtr);
6948 checkBatchComplete(signal, requestPtr, 1);
6949 } // if (data.m_frags_outstanding == 0)
6950 }
6951
6952 void
6953 Dbspj::scanIndex_execSCAN_FRAGREF(Signal* signal,
6954 Ptr<Request> requestPtr,
6955 Ptr<TreeNode> treeNodePtr,
6956 Ptr<ScanFragHandle> fragPtr)
6957 {
6958 jam();
6959
6960 const ScanFragRef * rep = CAST_CONSTPTR(ScanFragRef, signal->getDataPtr());
6961 const Uint32 errCode = rep->errorCode;
6962
6963 Uint32 state = fragPtr.p->m_state;
6964 ndbrequire(state == ScanFragHandle::SFH_SCANNING ||
6965 state == ScanFragHandle::SFH_WAIT_CLOSE);
6966
6967 fragPtr.p->m_state = ScanFragHandle::SFH_COMPLETE;
6968
6969 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
6970 ndbrequire(data.m_frags_complete < data.m_fragCount);
6971 data.m_frags_complete++;
6972 ndbrequire(data.m_frags_outstanding > 0);
6973 data.m_frags_outstanding--;
6974
6975 if (data.m_fragCount == (data.m_frags_complete + data.m_frags_not_started))
6976 {
6977 jam();
6978 ndbrequire(requestPtr.p->m_cnt_active);
6979 requestPtr.p->m_cnt_active--;
6980 treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
6981 }
6982
6983 if (data.m_frags_outstanding == 0)
6984 {
6985 jam();
6986 ndbrequire(requestPtr.p->m_outstanding);
6987 requestPtr.p->m_outstanding--;
6988 }
6989
6990 abort(signal, requestPtr, errCode);
6991 }
6992
6993 void
6994 Dbspj::scanIndex_execSCAN_NEXTREQ(Signal* signal,
6995 Ptr<Request> requestPtr,
6996 Ptr<TreeNode> treeNodePtr)
6997 {
6998 jam();
6999 Uint32 err = checkTableError(treeNodePtr);
7000 if (unlikely(err))
7001 {
7002 jam();
7003 abort(signal, requestPtr, err);
7004 return;
7005 }
7006
7007 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
7008 const ScanFragReq * org = (const ScanFragReq*)data.m_scanFragReq;
7009
7010 data.m_rows_received = 0;
7011 data.m_rows_expecting = 0;
7012 ndbassert(data.m_frags_outstanding == 0);
7013
7014 ndbrequire(data.m_frags_complete < data.m_fragCount);
7015 if ((treeNodePtr.p->m_bits & TreeNode::T_SCAN_PARALLEL) == 0)
7016 {
7017 jam();
7018 /**
7019 * Since fetching few but large batches is more efficient, we
7020 * set parallelism to the lowest value where we can still expect each
7021 * batch to be full.
7022 */
7023 if (data.m_largestBatchRows < org->batch_size_rows/data.m_parallelism &&
7024 data.m_largestBatchBytes < org->batch_size_bytes/data.m_parallelism)
7025 {
7026 jam();
7027 data.m_parallelism = MIN(data.m_fragCount - data.m_frags_complete,
7028 org->batch_size_rows);
7029 if (data.m_largestBatchRows > 0)
7030 {
7031 jam();
7032 data.m_parallelism =
7033 MIN(org->batch_size_rows / data.m_largestBatchRows,
7034 data.m_parallelism);
7035 }
7036 if (data.m_largestBatchBytes > 0)
7037 {
7038 jam();
7039 data.m_parallelism =
7040 MIN(data.m_parallelism,
7041 org->batch_size_bytes/data.m_largestBatchBytes);
7042 }
7043 if (data.m_frags_complete == 0 &&
7044 data.m_frags_not_started % data.m_parallelism != 0)
7045 {
7046 jam();
7047 /**
7048 * Set parallelism such that we can expect to have similar
7049 * parallelism in each batch. For example if there are 8 remaining
7050        * fragments, then we should fetch 2 times 4 fragments rather than
7051 * 7+1.
7052 */
7053 const Uint32 roundTrips =
7054 1 + data.m_frags_not_started / data.m_parallelism;
7055 data.m_parallelism = data.m_frags_not_started / roundTrips;
7056 }
7057 }
7058 else
7059 {
7060 jam();
7061 // We get full batches, so we should lower parallelism.
7062 data.m_parallelism = MIN(data.m_fragCount - data.m_frags_complete,
7063 MAX(1, data.m_parallelism/2));
7064 }
7065 ndbassert(data.m_parallelism > 0);
7066 #ifdef DEBUG_SCAN_FRAGREQ
7067 DEBUG("::scanIndex_execSCAN_NEXTREQ() Asking for new batches from " <<
7068 data.m_parallelism <<
7069 " fragments with " << org->batch_size_rows/data.m_parallelism <<
7070 " rows and " << org->batch_size_bytes/data.m_parallelism <<
7071 " bytes.");
7072 #endif
7073 }
7074 else
7075 {
7076 jam();
7077 data.m_parallelism = MIN(data.m_fragCount - data.m_frags_complete,
7078 org->batch_size_rows);
7079 }
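
  /**
   * Worked example of the adaptation above (illustration only): with
   * batch_size_rows == 100, m_parallelism == 4 and a largest batch of
   * 5 rows, each fragment clearly fits in a batch of its own, so the new
   * parallelism becomes MIN(remaining fragments, 100/5 == 20). Conversely,
   * if the previous batches came back full, parallelism is halved so that
   * each remaining fragment gets twice the batch quota.
   */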
7080
7081 const Uint32 bs_rows = org->batch_size_rows/data.m_parallelism;
7082 ndbassert(bs_rows > 0);
7083 ScanFragNextReq* req =
7084 reinterpret_cast<ScanFragNextReq*>(signal->getDataPtrSend());
7085 req->requestInfo = 0;
7086 ScanFragNextReq::setCorrFactorFlag(req->requestInfo);
7087 req->transId1 = requestPtr.p->m_transId[0];
7088 req->transId2 = requestPtr.p->m_transId[1];
7089 req->batch_size_rows = bs_rows;
7090 req->batch_size_bytes = org->batch_size_bytes/data.m_parallelism;
7091
7092 Uint32 batchRange = 0;
7093 Ptr<ScanFragHandle> fragPtr;
7094 Uint32 sentFragCount = 0;
7095 {
7096 /**
7097 * First, ask for more data from fragments that are already started.
7098 */
7099 Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
7100 list.first(fragPtr);
7101 while (sentFragCount < data.m_parallelism && !fragPtr.isNull())
7102 {
7103 jam();
7104 ndbassert(fragPtr.p->m_state == ScanFragHandle::SFH_WAIT_NEXTREQ ||
7105 fragPtr.p->m_state == ScanFragHandle::SFH_COMPLETE ||
7106 fragPtr.p->m_state == ScanFragHandle::SFH_NOT_STARTED);
7107 if (fragPtr.p->m_state == ScanFragHandle::SFH_WAIT_NEXTREQ)
7108 {
7109 jam();
7110
7111 data.m_frags_outstanding++;
7112 req->variableData[0] = batchRange;
7113 fragPtr.p->m_state = ScanFragHandle::SFH_SCANNING;
7114 batchRange += bs_rows;
7115
7116 DEBUG("scanIndex_execSCAN_NEXTREQ to: " << hex
7117 << treeNodePtr.p->m_send.m_ref
7118 << ", m_node_no=" << treeNodePtr.p->m_node_no
7119 << ", senderData: " << req->senderData);
7120
7121 #ifdef DEBUG_SCAN_FRAGREQ
7122 printSCANFRAGNEXTREQ(stdout, &signal->theData[0],
7123                            ScanFragNextReq::SignalLength + 1, DBLQH);
7124 #endif
7125
7126 req->senderData = fragPtr.i;
7127 sendSignal(fragPtr.p->m_ref, GSN_SCAN_NEXTREQ, signal,
7128 ScanFragNextReq::SignalLength + 1,
7129 JBB);
7130 sentFragCount++;
7131 }
7132 list.next(fragPtr);
7133 }
7134 }
7135
7136 Uint32 frags_started = 0;
7137 if (sentFragCount < data.m_parallelism)
7138 {
7139 /**
7140 * Then start new fragments until we reach data.m_parallelism.
7141 */
7142 jam();
7143 ndbassert(data.m_frags_not_started != 0);
7144 frags_started =
7145 scanIndex_send(signal,
7146 requestPtr,
7147 treeNodePtr,
7148 data.m_parallelism - sentFragCount,
7149 org->batch_size_bytes/data.m_parallelism,
7150 bs_rows,
7151 batchRange);
7152 }
7153 /**
7154 * sendSignal() or scanIndex_send() might have failed to send:
7155 * Check that we really did send something before
7156 * updating outstanding & active.
7157 */
7158 if (likely(sentFragCount+frags_started > 0))
7159 {
7160 jam();
7161 ndbrequire(data.m_batch_chunks > 0);
7162 data.m_batch_chunks++;
7163
7164 requestPtr.p->m_outstanding++;
7165 requestPtr.p->m_completed_nodes.clear(treeNodePtr.p->m_node_no);
7166 ndbassert(treeNodePtr.p->m_state == TreeNode::TN_ACTIVE);
7167 }
7168 }
7169
7170 void
7171 Dbspj::scanIndex_complete(Signal* signal,
7172 Ptr<Request> requestPtr,
7173 Ptr<TreeNode> treeNodePtr)
7174 {
7175 jam();
7176 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
7177 if (!data.m_fragments.isEmpty())
7178 {
7179 jam();
7180 DihScanTabCompleteRep* rep=(DihScanTabCompleteRep*)signal->getDataPtrSend();
7181 rep->tableId = treeNodePtr.p->m_tableOrIndexId;
7182 rep->scanCookie = data.m_scanCookie;
7183 sendSignal(DBDIH_REF, GSN_DIH_SCAN_TAB_COMPLETE_REP,
7184 signal, DihScanTabCompleteRep::SignalLength, JBB);
7185 }
7186 }
7187
7188 void
7189 Dbspj::scanIndex_abort(Signal* signal,
7190 Ptr<Request> requestPtr,
7191 Ptr<TreeNode> treeNodePtr)
7192 {
7193 jam();
7194
7195 switch(treeNodePtr.p->m_state){
7196 case TreeNode::TN_BUILDING:
7197 case TreeNode::TN_PREPARING:
7198 case TreeNode::TN_INACTIVE:
7199 case TreeNode::TN_COMPLETING:
7200 case TreeNode::TN_END:
7201 ndbout_c("H'%.8x H'%.8x scanIndex_abort state: %u",
7202 requestPtr.p->m_transId[0],
7203 requestPtr.p->m_transId[1],
7204 treeNodePtr.p->m_state);
7205 return;
7206
7207 case TreeNode::TN_ACTIVE:
7208 jam();
7209 break;
7210 }
7211
7212 ScanFragNextReq* req = CAST_PTR(ScanFragNextReq, signal->getDataPtrSend());
7213 req->requestInfo = 0;
7214 ScanFragNextReq::setCloseFlag(req->requestInfo, 1);
7215 req->transId1 = requestPtr.p->m_transId[0];
7216 req->transId2 = requestPtr.p->m_transId[1];
7217 req->batch_size_rows = 0;
7218 req->batch_size_bytes = 0;
7219
7220 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
7221 Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
7222 Ptr<ScanFragHandle> fragPtr;
7223
7224 Uint32 cnt_waiting = 0;
7225 Uint32 cnt_scanning = 0;
7226 for (list.first(fragPtr); !fragPtr.isNull(); list.next(fragPtr))
7227 {
7228 switch(fragPtr.p->m_state){
7229 case ScanFragHandle::SFH_NOT_STARTED:
7230 case ScanFragHandle::SFH_COMPLETE:
7231 case ScanFragHandle::SFH_WAIT_CLOSE:
7232 jam();
7233 break;
7234 case ScanFragHandle::SFH_WAIT_NEXTREQ:
7235 jam();
7236 cnt_waiting++; // was idle...
7237 data.m_frags_outstanding++; // is closing
7238 goto do_abort;
7239 case ScanFragHandle::SFH_SCANNING:
7240 jam();
7241 cnt_scanning++;
7242 goto do_abort;
7243 do_abort:
7244 req->senderData = fragPtr.i;
7245 sendSignal(fragPtr.p->m_ref, GSN_SCAN_NEXTREQ, signal,
7246 ScanFragNextReq::SignalLength, JBB);
7247
7248 fragPtr.p->m_state = ScanFragHandle::SFH_WAIT_CLOSE;
7249 break;
7250 }
7251 }
7252
7253 if (cnt_scanning == 0)
7254 {
7255 if (cnt_waiting > 0)
7256 {
7257 /**
7258 * If all were waiting...this should increase m_outstanding
7259 */
7260 jam();
7261 requestPtr.p->m_outstanding++;
7262 }
7263 else
7264 {
7265 /**
7266 * All fragments are either complete or not yet started, so there is
7267 * nothing to abort.
7268 */
7269 jam();
7270 ndbassert(data.m_frags_not_started > 0);
7271 ndbrequire(requestPtr.p->m_cnt_active);
7272 requestPtr.p->m_cnt_active--;
7273 treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
7274 }
7275 }
7276 }
7277
7278 Uint32
7279 Dbspj::scanIndex_execNODE_FAILREP(Signal* signal,
7280 Ptr<Request> requestPtr,
7281 Ptr<TreeNode> treeNodePtr,
7282 NdbNodeBitmask nodes)
7283 {
7284 jam();
7285
7286 switch(treeNodePtr.p->m_state){
7287 case TreeNode::TN_PREPARING:
7288 case TreeNode::TN_INACTIVE:
7289 return 1;
7290
7291 case TreeNode::TN_BUILDING:
7292 case TreeNode::TN_COMPLETING:
7293 case TreeNode::TN_END:
7294 return 0;
7295
7296 case TreeNode::TN_ACTIVE:
7297 jam();
7298 break;
7299 }
7300
7301
7302 Uint32 sum = 0;
7303 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
7304 Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
7305 Ptr<ScanFragHandle> fragPtr;
7306
7307 Uint32 save0 = data.m_frags_outstanding;
7308 Uint32 save1 = data.m_frags_complete;
7309
7310 for (list.first(fragPtr); !fragPtr.isNull(); list.next(fragPtr))
7311 {
7312 if (nodes.get(refToNode(fragPtr.p->m_ref)) == false)
7313 {
7314 jam();
7315 /**
7316 * No action needed
7317 */
7318 continue;
7319 }
7320
7321 switch(fragPtr.p->m_state){
7322 case ScanFragHandle::SFH_NOT_STARTED:
7323 jam();
7324 ndbrequire(data.m_frags_complete < data.m_fragCount);
7325 data.m_frags_complete++;
7326 ndbrequire(data.m_frags_not_started > 0);
7327 data.m_frags_not_started--;
7328 // fall through
7329 case ScanFragHandle::SFH_COMPLETE:
7330 jam();
7331 sum++; // indicate that we should abort
7332 /**
7333 * we could keep list of all fragments...
7334 * or execute DIGETNODES again...
7335 * but for now, we don't
7336 */
7337 break;
7338 case ScanFragHandle::SFH_WAIT_CLOSE:
7339 case ScanFragHandle::SFH_SCANNING:
7340 jam();
7341 ndbrequire(data.m_frags_outstanding > 0);
7342 data.m_frags_outstanding--;
7343 // fall through
7344 case ScanFragHandle::SFH_WAIT_NEXTREQ:
7345 jam();
7346 sum++;
7347 ndbrequire(data.m_frags_complete < data.m_fragCount);
7348 data.m_frags_complete++;
7349 break;
7350 }
7351 fragPtr.p->m_ref = 0;
7352 fragPtr.p->m_state = ScanFragHandle::SFH_COMPLETE;
7353 }
7354
7355 if (save0 != 0 && data.m_frags_outstanding == 0)
7356 {
7357 jam();
7358 ndbrequire(requestPtr.p->m_outstanding);
7359 requestPtr.p->m_outstanding--;
7360 }
7361
7362 if (save1 != 0 &&
7363 data.m_fragCount == (data.m_frags_complete + data.m_frags_not_started))
7364 {
7365 jam();
7366 ndbrequire(requestPtr.p->m_cnt_active);
7367 requestPtr.p->m_cnt_active--;
7368 treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
7369 }
7370
7371 return sum;
7372 }
7373
7374 void
7375 Dbspj::scanIndex_release_rangekeys(Ptr<Request> requestPtr,
7376 Ptr<TreeNode> treeNodePtr)
7377 {
7378 jam();
7379 DEBUG("scanIndex_release_rangekeys(), tree node " << treeNodePtr.i
7380 << " m_node_no: " << treeNodePtr.p->m_node_no);
7381
7382 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
7383 Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
7384 Ptr<ScanFragHandle> fragPtr;
7385
7386 if (treeNodePtr.p->m_bits & TreeNode::T_PRUNE_PATTERN)
7387 {
7388 jam();
7389 for (list.first(fragPtr); !fragPtr.isNull(); list.next(fragPtr))
7390 {
7391 if (fragPtr.p->m_rangePtrI != RNIL)
7392 {
7393 releaseSection(fragPtr.p->m_rangePtrI);
7394 fragPtr.p->m_rangePtrI = RNIL;
7395 }
7396 fragPtr.p->reset_ranges();
7397 }
7398 }
7399 else
7400 {
7401 jam();
7402 if (!list.first(fragPtr))
7403 return;
7404 if (fragPtr.p->m_rangePtrI != RNIL)
7405 {
7406 releaseSection(fragPtr.p->m_rangePtrI);
7407 fragPtr.p->m_rangePtrI = RNIL;
7408 }
7409 fragPtr.p->reset_ranges();
7410 }
7411 }
7412
7413 /**
7414 * Parent batch has completed, and will not refetch (X-joined) results
7415  * from its children. Release & reset range keys which are unsent,
7416  * or which we have kept for possible resubmits.
7417 */
7418 void
7419 Dbspj::scanIndex_parent_batch_cleanup(Ptr<Request> requestPtr,
7420 Ptr<TreeNode> treeNodePtr)
7421 {
7422 DEBUG("scanIndex_parent_batch_cleanup");
7423 scanIndex_release_rangekeys(requestPtr,treeNodePtr);
7424 }
7425
7426 void
7427 Dbspj::scanIndex_cleanup(Ptr<Request> requestPtr,
7428 Ptr<TreeNode> treeNodePtr)
7429 {
7430 ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
7431 DEBUG("scanIndex_cleanup");
7432
7433 /**
7434    * Range keys have been collected wherever there are uncompleted
7435    * parent batches... release them to avoid a memory leak.
7436 */
7437 scanIndex_release_rangekeys(requestPtr,treeNodePtr);
7438
7439 // Clear fragments list head.
7440 // TODO: is this needed, all elements should already be removed and released
7441 data.m_fragments.init();
7442
7443 if (treeNodePtr.p->m_bits & TreeNode::T_PRUNE_PATTERN)
7444 {
7445 jam();
7446 LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
7447 Local_pattern_store pattern(pool, data.m_prunePattern);
7448 pattern.release();
7449 }
7450 else if (treeNodePtr.p->m_bits & TreeNode::T_CONST_PRUNE)
7451 {
7452 jam();
7453 if (data.m_constPrunePtrI != RNIL)
7454 {
7455 jam();
7456 releaseSection(data.m_constPrunePtrI);
7457 data.m_constPrunePtrI = RNIL;
7458 }
7459 }
7460
7461 cleanup_common(requestPtr, treeNodePtr);
7462 }
7463
7464 /**
7465 * END - MODULE SCAN INDEX
7466 */
7467
7468 /**
7469 * Static OpInfo handling
7470 */
7471 const Dbspj::OpInfo*
7472 Dbspj::getOpInfo(Uint32 op)
7473 {
7474 DEBUG("getOpInfo(" << op << ")");
7475 switch(op){
7476 case QueryNode::QN_LOOKUP:
7477 return &Dbspj::g_LookupOpInfo;
7478 case QueryNode::QN_SCAN_FRAG:
7479 return &Dbspj::g_ScanFragOpInfo;
7480 case QueryNode::QN_SCAN_INDEX:
7481 return &Dbspj::g_ScanIndexOpInfo;
7482 default:
7483 return 0;
7484 }
7485 }
7486
7487 /**
7488 * MODULE COMMON PARSE/UNPACK
7489 */
7490
7491 /**
7492 * @returns dstLen + 1 on error
7493 */
7494 static
7495 Uint32
7496 unpackList(Uint32 dstLen, Uint32 * dst, Dbspj::DABuffer & buffer)
7497 {
7498 const Uint32 * ptr = buffer.ptr;
7499 if (likely(ptr != buffer.end))
7500 {
7501 Uint32 tmp = * ptr++;
7502 Uint32 cnt = tmp & 0xFFFF;
7503
7504 * dst ++ = (tmp >> 16); // Store first
7505 DEBUG("cnt: " << cnt << " first: " << (tmp >> 16));
7506
7507 if (cnt > 1)
7508 {
7509 Uint32 len = cnt / 2;
7510 if (unlikely(cnt >= dstLen || (ptr + len > buffer.end)))
7511 goto error;
7512
7513 cnt --; // subtract item stored in header
7514
7515 for (Uint32 i = 0; i < cnt/2; i++)
7516 {
7517 * dst++ = (* ptr) & 0xFFFF;
7518 * dst++ = (* ptr) >> 16;
7519 ptr++;
7520 }
7521
7522 if (cnt & 1)
7523 {
7524 * dst ++ = * ptr & 0xFFFF;
7525 ptr++;
7526 }
7527
7528 cnt ++; // readd item stored in header
7529 }
7530 buffer.ptr = ptr;
7531 return cnt;
7532 }
7533 return 0;
7534
7535 error:
7536 return dstLen + 1;
7537 }
7538
7539 /**
7540 * This fuctions takes an array of attrinfo, and builds "header"
7541 * which can be used to do random access inside the row
7542 */
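/**
 * Illustration (derived from the loop below): for a row of three columns
 * whose AttributeHeaders declare data sizes of 1, 3 and 2 words, the
 * offsets stored are
 *
 *   m_offset[0] = 0   // AttributeHeader of column 0
 *   m_offset[1] = 2   // 1 header word + 1 data word
 *   m_offset[2] = 6   // 2 + 1 header word + 3 data words
 *
 * so any column can be located without scanning those preceding it.
 */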
Uint32
Dbspj::buildRowHeader(RowPtr::Header * header, SegmentedSectionPtr ptr)
{
  Uint32 tmp, len;
  Uint32 * dst = header->m_offset;
  const Uint32 * const save = dst;
  SectionReader r0(ptr, getSectionSegmentPool());
  Uint32 offset = 0;
  do
  {
    * dst++ = offset;
    r0.getWord(&tmp);
    len = AttributeHeader::getDataSize(tmp);
    offset += 1 + len;
  } while (r0.step(len));

  return header->m_len = static_cast<Uint32>(dst - save);
}

/**
 * This function takes an array of attrinfo and builds a "header"
 * which can be used to do random access inside the row
 */
Uint32
Dbspj::buildRowHeader(RowPtr::Header * header, const Uint32 *& src, Uint32 len)
{
  Uint32 * dst = header->m_offset;
  const Uint32 * save = dst;
  Uint32 offset = 0;
  for (Uint32 i = 0; i<len; i++)
  {
    * dst ++ = offset;
    Uint32 tmp = * src++;
    Uint32 tmp_len = AttributeHeader::getDataSize(tmp);
    offset += 1 + tmp_len;
    src += tmp_len;
  }

  return header->m_len = static_cast<Uint32>(dst - save);
}

Uint32
Dbspj::appendToPattern(Local_pattern_store & pattern,
                       DABuffer & tree, Uint32 len)
{
  jam();
  if (unlikely(tree.ptr + len > tree.end))
    return DbspjErr::InvalidTreeNodeSpecification;

  if (ERROR_INSERTED_CLEAR(17008))
  {
    ndbout_c("Injecting OutOfQueryMemory error 17008 at line %d file %s",
             __LINE__, __FILE__);
    jam();
    return DbspjErr::OutOfQueryMemory;
  }
  if (unlikely(pattern.append(tree.ptr, len)==0))
    return DbspjErr::OutOfQueryMemory;

  tree.ptr += len;
  return 0;
}

Uint32
Dbspj::appendParamToPattern(Local_pattern_store& dst,
                            const RowPtr::Linear & row, Uint32 col)
{
  jam();
  Uint32 offset = row.m_header->m_offset[col];
  const Uint32 * ptr = row.m_data + offset;
  Uint32 len = AttributeHeader::getDataSize(* ptr ++);
  /* Param COL's converted to DATA when appended to pattern */
  Uint32 info = QueryPattern::data(len);

  if (ERROR_INSERTED_CLEAR(17009))
  {
    ndbout_c("Injecting OutOfQueryMemory error 17009 at line %d file %s",
             __LINE__, __FILE__);
    jam();
    return DbspjErr::OutOfQueryMemory;
  }

  return dst.append(&info,1) && dst.append(ptr,len) ? 0 : DbspjErr::OutOfQueryMemory;
}

#ifdef ERROR_INSERT
static int fi_cnt = 0;
bool
Dbspj::appendToSection(Uint32& firstSegmentIVal,
                       const Uint32* src, Uint32 len)
{
  if (ERROR_INSERTED(17510) && fi_cnt++ % 13 == 0)
  {
    jam();
    ndbout_c("Injecting appendToSection error 17510 at line %d file %s",
             __LINE__, __FILE__);
    return false;
  }
  else
  {
    return SimulatedBlock::appendToSection(firstSegmentIVal, src, len);
  }
}
#endif

Uint32
Dbspj::appendParamHeadToPattern(Local_pattern_store& dst,
                                const RowPtr::Linear & row, Uint32 col)
{
  jam();
  Uint32 offset = row.m_header->m_offset[col];
  const Uint32 * ptr = row.m_data + offset;
  Uint32 len = AttributeHeader::getDataSize(*ptr);
  /* Param COL's converted to DATA when appended to pattern */
  Uint32 info = QueryPattern::data(len+1);

  if (ERROR_INSERTED_CLEAR(17010))
  {
    ndbout_c("Injecting OutOfQueryMemory error 17010 at line %d file %s",
             __LINE__, __FILE__);
    jam();
    return DbspjErr::OutOfQueryMemory;
  }

  return dst.append(&info,1) && dst.append(ptr,len+1) ? 0 : DbspjErr::OutOfQueryMemory;
}

Uint32
Dbspj::appendTreeToSection(Uint32 & ptrI, SectionReader & tree, Uint32 len)
{
  /**
   * TODO handle errors
   */
  jam();
  Uint32 SZ = 16;
  Uint32 tmp[16];
  while (len > SZ)
  {
    jam();
    tree.getWords(tmp, SZ);
    if (!appendToSection(ptrI, tmp, SZ))
      return DbspjErr::OutOfSectionMemory;
    len -= SZ;
  }

  tree.getWords(tmp, len);
  if (!appendToSection(ptrI, tmp, len))
    return DbspjErr::OutOfSectionMemory;

  return 0;
}

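/**
 * Note on the correlation value handled below (as used here and in
 * ::appendFromParent()): the 32-bit CORR_FACTOR32 pseudo column carries
 * the row's own correlation id in its lower 16 bits, and the correlation
 * id of its parent row in the upper 16 bits.
 */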
void
Dbspj::getCorrelationData(const RowPtr::Section & row,
                          Uint32 col,
                          Uint32& correlationNumber)
{
  /**
   * TODO handle errors
   */
  SegmentedSectionPtr ptr(row.m_dataPtr);
  SectionReader reader(ptr, getSectionSegmentPool());
  Uint32 offset = row.m_header->m_offset[col];
  ndbrequire(reader.step(offset));
  Uint32 tmp;
  ndbrequire(reader.getWord(&tmp));
  Uint32 len = AttributeHeader::getDataSize(tmp);
  ndbrequire(len == 1);
  ndbrequire(AttributeHeader::getAttributeId(tmp) == AttributeHeader::CORR_FACTOR32);
  ndbrequire(reader.getWord(&correlationNumber));
}

void
Dbspj::getCorrelationData(const RowPtr::Linear & row,
                          Uint32 col,
                          Uint32& correlationNumber)
{
  /**
   * TODO handle errors
   */
  Uint32 offset = row.m_header->m_offset[col];
  Uint32 tmp = row.m_data[offset];
  Uint32 len = AttributeHeader::getDataSize(tmp);
  ndbrequire(len == 1);
  ndbrequire(AttributeHeader::getAttributeId(tmp) == AttributeHeader::CORR_FACTOR32);
  correlationNumber = row.m_data[offset+1];
}

Uint32
Dbspj::appendColToSection(Uint32 & dst, const RowPtr::Section & row,
                          Uint32 col, bool& hasNull)
{
  jam();
  /**
   * TODO handle errors
   */
  SegmentedSectionPtr ptr(row.m_dataPtr);
  SectionReader reader(ptr, getSectionSegmentPool());
  Uint32 offset = row.m_header->m_offset[col];
  ndbrequire(reader.step(offset));
  Uint32 tmp;
  ndbrequire(reader.getWord(&tmp));
  Uint32 len = AttributeHeader::getDataSize(tmp);
  if (unlikely(len==0))
  {
    jam();
    hasNull = true; // NULL-value in key
    return 0;
  }
  return appendTreeToSection(dst, reader, len);
}

Uint32
Dbspj::appendColToSection(Uint32 & dst, const RowPtr::Linear & row,
                          Uint32 col, bool& hasNull)
{
  jam();
  Uint32 offset = row.m_header->m_offset[col];
  const Uint32 * ptr = row.m_data + offset;
  Uint32 len = AttributeHeader::getDataSize(* ptr ++);
  if (unlikely(len==0))
  {
    jam();
    hasNull = true; // NULL-value in key
    return 0;
  }
  return appendToSection(dst, ptr, len) ? 0 : DbspjErr::OutOfSectionMemory;
}

Uint32
Dbspj::appendAttrinfoToSection(Uint32 & dst, const RowPtr::Linear & row,
                               Uint32 col, bool& hasNull)
{
  jam();
  Uint32 offset = row.m_header->m_offset[col];
  const Uint32 * ptr = row.m_data + offset;
  Uint32 len = AttributeHeader::getDataSize(* ptr);
  if (unlikely(len==0))
  {
    jam();
    hasNull = true; // NULL-value in key
  }
  return appendToSection(dst, ptr, 1 + len) ? 0 : DbspjErr::OutOfSectionMemory;
}

Uint32
Dbspj::appendAttrinfoToSection(Uint32 & dst, const RowPtr::Section & row,
                               Uint32 col, bool& hasNull)
{
  jam();
  /**
   * TODO handle errors
   */
  SegmentedSectionPtr ptr(row.m_dataPtr);
  SectionReader reader(ptr, getSectionSegmentPool());
  Uint32 offset = row.m_header->m_offset[col];
  ndbrequire(reader.step(offset));
  Uint32 tmp;
  ndbrequire(reader.peekWord(&tmp));
  Uint32 len = AttributeHeader::getDataSize(tmp);
  if (unlikely(len==0))
  {
    jam();
    hasNull = true; // NULL-value in key
  }
  return appendTreeToSection(dst, reader, 1 + len);
}

/**
 * 'PkCol' is the composite NDB$PK column in a unique index, consisting of
 * a fragment id and the composite PK value (all PK columns concatenated)
 */
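/**
 * Word layout of the NDB$PK value, as read by the code below (with 'len'
 * being the data size declared by the AttributeHeader):
 *
 *   word 0:       AttributeHeader
 *   word 1:       fragment id (skipped when building the key)
 *   word 2..len:  the concatenated PK column values
 */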
Uint32
Dbspj::appendPkColToSection(Uint32 & dst, const RowPtr::Section & row, Uint32 col)
{
  jam();
  /**
   * TODO handle errors
   */
  SegmentedSectionPtr ptr(row.m_dataPtr);
  SectionReader reader(ptr, getSectionSegmentPool());
  Uint32 offset = row.m_header->m_offset[col];
  ndbrequire(reader.step(offset));
  Uint32 tmp;
  ndbrequire(reader.getWord(&tmp));
  Uint32 len = AttributeHeader::getDataSize(tmp);
  ndbrequire(len>1); // NULL-value in PkKey is an error
  ndbrequire(reader.step(1)); // Skip fragid
  return appendTreeToSection(dst, reader, len-1);
}

/**
 * 'PkCol' is the composite NDB$PK column in a unique index, consisting of
 * a fragment id and the composite PK value (all PK columns concatenated)
 */
Uint32
Dbspj::appendPkColToSection(Uint32 & dst, const RowPtr::Linear & row, Uint32 col)
{
  jam();
  Uint32 offset = row.m_header->m_offset[col];
  Uint32 tmp = row.m_data[offset];
  Uint32 len = AttributeHeader::getDataSize(tmp);
  ndbrequire(len>1); // NULL-value in PkKey is an error
  return appendToSection(dst, row.m_data+offset+2, len - 1) ? 0 : DbspjErr::OutOfSectionMemory;
}

Uint32
Dbspj::appendFromParent(Uint32 & dst, Local_pattern_store& pattern,
                        Local_pattern_store::ConstDataBufferIterator& it,
                        Uint32 levels, const RowPtr & rowptr,
                        bool& hasNull)
{
  jam();
  Ptr<TreeNode> treeNodePtr;
  m_treenode_pool.getPtr(treeNodePtr, rowptr.m_src_node_ptrI);
  Uint32 corrVal = rowptr.m_src_correlation;
  RowPtr targetRow;
  DEBUG("appendFromParent-of"
        << " node: " << treeNodePtr.p->m_node_no);
  while (levels--)
  {
    jam();
    if (unlikely(treeNodePtr.p->m_parentPtrI == RNIL))
    {
      DEBUG_CRASH();
      return DbspjErr::InvalidPattern;
    }
    m_treenode_pool.getPtr(treeNodePtr, treeNodePtr.p->m_parentPtrI);
    DEBUG("appendFromParent"
          << ", node: " << treeNodePtr.p->m_node_no);
    if (unlikely(treeNodePtr.p->m_rows.m_type != RowCollection::COLLECTION_MAP))
    {
      DEBUG_CRASH();
      return DbspjErr::InvalidPattern;
    }

    RowRef ref;
    treeNodePtr.p->m_rows.m_map.copyto(ref);
    const Uint32* const mapptr = get_row_ptr(ref);

    Uint32 pos = corrVal >> 16; // parent corr-val
    if (unlikely(! (pos < treeNodePtr.p->m_rows.m_map.m_size)))
    {
      DEBUG_CRASH();
      return DbspjErr::InvalidPattern;
    }

    // load ref to parent row
    treeNodePtr.p->m_rows.m_map.load(mapptr, pos, ref);

    const Uint32* const rowptr = get_row_ptr(ref);
    setupRowPtr(treeNodePtr.p->m_rows, targetRow, ref, rowptr);

    if (levels)
    {
      jam();
      getCorrelationData(targetRow.m_row_data.m_linear,
                         targetRow.m_row_data.m_linear.m_header->m_len - 1,
                         corrVal);
    }
  }

  if (unlikely(it.isNull()))
  {
    DEBUG_CRASH();
    return DbspjErr::InvalidPattern;
  }

  Uint32 info = *it.data;
  Uint32 type = QueryPattern::getType(info);
  Uint32 val = QueryPattern::getLength(info);
  pattern.next(it);
  switch(type){
  case QueryPattern::P_COL:
    jam();
    return appendColToSection(dst, targetRow.m_row_data.m_linear, val, hasNull);
  case QueryPattern::P_UNQ_PK:
    jam();
    return appendPkColToSection(dst, targetRow.m_row_data.m_linear, val);
  case QueryPattern::P_ATTRINFO:
    jam();
    return appendAttrinfoToSection(dst, targetRow.m_row_data.m_linear, val, hasNull);
  case QueryPattern::P_DATA:
    jam();
    // retrieving DATA from a parent is an error
    DEBUG_CRASH();
    return DbspjErr::InvalidPattern;
  case QueryPattern::P_PARENT:
    jam();
    // no point in nesting P_PARENT... an error
    DEBUG_CRASH();
    return DbspjErr::InvalidPattern;
  case QueryPattern::P_PARAM:
  case QueryPattern::P_PARAM_HEADER:
    jam();
    // should have been expanded during build
    DEBUG_CRASH();
    return DbspjErr::InvalidPattern;
  default:
    jam();
    DEBUG_CRASH();
    return DbspjErr::InvalidPattern;
  }
}

Uint32
Dbspj::appendDataToSection(Uint32 & ptrI,
                           Local_pattern_store& pattern,
                           Local_pattern_store::ConstDataBufferIterator& it,
                           Uint32 len, bool& hasNull)
{
  jam();
  if (unlikely(len==0))
  {
    jam();
    hasNull = true;
    return 0;
  }

#if 0
  /**
   * TODO handle errors
   */
  Uint32 tmp[NDB_SECTION_SEGMENT_SZ];
  while (len > NDB_SECTION_SEGMENT_SZ)
  {
    pattern.copyout(tmp, NDB_SECTION_SEGMENT_SZ, it);
    appendToSection(ptrI, tmp, NDB_SECTION_SEGMENT_SZ);
    len -= NDB_SECTION_SEGMENT_SZ;
  }

  pattern.copyout(tmp, len, it);
  appendToSection(ptrI, tmp, len);
  return 0;
#else
  Uint32 remaining = len;
  Uint32 dstIdx = 0;
  Uint32 tmp[NDB_SECTION_SEGMENT_SZ];

  while (remaining > 0 && !it.isNull())
  {
    tmp[dstIdx] = *it.data;
    remaining--;
    dstIdx++;
    pattern.next(it);
    if (dstIdx == NDB_SECTION_SEGMENT_SZ || remaining == 0)
    {
      if (!appendToSection(ptrI, tmp, dstIdx))
      {
        jam();
        return DbspjErr::OutOfSectionMemory;
      }
      dstIdx = 0;
    }
  }
  if (remaining > 0)
  {
    DEBUG_CRASH();
    return DbspjErr::InvalidPattern;
  }
  else
  {
    return 0;
  }
#endif
}

/**
 * This function takes a pattern and a row and expands it into a section
 */
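/**
 * A small illustration of the pattern language interpreted below (token
 * names from QueryPattern, w0/w1 being arbitrary literal words): the pattern
 *
 *   P_DATA(2), w0, w1, P_COL(1), P_PARENT(1), P_COL(0)
 *
 * appends the two literal words, then column 1 of the current row, then
 * column 0 of the row one level further up the ancestor chain
 * (resolved by ::appendFromParent() above).
 */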
Uint32
Dbspj::expandS(Uint32 & _dst, Local_pattern_store& pattern,
               const RowPtr & row, bool& hasNull)
{
  Uint32 err;
  Uint32 dst = _dst;
  hasNull = false;
  Local_pattern_store::ConstDataBufferIterator it;
  pattern.first(it);
  while (!it.isNull())
  {
    Uint32 info = *it.data;
    Uint32 type = QueryPattern::getType(info);
    Uint32 val = QueryPattern::getLength(info);
    pattern.next(it);
    switch(type){
    case QueryPattern::P_COL:
      jam();
      err = appendColToSection(dst, row.m_row_data.m_section, val, hasNull);
      break;
    case QueryPattern::P_UNQ_PK:
      jam();
      err = appendPkColToSection(dst, row.m_row_data.m_section, val);
      break;
    case QueryPattern::P_ATTRINFO:
      jam();
      err = appendAttrinfoToSection(dst, row.m_row_data.m_section, val, hasNull);
      break;
    case QueryPattern::P_DATA:
      jam();
      err = appendDataToSection(dst, pattern, it, val, hasNull);
      break;
    case QueryPattern::P_PARENT:
      jam();
      // P_PARENT is a prefix to another pattern token
      // that permits code to access rows from earlier than the immediate parent.
      // val is the number of levels to move up the tree.
      err = appendFromParent(dst, pattern, it, val, row, hasNull);
      break;
    // PARAMs were converted to DATA by ::expand(pattern...)
    case QueryPattern::P_PARAM:
    case QueryPattern::P_PARAM_HEADER:
    default:
      jam();
      err = DbspjErr::InvalidPattern;
      DEBUG_CRASH();
    }
    if (unlikely(err != 0))
    {
      jam();
      _dst = dst;
      return err;
    }
  }

  _dst = dst;
  return 0;
}

/**
 * This function takes a pattern and a row and expands it into a section
 */
Uint32
Dbspj::expandL(Uint32 & _dst, Local_pattern_store& pattern,
               const RowPtr & row, bool& hasNull)
{
  Uint32 err;
  Uint32 dst = _dst;
  hasNull = false;
  Local_pattern_store::ConstDataBufferIterator it;
  pattern.first(it);
  while (!it.isNull())
  {
    Uint32 info = *it.data;
    Uint32 type = QueryPattern::getType(info);
    Uint32 val = QueryPattern::getLength(info);
    pattern.next(it);
    switch(type){
    case QueryPattern::P_COL:
      jam();
      err = appendColToSection(dst, row.m_row_data.m_linear, val, hasNull);
      break;
    case QueryPattern::P_UNQ_PK:
      jam();
      err = appendPkColToSection(dst, row.m_row_data.m_linear, val);
      break;
    case QueryPattern::P_ATTRINFO:
      jam();
      err = appendAttrinfoToSection(dst, row.m_row_data.m_linear, val, hasNull);
      break;
    case QueryPattern::P_DATA:
      jam();
      err = appendDataToSection(dst, pattern, it, val, hasNull);
      break;
    case QueryPattern::P_PARENT:
      jam();
      // P_PARENT is a prefix to another pattern token
      // that permits code to access rows from earlier than the immediate parent.
      // val is the number of levels to move up the tree.
      err = appendFromParent(dst, pattern, it, val, row, hasNull);
      break;
    // PARAMs were converted to DATA by ::expand(pattern...)
    case QueryPattern::P_PARAM:
    case QueryPattern::P_PARAM_HEADER:
    default:
      jam();
      err = DbspjErr::InvalidPattern;
      DEBUG_CRASH();
    }
    if (unlikely(err != 0))
    {
      jam();
      _dst = dst;
      return err;
    }
  }

  _dst = dst;
  return 0;
}

/* ::expand() used during initial 'build' phase on 'tree' + 'param' from API */
Uint32
Dbspj::expand(Uint32 & ptrI, DABuffer& pattern, Uint32 len,
              DABuffer& param, Uint32 paramCnt, bool& hasNull)
{
  jam();
  /**
   * TODO handle error
   */
  Uint32 err = 0;
  Uint32 tmp[1+MAX_ATTRIBUTES_IN_TABLE];
  struct RowPtr::Linear row;
  row.m_data = param.ptr;
  row.m_header = CAST_PTR(RowPtr::Header, &tmp[0]);
  buildRowHeader(CAST_PTR(RowPtr::Header, &tmp[0]), param.ptr, paramCnt);

  Uint32 dst = ptrI;
  const Uint32 * ptr = pattern.ptr;
  const Uint32 * end = ptr + len;
  hasNull = false;

  for (; ptr < end; )
  {
    Uint32 info = * ptr++;
    Uint32 type = QueryPattern::getType(info);
    Uint32 val = QueryPattern::getLength(info);
    switch(type){
    case QueryPattern::P_PARAM:
      jam();
      ndbassert(val < paramCnt);
      err = appendColToSection(dst, row, val, hasNull);
      break;
    case QueryPattern::P_PARAM_HEADER:
      jam();
      ndbassert(val < paramCnt);
      err = appendAttrinfoToSection(dst, row, val, hasNull);
      break;
    case QueryPattern::P_DATA:
      if (unlikely(val==0))
      {
        jam();
        hasNull = true;
      }
      else if (likely(appendToSection(dst, ptr, val)))
      {
        jam();
        ptr += val;
      }
      else
      {
        jam();
        err = DbspjErr::OutOfSectionMemory;
      }
      break;
    case QueryPattern::P_COL:    // (linked) COL's not expected here
    case QueryPattern::P_PARENT: // Prefix to P_COL
    case QueryPattern::P_ATTRINFO:
    case QueryPattern::P_UNQ_PK:
    default:
      jam();
      jamLine(type);
      err = DbspjErr::InvalidPattern;
    }
    if (unlikely(err != 0))
    {
      jam();
      ptrI = dst;
      return err;
    }
  }

  /**
   * Iterate forward
   */
  pattern.ptr = end;
  ptrI = dst;
  return 0;
}

/* ::expand() used during initial 'build' phase on 'tree' + 'param' from API */
Uint32
Dbspj::expand(Local_pattern_store& dst, Ptr<TreeNode> treeNodePtr,
              DABuffer& pattern, Uint32 len,
              DABuffer& param, Uint32 paramCnt)
{
  jam();
  /**
   * TODO handle error
   */
  Uint32 err;
  Uint32 tmp[1+MAX_ATTRIBUTES_IN_TABLE];
  struct RowPtr::Linear row;
  row.m_header = CAST_PTR(RowPtr::Header, &tmp[0]);
  row.m_data = param.ptr;
  buildRowHeader(CAST_PTR(RowPtr::Header, &tmp[0]), param.ptr, paramCnt);

  const Uint32 * end = pattern.ptr + len;
  for (; pattern.ptr < end; )
  {
    Uint32 info = *pattern.ptr;
    Uint32 type = QueryPattern::getType(info);
    Uint32 val = QueryPattern::getLength(info);
    switch(type){
    case QueryPattern::P_COL:
    case QueryPattern::P_UNQ_PK:
    case QueryPattern::P_ATTRINFO:
      jam();
      err = appendToPattern(dst, pattern, 1);
      break;
    case QueryPattern::P_DATA:
      jam();
      err = appendToPattern(dst, pattern, val+1);
      break;
    case QueryPattern::P_PARAM:
      jam();
      // NOTE: Converted to P_DATA by appendParamToPattern
      ndbassert(val < paramCnt);
      err = appendParamToPattern(dst, row, val);
      pattern.ptr++;
      break;
    case QueryPattern::P_PARAM_HEADER:
      jam();
      // NOTE: Converted to P_DATA by appendParamHeadToPattern
      ndbassert(val < paramCnt);
      err = appendParamHeadToPattern(dst, row, val);
      pattern.ptr++;
      break;
    case QueryPattern::P_PARENT: // Prefix to P_COL
    {
      jam();
      err = appendToPattern(dst, pattern, 1);
      if (unlikely(err))
      {
        jam();
        break;
      }
      // Locate the requested grandparent and request it to
      // buffer its result rows (T_ROW_BUFFER)
      Ptr<TreeNode> parentPtr;
      m_treenode_pool.getPtr(parentPtr, treeNodePtr.p->m_parentPtrI);
      while (val--)
      {
        jam();
        ndbassert(parentPtr.p->m_parentPtrI != RNIL);
        m_treenode_pool.getPtr(parentPtr, parentPtr.p->m_parentPtrI);
        parentPtr.p->m_bits |= TreeNode::T_ROW_BUFFER;
        parentPtr.p->m_bits |= TreeNode::T_ROW_BUFFER_MAP;
      }
      Ptr<Request> requestPtr;
      m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
      requestPtr.p->m_bits |= Request::RT_ROW_BUFFERS;
      break;
    }
    default:
      err = DbspjErr::InvalidPattern;
      jam();
    }

    if (unlikely(err != 0))
    {
      jam();
      return err;
    }
  }
  return 0;
}

Uint32
Dbspj::parseDA(Build_context& ctx,
               Ptr<Request> requestPtr,
               Ptr<TreeNode> treeNodePtr,
               DABuffer& tree, Uint32 treeBits,
               DABuffer& param, Uint32 paramBits)
{
  Uint32 err;
  Uint32 attrInfoPtrI = RNIL;
  Uint32 attrParamPtrI = RNIL;

  do
  {
    /**
     * Test execution terminating due to 'OutOfSectionMemory', which
     * may happen in multiple places (e.g. appendToSection, expand) below:
     * - 17050: Fail on parseDA at first call
     * - 17051: Fail on parseDA if 'isLeaf'
     * - 17052: Fail on parseDA if treeNode not root
     * - 17053: Fail on parseDA at a random node of the query tree
     */
    if (ERROR_INSERTED(17050) ||
        (ERROR_INSERTED(17051) && (treeNodePtr.p->isLeaf())) ||
        (ERROR_INSERTED(17052) && (treeNodePtr.p->m_parentPtrI != RNIL)) ||
        (ERROR_INSERTED(17053) && (rand() % 7) == 0))
    {
      jam();
      CLEAR_ERROR_INSERT_VALUE;
      ndbout_c("Injecting OutOfSectionMemory error at line %d file %s",
               __LINE__, __FILE__);
      err = DbspjErr::OutOfSectionMemory;
      break;
    }

    if (treeBits & DABits::NI_REPEAT_SCAN_RESULT)
    {
      jam();
      DEBUG("use REPEAT_SCAN_RESULT when returning results");
      requestPtr.p->m_bits |= Request::RT_REPEAT_SCAN_RESULT;
    } // DABits::NI_REPEAT_SCAN_RESULT

    if (treeBits & DABits::NI_HAS_PARENT)
    {
      jam();
      DEBUG("NI_HAS_PARENT");
      /**
       * OPTIONAL PART 1:
       *
       * Parent nodes are stored first in the optional part.
       * This is a list of 16-bit numbers referring to
       * *earlier* nodes in the tree;
       * the list stores its own length in the first 16 bits.
       */
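      /**
       * Example (same packed-list format as ::unpackList() above): a single
       * parent, node 2, arrives as the one word (2 << 16) | 1.
       */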
      err = DbspjErr::InvalidTreeNodeSpecification;
      Uint32 dst[63];
      Uint32 cnt = unpackList(NDB_ARRAY_SIZE(dst), dst, tree);
      if (unlikely(cnt > NDB_ARRAY_SIZE(dst)))
      {
        jam();
        break;
      }

      if (unlikely(cnt!=1))
      {
        /**
         * Only a single parent is supported for now, i.e. only trees
         */
        jam();
        break;
      }

      err = 0;
      for (Uint32 i = 0; i<cnt; i++)
      {
        DEBUG("adding " << dst[i] << " as parent");
        Ptr<TreeNode> parentPtr = ctx.m_node_list[dst[i]];
        LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
        Local_dependency_map map(pool, parentPtr.p->m_dependent_nodes);
        if (unlikely(!map.append(&treeNodePtr.i, 1)))
        {
          err = DbspjErr::OutOfQueryMemory;
          jam();
          break;
        }
        parentPtr.p->m_bits &= ~(Uint32)TreeNode::T_LEAF;
        treeNodePtr.p->m_parentPtrI = parentPtr.i;

        // Build a bitmask of all ancestors of this treeNode
        treeNodePtr.p->m_ancestors = parentPtr.p->m_ancestors;
        treeNodePtr.p->m_ancestors.set(parentPtr.p->m_node_no);
      }

      if (unlikely(err != 0))
        break;
    } // DABits::NI_HAS_PARENT

    err = DbspjErr::InvalidTreeParametersSpecificationKeyParamBitsMissmatch;
    if (unlikely( ((treeBits & DABits::NI_KEY_PARAMS)==0) !=
                  ((paramBits & DABits::PI_KEY_PARAMS)==0)))
    {
      jam();
      break;
    }

    if (treeBits & (DABits::NI_KEY_PARAMS
                    | DABits::NI_KEY_LINKED
                    | DABits::NI_KEY_CONSTS))
    {
      jam();
      DEBUG("NI_KEY_PARAMS | NI_KEY_LINKED | NI_KEY_CONSTS");

      /**
       * OPTIONAL PART 2:
       *
       * If keys are parametrized or linked:
       * DATA0[LO/HI] - Length of key pattern / #parameters to key
       */
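      /**
       * For example, len_cnt == 0x0003000A below would mean a 10-word key
       * pattern (low 16 bits) referring to 3 parameters (high 16 bits).
       */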
      Uint32 len_cnt = * tree.ptr ++;
      Uint32 len = len_cnt & 0xFFFF; // length of pattern in words
      Uint32 cnt = len_cnt >> 16;    // no of parameters

      LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
      Local_pattern_store pattern(pool, treeNodePtr.p->m_keyPattern);

      err = DbspjErr::InvalidTreeParametersSpecificationIncorrectKeyParamCount;
      if (unlikely( ((cnt==0) != ((treeBits & DABits::NI_KEY_PARAMS) == 0)) ||
                    ((cnt==0) != ((paramBits & DABits::PI_KEY_PARAMS) == 0))))
      {
        jam();
        break;
      }

      if (treeBits & DABits::NI_KEY_LINKED)
      {
        jam();
        DEBUG("LINKED-KEY PATTERN w/ " << cnt << " PARAM values");
        /**
         * Expand pattern into a new pattern (with linked values)
         */
        err = expand(pattern, treeNodePtr, tree, len, param, cnt);
        if (unlikely(err != 0))
        {
          jam();
          break;
        }
        /**
         * This node constructs a new key for each send
         */
        treeNodePtr.p->m_bits |= TreeNode::T_KEYINFO_CONSTRUCTED;
      }
      else
      {
        jam();
        DEBUG("FIXED-KEY w/ " << cnt << " PARAM values");
        /**
         * Expand pattern directly into keyinfo.
         * This means a "fixed" key from here on.
         */
        bool hasNull;
        Uint32 keyInfoPtrI = RNIL;
        err = expand(keyInfoPtrI, tree, len, param, cnt, hasNull);
        if (unlikely(err != 0))
        {
          jam();
          releaseSection(keyInfoPtrI);
          break;
        }
        if (unlikely(hasNull))
        {
          /* The API should have eliminated requests w/ const-NULL keys */
          jam();
          DEBUG("BEWARE: FIXED-key contains NULL values");
          releaseSection(keyInfoPtrI);
          // treeNodePtr.p->m_bits |= TreeNode::T_NULL_PRUNE;
          // break;
          ndbrequire(false);
        }
        treeNodePtr.p->m_send.m_keyInfoPtrI = keyInfoPtrI;
      }
      ndbassert(err == 0); // All errors should have been handled
    } // DABits::NI_KEY_...

    const Uint32 mask =
      DABits::NI_LINKED_ATTR | DABits::NI_ATTR_INTERPRET |
      DABits::NI_ATTR_LINKED | DABits::NI_ATTR_PARAMS;

    if (((treeBits & mask) | (paramBits & DABits::PI_ATTR_LIST)) != 0)
    {
      jam();
      /**
       * OPTIONAL PART 3: attrinfo handling
       * - NI_LINKED_ATTR - these are attributes to be passed to children
       * - PI_ATTR_LIST - this is "user-columns" (passed as parameters)
       *
       * - NI_ATTR_INTERPRET - tree contains an interpreted program
       * - NI_ATTR_LINKED - means that the attr-info contains linked values
       * - NI_ATTR_PARAMS - means that the attr-info is parameterized
       *   PI_ATTR_PARAMS - means that the parameters contain attr parameters
       *
       * IF NI_ATTR_INTERPRET
       *   DATA0[LO/HI] = Length of program / total #arguments to program
       *   DATA1..N     = Program
       *
       * IF NI_ATTR_PARAMS
       *   DATA0[LO/HI] = Length / #param
       *   DATA1..N     = PARAM-0...PARAM-M
       *
       * IF PI_ATTR_INTERPRET
       *   DATA0[LO/HI] = Length of program / Length of subroutine-part
       *   DATA1..N     = Program (scan filter)
       *
       * IF NI_ATTR_LINKED
       *   DATA0[LO/HI] = Length / #
       */
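      /**
       * The five words reserved below become the section-length header of
       * an interpreted AttrInfo. As updated by the code in this function,
       * sectionptrs[1] is the interpreted program length, sectionptrs[3]
       * the size of the final read section, and sectionptrs[4] the
       * subroutine section length; the remaining two (by NDB convention
       * the initial-read and update sections) stay 0 here.
       */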
      Uint32 sections[5] = { 0, 0, 0, 0, 0 };
      Uint32 * sectionptrs = 0;

      bool interpreted =
        (treeBits & DABits::NI_ATTR_INTERPRET) ||
        (paramBits & DABits::PI_ATTR_INTERPRET) ||
        (treeNodePtr.p->m_bits & TreeNode::T_ATTR_INTERPRETED);

      if (interpreted)
      {
        /**
         * Add section headers for interpreted execution
         * and create a pointer so that they can be updated later
         */
        jam();
        err = DbspjErr::OutOfSectionMemory;
        if (unlikely(!appendToSection(attrInfoPtrI, sections, 5)))
        {
          jam();
          break;
        }

        SegmentedSectionPtr ptr;
        getSection(ptr, attrInfoPtrI);
        sectionptrs = ptr.p->theData;

        if (treeBits & DABits::NI_ATTR_INTERPRET)
        {
          jam();

          /**
           * Having two interpreter programs is an error.
           */
          err = DbspjErr::BothTreeAndParametersContainInterpretedProgram;
          if (unlikely(paramBits & DABits::PI_ATTR_INTERPRET))
          {
            jam();
            break;
          }

          treeNodePtr.p->m_bits |= TreeNode::T_ATTR_INTERPRETED;
          Uint32 len2 = * tree.ptr++;
          Uint32 len_prg = len2 & 0xFFFF;  // Length of interpreted program
          Uint32 len_pattern = len2 >> 16; // Length of attr param pattern
          err = DbspjErr::OutOfSectionMemory;
          if (unlikely(!appendToSection(attrInfoPtrI, tree.ptr, len_prg)))
          {
            jam();
            break;
          }

          tree.ptr += len_prg;
          sectionptrs[1] = len_prg; // size of interpreted program

          Uint32 tmp = * tree.ptr ++; // attr-pattern header
          Uint32 cnt = tmp & 0xFFFF;

          if (treeBits & DABits::NI_ATTR_LINKED)
          {
            jam();
            /**
             * Expand pattern into a new pattern (with linked values)
             */
            LocalArenaPoolImpl pool(requestPtr.p->m_arena,
                                    m_dependency_map_pool);
            Local_pattern_store pattern(pool,treeNodePtr.p->m_attrParamPattern);
            err = expand(pattern, treeNodePtr, tree, len_pattern, param, cnt);
            if (unlikely(err))
            {
              jam();
              break;
            }
            /**
             * This node constructs a new attr-info for each send
             */
            treeNodePtr.p->m_bits |= TreeNode::T_ATTRINFO_CONSTRUCTED;
          }
          else
          {
            jam();
            /**
             * Expand pattern directly into the attr-info param.
             * This means a "fixed" attr-info param from here on.
             */
            bool hasNull;
            err = expand(attrParamPtrI, tree, len_pattern, param, cnt, hasNull);
            if (unlikely(err))
            {
              jam();
              break;
            }
            // ndbrequire(!hasNull);
          }
        }
        else // if (treeBits & DABits::NI_ATTR_INTERPRET)
        {
          jam();
          /**
           * Only relevant for interpreted stuff
           */
          ndbrequire((treeBits & DABits::NI_ATTR_PARAMS) == 0);
          ndbrequire((paramBits & DABits::PI_ATTR_PARAMS) == 0);
          ndbrequire((treeBits & DABits::NI_ATTR_LINKED) == 0);

          treeNodePtr.p->m_bits |= TreeNode::T_ATTR_INTERPRETED;

          if (! (paramBits & DABits::PI_ATTR_INTERPRET))
          {
            jam();

            /**
             * Tree node has interpreted execution,
             * but no interpreted program specified;
             * auto-add Exit_ok (i.e. return each row)
             */
            Uint32 tmp = Interpreter::ExitOK();
            err = DbspjErr::OutOfSectionMemory;
            if (unlikely(!appendToSection(attrInfoPtrI, &tmp, 1)))
            {
              jam();
              break;
            }
            sectionptrs[1] = 1;
          }
        } // if (treeBits & DABits::NI_ATTR_INTERPRET)
      } // if (interpreted)

      if (paramBits & DABits::PI_ATTR_INTERPRET)
      {
        jam();

        /**
         * Add the interpreted code that represents the scan filter.
         */
        const Uint32 len2 = * param.ptr++;
        Uint32 program_len = len2 & 0xFFFF;
        Uint32 subroutine_len = len2 >> 16;
        err = DbspjErr::OutOfSectionMemory;
        if (unlikely(!appendToSection(attrInfoPtrI, param.ptr, program_len)))
        {
          jam();
          break;
        }
        /**
         * The interpreted code is added in the "Interpreted execute region"
         * of the attrinfo (see Dbtup::interpreterStartLab() for details).
         * It will thus execute before reading the attributes that constitute
         * the projections.
         */
        sectionptrs[1] = program_len;
        param.ptr += program_len;

        if (subroutine_len)
        {
          if (unlikely(!appendToSection(attrParamPtrI,
                                        param.ptr, subroutine_len)))
          {
            jam();
            break;
          }
          sectionptrs[4] = subroutine_len;
          param.ptr += subroutine_len;
        }
        treeNodePtr.p->m_bits |= TreeNode::T_ATTR_INTERPRETED;
      }

      Uint32 sum_read = 0;
      Uint32 dst[MAX_ATTRIBUTES_IN_TABLE + 2];

      if (paramBits & DABits::PI_ATTR_LIST)
      {
        jam();
        Uint32 len = * param.ptr++;
        DEBUG("PI_ATTR_LIST");

        treeNodePtr.p->m_bits |= TreeNode::T_USER_PROJECTION;
        err = DbspjErr::OutOfSectionMemory;
        if (!appendToSection(attrInfoPtrI, param.ptr, len))
        {
          jam();
          break;
        }

        param.ptr += len;

        /**
         * Insert a flush of this partial result set
         */
        Uint32 flush[4];
        flush[0] = AttributeHeader::FLUSH_AI << 16;
        flush[1] = ctx.m_resultRef;
        flush[2] = ctx.m_resultData;
        flush[3] = ctx.m_senderRef; // RouteRef
        if (!appendToSection(attrInfoPtrI, flush, 4))
        {
          jam();
          break;
        }

        sum_read += len + 4;
      }

      if (treeBits & DABits::NI_LINKED_ATTR)
      {
        jam();
        DEBUG("NI_LINKED_ATTR");
        err = DbspjErr::InvalidTreeNodeSpecification;
        Uint32 cnt = unpackList(MAX_ATTRIBUTES_IN_TABLE, dst, tree);
        if (unlikely(cnt > MAX_ATTRIBUTES_IN_TABLE))
        {
          jam();
          break;
        }

        /**
         * AttributeHeader contains the attrId in the 16 higher bits
         */
        for (Uint32 i = 0; i<cnt; i++)
          dst[i] <<= 16;

        /**
         * Read correlation factor
         */
        dst[cnt++] = AttributeHeader::CORR_FACTOR32 << 16;

        err = DbspjErr::OutOfSectionMemory;
        if (!appendToSection(attrInfoPtrI, dst, cnt))
        {
          jam();
          break;
        }

        sum_read += cnt;
      }

      if (interpreted)
      {
        jam();
        /**
         * Let reads be performed *after* the interpreted program,
         * i.e. in the "final read" section
         */
        sectionptrs[3] = sum_read;

        if (attrParamPtrI != RNIL)
        {
          jam();
          ndbrequire(!(treeNodePtr.p->m_bits&TreeNode::T_ATTRINFO_CONSTRUCTED));

          SegmentedSectionPtr ptr;
          getSection(ptr, attrParamPtrI);
          {
            SectionReader r0(ptr, getSectionSegmentPool());
            err = appendTreeToSection(attrInfoPtrI, r0, ptr.sz);
            if (unlikely(err != 0))
            {
              jam();
              break;
            }
            sectionptrs[4] = ptr.sz;
          }
          releaseSection(attrParamPtrI);
          attrParamPtrI = RNIL;
        }
      }

      treeNodePtr.p->m_send.m_attrInfoPtrI = attrInfoPtrI;
      attrInfoPtrI = RNIL;
    } // if (((treeBits & mask) | (paramBits & DABits::PI_ATTR_LIST)) != 0)

    // An empty attrinfo would cause a node crash.
    if (treeNodePtr.p->m_send.m_attrInfoPtrI == RNIL)
    {
      jam();

      // Add a dummy interpreted program.
      Uint32 tmp = Interpreter::ExitOK();
      err = DbspjErr::OutOfSectionMemory;
      if (unlikely(!appendToSection(treeNodePtr.p->m_send.m_attrInfoPtrI, &tmp, 1)))
      {
        jam();
        break;
      }
    }

    return 0;
  } while (0);

  if (attrInfoPtrI != RNIL)
  {
    jam();
    releaseSection(attrInfoPtrI);
  }

  if (attrParamPtrI != RNIL)
  {
    jam();
    releaseSection(attrParamPtrI);
  }

  return err;
}

/**
 * END - MODULE COMMON PARSE/UNPACK
 */

/**
 * Process a scan request for an ndb$info table. (These are used for monitoring
 * purposes and do not contain application data.)
 */
void Dbspj::execDBINFO_SCANREQ(Signal *signal)
{
  DbinfoScanReq req = * CAST_PTR(DbinfoScanReq, &signal->theData[0]);
  const Ndbinfo::ScanCursor* cursor =
    CAST_CONSTPTR(Ndbinfo::ScanCursor, DbinfoScan::getCursorPtr(&req));
  Ndbinfo::Ratelimit rl;

  jamEntry();

  switch(req.tableId){

  // The SPJ block only implements the ndbinfo.counters table.
  case Ndbinfo::COUNTERS_TABLEID:
  {
    Ndbinfo::counter_entry counters[] = {
      { Ndbinfo::SPJ_READS_RECEIVED_COUNTER,
        c_Counters.get_counter(CI_READS_RECEIVED) },
      { Ndbinfo::SPJ_LOCAL_READS_SENT_COUNTER,
        c_Counters.get_counter(CI_LOCAL_READS_SENT) },
      { Ndbinfo::SPJ_REMOTE_READS_SENT_COUNTER,
        c_Counters.get_counter(CI_REMOTE_READS_SENT) },
      { Ndbinfo::SPJ_READS_NOT_FOUND_COUNTER,
        c_Counters.get_counter(CI_READS_NOT_FOUND) },
      { Ndbinfo::SPJ_TABLE_SCANS_RECEIVED_COUNTER,
        c_Counters.get_counter(CI_TABLE_SCANS_RECEIVED) },
      { Ndbinfo::SPJ_LOCAL_TABLE_SCANS_SENT_COUNTER,
        c_Counters.get_counter(CI_LOCAL_TABLE_SCANS_SENT) },
      { Ndbinfo::SPJ_RANGE_SCANS_RECEIVED_COUNTER,
        c_Counters.get_counter(CI_RANGE_SCANS_RECEIVED) },
      { Ndbinfo::SPJ_LOCAL_RANGE_SCANS_SENT_COUNTER,
        c_Counters.get_counter(CI_LOCAL_RANGE_SCANS_SENT) },
      { Ndbinfo::SPJ_REMOTE_RANGE_SCANS_SENT_COUNTER,
        c_Counters.get_counter(CI_REMOTE_RANGE_SCANS_SENT) },
      { Ndbinfo::SPJ_SCAN_BATCHES_RETURNED_COUNTER,
        c_Counters.get_counter(CI_SCAN_BATCHES_RETURNED) },
      { Ndbinfo::SPJ_SCAN_ROWS_RETURNED_COUNTER,
        c_Counters.get_counter(CI_SCAN_ROWS_RETURNED) },
      { Ndbinfo::SPJ_PRUNED_RANGE_SCANS_RECEIVED_COUNTER,
        c_Counters.get_counter(CI_PRUNED_RANGE_SCANS_RECEIVED) },
      { Ndbinfo::SPJ_CONST_PRUNED_RANGE_SCANS_RECEIVED_COUNTER,
        c_Counters.get_counter(CI_CONST_PRUNED_RANGE_SCANS_RECEIVED) }
    };
    const size_t num_counters = sizeof(counters) / sizeof(counters[0]);

    Uint32 i = cursor->data[0];
    const BlockNumber bn = blockToMain(number());
    while (i < num_counters)
    {
      jam();
      Ndbinfo::Row row(signal, req);
      row.write_uint32(getOwnNodeId());
      row.write_uint32(bn);           // block number
      row.write_uint32(instance());   // block instance
      row.write_uint32(counters[i].id);

      row.write_uint64(counters[i].val);
      ndbinfo_send_row(signal, req, row, rl);
      i++;
      if (rl.need_break(req))
      {
        jam();
        ndbinfo_send_scan_break(signal, req, rl, i);
        return;
      }
    }
    break;
  }

  default:
    break;
  }

  ndbinfo_send_scan_conf(signal, req, rl);
} // Dbspj::execDBINFO_SCANREQ(Signal *signal)

/**
 * Incremental calculation of standard deviation:
 *
 * Suppose that the data set is x1, x2,..., xn; then for each xn
 * we can find an updated mean (M) and square of sums (S) as:
 *
 * M(1) = x(1),  M(k) = M(k-1) + (x(k) - M(k-1)) / k
 * S(1) = 0,     S(k) = S(k-1) + (x(k) - M(k-1)) * (x(k) - M(k))
 *
 * Source: http://mathcentral.uregina.ca/QQ/database/QQ.09.02/carlos1.html
 */
void Dbspj::IncrementalStatistics::update(double sample)
{
  // Prevent wrap-around
  if (m_noOfSamples < 0xffffffff)
  {
    m_noOfSamples++;
    const double delta = sample - m_mean;
    m_mean += delta/m_noOfSamples;
    m_sumSquare += delta * (sample - m_mean);
  }
}
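
/**
 * Worked example of the recurrence above, with samples 2, 4, 6:
 *   M: 2, 3, 4   and   S: 0, 2, 8
 * The sample variance is then S/(n-1) = 8/2 = 4, i.e. a standard
 * deviation of 2, matching a direct computation on the three values.
 * A caller could obtain it as (sketch, assuming n > 1):
 *
 *   double stddev = sqrt(m_sumSquare / (m_noOfSamples - 1));
 */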