/*
   Copyright (c) 2012, 2021, Oracle and/or its affiliates.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is also distributed with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation.  The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have included with MySQL.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/

#define DBSPJ_C
#include "Dbspj.hpp"

#include <ndb_version.h>
#include <SectionReader.hpp>
#include <signaldata/LqhKey.hpp>
#include <signaldata/QueryTree.hpp>
#include <signaldata/TcKeyRef.hpp>
#include <signaldata/RouteOrd.hpp>
#include <signaldata/TransIdAI.hpp>
#include <signaldata/DiGetNodes.hpp>
#include <signaldata/DihScanTab.hpp>
#include <signaldata/AttrInfo.hpp>
#include <signaldata/CreateTab.hpp>
#include <signaldata/PrepDropTab.hpp>
#include <signaldata/DropTab.hpp>
#include <signaldata/AlterTab.hpp>
#include <signaldata/DbspjErr.hpp>
#include <Interpreter.hpp>
#include <AttributeHeader.hpp>
#include <AttributeDescriptor.hpp>
#include <KeyDescriptor.hpp>
#include <md5_hash.hpp>
#include <signaldata/TcKeyConf.hpp>

#include <signaldata/NodeFailRep.hpp>
#include <signaldata/ReadNodesConf.hpp>
#include <signaldata/SignalDroppedRep.hpp>

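/* Id used to tag this file's entries in the jam() trace buffers. */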
#define JAM_FILE_ID 479


#ifdef VM_TRACE

/**
 * DEBUG options for different parts of the SPJ block.
 * Comment out those parts you don't want DEBUG'ed.
 */
//#define DEBUG(x) ndbout << "DBSPJ: "<< x << endl;
//#define DEBUG_DICT(x) ndbout << "DBSPJ: "<< x << endl;
//#define DEBUG_LQHKEYREQ
//#define DEBUG_SCAN_FRAGREQ
#endif

/**
 * Provide empty defs for those DEBUGs which have to be defined.
 */
#if !defined(DEBUG)
#define DEBUG(x)
#endif

#if !defined(DEBUG_DICT)
#define DEBUG_DICT(x)
#endif

#define DEBUG_CRASH() ndbassert(false)

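/**
 * Sentinel values: a TreeNode Ptr and a RowRef which can never refer
 * to a real object (both use RNIL as the 'invalid' id).
 */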
const Ptr<Dbspj::TreeNode> Dbspj::NullTreeNodePtr(0, RNIL);
const Dbspj::RowRef Dbspj::NullRowRef = { RNIL, GLOBAL_PAGE_SIZE_WORDS, { 0 } };


void Dbspj::execSIGNAL_DROPPED_REP(Signal* signal)
{
  /* An incoming signal was dropped, handle it.
   * Dropped signal really means that we ran out of
   * long signal buffering to store its sections.
   */
  jamEntry();

  if (!assembleDroppedFragments(signal))
  {
    jam();
    return;
  }

  const SignalDroppedRep* rep = (SignalDroppedRep*) &signal->theData[0];
  Uint32 originalGSN= rep->originalGsn;

  DEBUG("SignalDroppedRep received for GSN " << originalGSN);

  switch(originalGSN) {
  case GSN_SCAN_FRAGREQ:
  {
    jam();
    /* Get information necessary to send SCAN_FRAGREF back to TC */
    // TODO : Handle dropped signal fragments

    const ScanFragReq * const truncatedScanFragReq =
      (ScanFragReq *) &rep->originalData[0];

    handle_early_scanfrag_ref(signal, truncatedScanFragReq,
                              DbspjErr::OutOfSectionMemory);
    break;
  }
  default:
    jam();
    /* Don't expect dropped signals for other GSNs
     */
    SimulatedBlock::execSIGNAL_DROPPED_REP(signal);
  };

  return;
}

inline
Uint32
Dbspj::TableRecord::checkTableError(Uint32 schemaVersion) const
{
  DEBUG_DICT("Dbspj::TableRecord::checkTableError"
            << ", m_flags: " << m_flags
            << ", m_currentSchemaVersion: " << m_currentSchemaVersion
            << ", check schemaVersion: " << schemaVersion);

  if (!get_enabled())
    return DbspjErr::NoSuchTable;
  if (get_dropping())
    return DbspjErr::DropTableInProgress;
  if (table_version_major(schemaVersion) != table_version_major(m_currentSchemaVersion))
    return DbspjErr::WrongSchemaVersion;

  return 0;
}

// create table prepare
void Dbspj::execTC_SCHVERREQ(Signal* signal)
{
  jamEntry();
  if (! assembleFragments(signal)) {
    jam();
    return;
  }
  const TcSchVerReq* req = CAST_CONSTPTR(TcSchVerReq, signal->getDataPtr());
  const Uint32 tableId = req->tableId;
  const Uint32 senderRef = req->senderRef;
  const Uint32 senderData = req->senderData;

  DEBUG_DICT("Dbspj::execTC_SCHVERREQ"
     << ", tableId: " << tableId
     << ", version: " << req->tableVersion
  );

  TableRecordPtr tablePtr;
  tablePtr.i = tableId;
  ptrCheckGuard(tablePtr, c_tabrecFilesize, m_tableRecord);

  ndbrequire(tablePtr.p->get_prepared() == false);
  ndbrequire(tablePtr.p->get_enabled() == false);
  new (tablePtr.p) TableRecord(req->tableVersion);

  /**
   * NOTE: Even though more information, like 'tableType',
   * 'noOfPrimaryKeys' etc., is available from TcSchVerReq,
   * we do *not* store it in TableRecord.
   * Instead this information is retrieved on demand from
   * g_key_descriptor_pool where it is readily available.
   * The 'contract' for consistency of this information is
   * such that:
   * 1) g_key_descriptor[ENTRY] will be populated *before*
   *    any block receives CREATE_TAB_REQ (or equivalent).
   * 2) g_key_descriptor[ENTRY] will be invalidated *after*
   *    all blocks have sent DROP_TAB_CONF (commit)
   * Thus, this info is consistent whenever required by SPJ.
   */
  TcSchVerConf * conf = (TcSchVerConf*)signal->getDataPtr();
  conf->senderRef = reference();
  conf->senderData = senderData;
  sendSignal(senderRef, GSN_TC_SCHVERCONF, signal,
             TcSchVerConf::SignalLength, JBB);
}//Dbspj::execTC_SCHVERREQ()

// create table commit
void Dbspj::execTAB_COMMITREQ(Signal* signal)
{
  jamEntry();
  const Uint32 senderData = signal->theData[0];
  const Uint32 senderRef = signal->theData[1];
  const Uint32 tableId = signal->theData[2];

  DEBUG_DICT("Dbspj::execTAB_COMMITREQ"
     << ", tableId: " << tableId
  );

  TableRecordPtr tablePtr;
  tablePtr.i = tableId;
  ptrCheckGuard(tablePtr, c_tabrecFilesize, m_tableRecord);

  ndbrequire(tablePtr.p->get_prepared() == true);
  ndbrequire(tablePtr.p->get_enabled() == false);
  tablePtr.p->set_enabled(true);
  tablePtr.p->set_prepared(false);
  tablePtr.p->set_dropping(false);

  signal->theData[0] = senderData;
  signal->theData[1] = reference();
  signal->theData[2] = tableId;
  sendSignal(senderRef, GSN_TAB_COMMITCONF, signal, 3, JBB);
}//Dbspj::execTAB_COMMITREQ

void
Dbspj::execPREP_DROP_TAB_REQ(Signal* signal)
{
  jamEntry();

  PrepDropTabReq* req = (PrepDropTabReq*)signal->getDataPtr();
  const Uint32 tableId = req->tableId;
  const Uint32 senderRef = req->senderRef;
  const Uint32 senderData = req->senderData;

  DEBUG_DICT("Dbspj::execPREP_DROP_TAB_REQ"
     << ", tableId: " << tableId
  );

  TableRecordPtr tablePtr;
  tablePtr.i = tableId;
  ptrCheckGuard(tablePtr, c_tabrecFilesize, m_tableRecord);

  if (!tablePtr.p->get_enabled())
  {
    jam();
    PrepDropTabRef* ref = (PrepDropTabRef*)signal->getDataPtrSend();
    ref->senderRef = reference();
    ref->senderData = senderData;
    ref->tableId = tableId;
    ref->errorCode = PrepDropTabRef::NoSuchTable;
    sendSignal(senderRef, GSN_PREP_DROP_TAB_REF, signal,
               PrepDropTabRef::SignalLength, JBB);
    return;
  }

  if (tablePtr.p->get_dropping())
  {
    jam();
    PrepDropTabRef* ref = (PrepDropTabRef*)signal->getDataPtrSend();
    ref->senderRef = reference();
    ref->senderData = senderData;
    ref->tableId = tableId;
    ref->errorCode = PrepDropTabRef::DropInProgress;
    sendSignal(senderRef, GSN_PREP_DROP_TAB_REF, signal,
               PrepDropTabRef::SignalLength, JBB);
    return;
  }

  tablePtr.p->set_dropping(true);
  tablePtr.p->set_prepared(false);

  PrepDropTabConf* conf = (PrepDropTabConf*)signal->getDataPtrSend();
  conf->tableId = tableId;
  conf->senderRef = reference();
  conf->senderData = senderData;
  sendSignal(senderRef, GSN_PREP_DROP_TAB_CONF, signal,
             PrepDropTabConf::SignalLength, JBB);
}//Dbspj::execPREP_DROP_TAB_REQ

void
Dbspj::execDROP_TAB_REQ(Signal* signal)
{
  jamEntry();

  const DropTabReq* req = (DropTabReq*)signal->getDataPtr();
  const Uint32 tableId = req->tableId;
  const Uint32 senderRef = req->senderRef;
  const Uint32 senderData = req->senderData;
  DropTabReq::RequestType rt = (DropTabReq::RequestType)req->requestType;

  DEBUG_DICT("Dbspj::execDROP_TAB_REQ"
     << ", tableId: " << tableId
  );

  TableRecordPtr tablePtr;
  tablePtr.i = tableId;
  ptrCheckGuard(tablePtr, c_tabrecFilesize, m_tableRecord);

  if (rt == DropTabReq::OnlineDropTab){
    if (!tablePtr.p->get_enabled()){
      jam();
      DropTabRef* ref = (DropTabRef*)signal->getDataPtrSend();
      ref->senderRef = reference();
      ref->senderData = senderData;
      ref->tableId = tableId;
      ref->errorCode = DropTabRef::NoSuchTable;
      sendSignal(senderRef, GSN_DROP_TAB_REF, signal,
                 DropTabRef::SignalLength, JBB);
      return;
    }
    if (!tablePtr.p->get_dropping()){
      jam();
      DropTabRef* ref = (DropTabRef*)signal->getDataPtrSend();
      ref->senderRef = reference();
      ref->senderData = senderData;
      ref->tableId = tableId;
      ref->errorCode = DropTabRef::DropWoPrep;
      sendSignal(senderRef, GSN_DROP_TAB_REF, signal,
                 DropTabRef::SignalLength, JBB);
      return;
    }
  }

  tablePtr.p->set_enabled(false);
  tablePtr.p->set_prepared(false);
  tablePtr.p->set_dropping(false);

  DropTabConf * conf = (DropTabConf*)signal->getDataPtrSend();
  conf->tableId = tableId;
  conf->senderRef = reference();
  conf->senderData = senderData;
  sendSignal(senderRef, GSN_DROP_TAB_CONF, signal,
             DropTabConf::SignalLength, JBB);
}//Dbspj::execDROP_TAB_REQ

void
Dbspj::execALTER_TAB_REQ(Signal* signal)
{
  jamEntry();

  const AlterTabReq* req = (const AlterTabReq*)signal->getDataPtr();
  const Uint32 tableId = req->tableId;
  const Uint32 senderRef = req->senderRef;
  const Uint32 senderData = req->senderData;
  const Uint32 tableVersion = req->tableVersion;
  const Uint32 newTableVersion = req->newTableVersion;
  AlterTabReq::RequestType requestType =
    (AlterTabReq::RequestType) req->requestType;

  DEBUG_DICT("Dbspj::execALTER_TAB_REQ"
     << ", tableId: " << tableId
     << ", version: " << tableVersion << " --> " << newTableVersion
  );

  TableRecordPtr tablePtr;
  tablePtr.i = tableId;
  ptrCheckGuard(tablePtr, c_tabrecFilesize, m_tableRecord);

  switch (requestType) {
  case AlterTabReq::AlterTablePrepare:
    jam();
    break;
  case AlterTabReq::AlterTableRevert:
    jam();
    tablePtr.p->m_currentSchemaVersion = tableVersion;
    break;
  case AlterTabReq::AlterTableCommit:
    jam();
    tablePtr.p->m_currentSchemaVersion = newTableVersion;
    break;
  default:
    ndbrequire(false);
    break;
  }

  AlterTabConf* conf = (AlterTabConf*)signal->getDataPtrSend();
  conf->senderRef = reference();
  conf->senderData = senderData;
  conf->connectPtr = RNIL;
  sendSignal(senderRef, GSN_ALTER_TAB_CONF, signal,
             AlterTabConf::SignalLength, JBB);
}//Dbspj::execALTER_TAB_REQ

/** Set up pools, request hashes and the table record array from config. */
void Dbspj::execREAD_CONFIG_REQ(Signal* signal)
{
  jamEntry();
  const ReadConfigReq req =
    *reinterpret_cast<const ReadConfigReq*>(signal->getDataPtr());

  Pool_context pc;
  pc.m_block = this;

  DEBUG("execREAD_CONFIG_REQ");
  DEBUG("sizeof(Request): " << sizeof(Request) <<
        " sizeof(TreeNode): " << sizeof(TreeNode));

  m_arenaAllocator.init(1024, RT_SPJ_ARENA_BLOCK, pc);
  m_request_pool.arena_pool_init(&m_arenaAllocator, RT_SPJ_REQUEST, pc);
  m_treenode_pool.arena_pool_init(&m_arenaAllocator, RT_SPJ_TREENODE, pc);
  m_scanfraghandle_pool.arena_pool_init(&m_arenaAllocator, RT_SPJ_SCANFRAG, pc);
  m_lookup_request_hash.setSize(16);
  m_scan_request_hash.setSize(16);
  void* ptr = m_ctx.m_mm.get_memroot();
  m_page_pool.set((RowPage*)ptr, (Uint32)~0);

  Record_info ri;
  Dependency_map::createRecordInfo(ri, RT_SPJ_DATABUFFER);
  m_dependency_map_pool.init(&m_arenaAllocator, ri, pc);

  {
    const ndb_mgm_configuration_iterator * p =
      m_ctx.m_config.getOwnConfigIterator();
    ndbrequire(p != 0);

    ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_SPJ_TABLE, &c_tabrecFilesize));
  }
  m_tableRecord = (TableRecord*)allocRecord("TableRecord",
                                            sizeof(TableRecord),
                                            c_tabrecFilesize);

  TableRecordPtr tablePtr;
  for (tablePtr.i = 0; tablePtr.i < c_tabrecFilesize; tablePtr.i++) {
    ptrAss(tablePtr, m_tableRecord);
    new (tablePtr.p) TableRecord;
  }//for

  ReadConfigConf* const conf =
    reinterpret_cast<ReadConfigConf*>(signal->getDataPtrSend());
  conf->senderRef = reference();
  conf->senderData = req.senderData;

  sendSignal(req.senderRef, GSN_READ_CONFIG_CONF, signal,
             ReadConfigConf::SignalLength, JBB);
}//Dbspj::execREAD_CONFIG_REQ()

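/* Block reference of the STTOR sender; STTORRY is returned to it. */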
static Uint32 f_STTOR_REF = 0;

void Dbspj::execSTTOR(Signal* signal)
{
//#define UNIT_TEST_DATABUFFER2

  jamEntry();
  /* START CASE */
  const Uint16 tphase = signal->theData[1];
  f_STTOR_REF = signal->getSendersBlockRef();

  if (tphase == 1)
  {
    jam();
    signal->theData[0] = 0;
    sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 1000, 1);
  }

  if (tphase == 4)
  {
    jam();

    signal->theData[0] = reference();
    sendSignal(NDBCNTR_REF, GSN_READ_NODESREQ, signal, 1, JBB);
    return;
  }

  sendSTTORRY(signal);


#ifdef UNIT_TEST_DATABUFFER2
  if (tphase == 120)
  {
    ndbout_c("basic test of ArenaPool / DataBuffer2");

    for (Uint32 i = 0; i<100; i++)
    {
      ArenaHead ah;
      if (!m_arenaAllocator.seize(ah))
      {
        ndbout_c("Failed to allocate arena");
        break;
      }

      ndbout_c("*** LOOP %u", i);
      Uint32 sum = 0;
      Dependency_map::Head head;
      LocalArenaPoolImpl pool(ah, m_dependency_map_pool);
      for (Uint32 j = 0; j<100; j++)
      {
        Uint32 sz = rand() % 1000;
        if (0)
          ndbout_c("adding %u", sz);
        Local_dependency_map list(pool, head);
        for (Uint32 i = 0; i<sz; i++)
          signal->theData[i] = sum + i;
        list.append(signal->theData, sz);
        sum += sz;
      }

      {
        ndbrequire(head.getSize() == sum);
        Local_dependency_map list(pool, head);
        Dependency_map::ConstDataBufferIterator it;
        Uint32 cnt = 0;
        for (list.first(it); !it.isNull(); list.next(it))
        {
          ndbrequire(* it.data == cnt);
          cnt++;
        }

        ndbrequire(cnt == sum);
      }

      Resource_limit rl;
      if (m_ctx.m_mm.get_resource_limit(7, rl))
      {
        ndbout_c("Resource %d min: %d max: %d curr: %d",
                 7, rl.m_min, rl.m_max, rl.m_curr);
      }

      {
        ndbout_c("release map");
        Local_dependency_map list(pool, head);
        list.release();
      }

      ndbout_c("release all");
      m_arenaAllocator.release(ah);
      ndbout_c("*** LOOP %u sum: %u", i, sum);
    }
  }
#endif
}//Dbspj::execSTTOR()

void
Dbspj::sendSTTORRY(Signal* signal)
{
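  /* STTORRY reports the start phases in which this block wants STTOR
   * again; the list is terminated by 255. */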
  signal->theData[0] = 0;
  signal->theData[1] = 0;    /* BLOCK CATEGORY */
  signal->theData[2] = 0;    /* SIGNAL VERSION NUMBER */
  signal->theData[3] = 4;
#ifdef UNIT_TEST_DATABUFFER2
  signal->theData[4] = 120;  /* Start phase end*/
#else
  signal->theData[4] = 255;
#endif
  signal->theData[5] = 255;
  sendSignal(f_STTOR_REF, GSN_STTORRY, signal, 6, JBB);
}

void
Dbspj::execREAD_NODESCONF(Signal* signal)
{
  jamEntry();

  ReadNodesConf * const conf = (ReadNodesConf *)signal->getDataPtr();

  if (getNodeState().getNodeRestartInProgress())
  {
    jam();
    c_alive_nodes.assign(NdbNodeBitmask::Size, conf->startedNodes);
    c_alive_nodes.set(getOwnNodeId());
  }
  else
  {
    jam();
    c_alive_nodes.assign(NdbNodeBitmask::Size, conf->startingNodes);
    NdbNodeBitmask tmp;
    tmp.assign(NdbNodeBitmask::Size, conf->startedNodes);
    c_alive_nodes.bitOR(tmp);
  }

  sendSTTORRY(signal);
}

void
Dbspj::execINCL_NODEREQ(Signal* signal)
{
  jamEntry();
  const Uint32 senderRef = signal->theData[0];
  const Uint32 nodeId  = signal->theData[1];

  ndbrequire(!c_alive_nodes.get(nodeId));
  c_alive_nodes.set(nodeId);

  signal->theData[0] = nodeId;
  signal->theData[1] = reference();
  sendSignal(senderRef, GSN_INCL_NODECONF, signal, 2, JBB);
}

void
Dbspj::execNODE_FAILREP(Signal* signal)
{
  jamEntry();

  const NodeFailRep * rep = (NodeFailRep*)signal->getDataPtr();
  NdbNodeBitmask failed;
  failed.assign(NdbNodeBitmask::Size, rep->theNodes);

  c_alive_nodes.bitANDC(failed);

  /* Clean up possibly fragmented signals being received or sent */
  for (Uint32 node = 1; node < MAX_NDB_NODES; node++)
  {
    if (failed.get(node))
    {
      jam();
      simBlockNodeFailure(signal, node);
    }//if
  }//for

  signal->theData[0] = 1;
  signal->theData[1] = 0;
  failed.copyto(NdbNodeBitmask::Size, signal->theData + 2);
  sendSignal(reference(), GSN_CONTINUEB, signal, 2 + NdbNodeBitmask::Size,
             JBB);
}

void
Dbspj::execAPI_FAILREQ(Signal* signal)
{
  jamEntry();
  Uint32 failedApiNode = signal->theData[0];
  Uint32 ref = signal->theData[1];

  /**
   * We only need to care about lookups
   *   as SCAN's are aborted by DBTC
   *
   * As SPJ does not receive / send fragmented signals
   *   directly to API nodes, simBlockNodeFailure()
   *   should not really be required - assert this.
   */
  Uint32 elementsCleaned = simBlockNodeFailure(signal, failedApiNode);
  ndbassert(elementsCleaned == 0); // As SPJ has no fragmented API signals
  (void) elementsCleaned;          // Avoid compiler error

  signal->theData[0] = failedApiNode;
  signal->theData[1] = reference();
  sendSignal(ref, GSN_API_FAILCONF, signal, 2, JBB);
}

void
Dbspj::execCONTINUEB(Signal* signal)
{
  jamEntry();
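  /* theData[0] selects the continuation:
   *   0 - periodic releaseGlobal() cleanup
   *   1 - node-failure check of the lookup-request hash
   *   2 - node-failure check of the scan-request hash
   */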
  switch(signal->theData[0]) {
  case 0:
    releaseGlobal(signal);
    return;
  case 1:
    nodeFail_checkRequests(signal);
    return;
  case 2:
    nodeFail_checkRequests(signal);
    return;
  }

  ndbrequire(false);
}

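/**
 * Walk one of the request hashes (type 1: lookups, type 2: scans) in
 * steps of roughly RT_BREAK entries, letting nodeFail() handle each
 * request affected by the failed nodes. The walk is resumed via
 * CONTINUEB; when the lookup hash is exhausted we continue with the
 * scan hash.
 */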
void
Dbspj::nodeFail_checkRequests(Signal* signal)
{
  jam();
  const Uint32 type = signal->theData[0];
  const Uint32 bucket = signal->theData[1];

  NdbNodeBitmask failed;
  failed.assign(NdbNodeBitmask::Size, signal->theData+2);

  Request_iterator iter;
  Request_hash * hash;
  switch(type){
  case 1:
    hash = &m_lookup_request_hash;
    break;
  case 2:
    hash = &m_scan_request_hash;
    break;
  }
  hash->next(bucket, iter);

  const Uint32 RT_BREAK = 64;
  for(Uint32 i = 0; (i<RT_BREAK || iter.bucket == bucket) &&
        !iter.curr.isNull(); i++)
  {
    jam();

    Ptr<Request> requestPtr = iter.curr;
    hash->next(iter);
    i += nodeFail(signal, requestPtr, failed);
  }

  if (!iter.curr.isNull())
  {
    jam();
    signal->theData[0] = type;
    signal->theData[1] = bucket;
    failed.copyto(NdbNodeBitmask::Size, signal->theData+2);
    sendSignal(reference(), GSN_CONTINUEB, signal, 2 + NdbNodeBitmask::Size,
               JBB);
  }
  else if (type == 1)
  {
    jam();
    signal->theData[0] = 2;
    signal->theData[1] = 0;
    failed.copyto(NdbNodeBitmask::Size, signal->theData+2);
    sendSignal(reference(), GSN_CONTINUEB, signal, 2 + NdbNodeBitmask::Size,
               JBB);
  }
  else if (type == 2)
  {
    jam();
  }
}

/**
 * MODULE LQHKEYREQ
 */
void Dbspj::execLQHKEYREQ(Signal* signal)
{
  jamEntry();
  c_Counters.incr_counter(CI_READS_RECEIVED, 1);

  const LqhKeyReq* req = reinterpret_cast<const LqhKeyReq*>(signal->getDataPtr());

  /**
   * #0 - KEYINFO contains key for first operation (used for hash in TC)
   * #1 - ATTRINFO contains tree + parameters
   *      (unless StoredProcId is set, when only parameters are sent,
   *       but this is not yet implemented)
   */
  SegmentedSectionPtr attrPtr;
  SectionHandle handle = SectionHandle(this, signal);
  handle.getSection(attrPtr, LqhKeyReq::AttrInfoSectionNum);
  const Uint32 keyPtrI = handle.m_ptr[LqhKeyReq::KeyInfoSectionNum].i;

  Uint32 err;
  Ptr<Request> requestPtr(0, RNIL);
  do
  {
    ArenaHead ah;
    err = DbspjErr::OutOfQueryMemory;
    if (unlikely(!m_arenaAllocator.seize(ah)))
      break;

    if (ERROR_INSERTED_CLEAR(17001))
    {
      jam();
      ndbout_c("Injecting OutOfQueryMem error 17001 at line %d file %s",
                __LINE__,  __FILE__);
      break;
    }
    if (unlikely(!m_request_pool.seize(ah, requestPtr)))
    {
      jam();
      break;
    }
    new (requestPtr.p) Request(ah);
    do_init(requestPtr.p, req, signal->getSendersBlockRef());

    Uint32 len_cnt;

    {
      SectionReader r0(attrPtr, getSectionSegmentPool());

      err = DbspjErr::ZeroLengthQueryTree;
      if (unlikely(!r0.getWord(&len_cnt)))
        break;
    }

    Uint32 len = QueryTree::getLength(len_cnt);
    Uint32 cnt = QueryTree::getNodeCnt(len_cnt);

    {
      SectionReader treeReader(attrPtr, getSectionSegmentPool());
      SectionReader paramReader(attrPtr, getSectionSegmentPool());
      paramReader.step(len); // skip over tree to parameters

      Build_context ctx;
      ctx.m_resultRef = req->variableData[0];
      ctx.m_savepointId = req->savePointId;
      ctx.m_scanPrio = 1;
      ctx.m_start_signal = signal;
      ctx.m_senderRef = signal->getSendersBlockRef();

      err = build(ctx, requestPtr, treeReader, paramReader);
      if (unlikely(err != 0))
        break;

      /**
       * The root TreeNode in the Request takes ownership of the keyPtr
       * section when build has completed.
       * We are done with attrPtr, which is now released.
       */
      Ptr<TreeNode> rootNodePtr = ctx.m_node_list[0];
      rootNodePtr.p->m_send.m_keyInfoPtrI = keyPtrI;
      release(attrPtr);
      handle.clear();
    }

    /**
     * Store request in list(s)/hash(es)
     */
    store_lookup(requestPtr);

    /**
     * A query being shipped as a LQHKEYREQ may return at most one row
     * per operation, i.e. be a (multi-)lookup
     */
    if (ERROR_INSERTED_CLEAR(17013) ||
        unlikely(!requestPtr.p->isLookup() || requestPtr.p->m_node_cnt != cnt))
    {
      jam();
      err = DbspjErr::InvalidRequest;
      break;
    }

    start(signal, requestPtr);
    return;
  } while (0);

  /**
   * Error handling below,
   *  'err' may contain error code.
   */
  if (!requestPtr.isNull())
  {
    jam();
    cleanup(requestPtr);
  }
  releaseSections(handle);  // a NOOP, if we reached 'handle.clear()' above
  handle_early_lqhkey_ref(signal, req, err);
}

void
Dbspj::do_init(Request* requestP, const LqhKeyReq* req, Uint32 senderRef)
{
  requestP->m_bits = 0;
  requestP->m_errCode = 0;
  requestP->m_state = Request::RS_BUILDING;
  requestP->m_node_cnt = 0;
  requestP->m_cnt_active = 0;
  requestP->m_rows = 0;
  requestP->m_active_nodes.clear();
  requestP->m_completed_nodes.clear();
  requestP->m_outstanding = 0;
  requestP->m_transId[0] = req->transId1;
  requestP->m_transId[1] = req->transId2;
  requestP->m_rootFragId = LqhKeyReq::getFragmentId(req->fragmentData);
  bzero(requestP->m_lookup_node_data, sizeof(requestP->m_lookup_node_data));
#ifdef SPJ_TRACE_TIME
  requestP->m_cnt_batches = 0;
  requestP->m_sum_rows = 0;
  requestP->m_sum_running = 0;
  requestP->m_sum_waiting = 0;
  requestP->m_save_time = NdbTick_getCurrentTicks();
#endif
  const Uint32 reqInfo = req->requestInfo;
  Uint32 tmp = req->clientConnectPtr;
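  /* For a dirty read the result is sent directly to the API client,
   * whose reference and operation record arrive in variableData;
   * otherwise the requesting TC stays the destination. */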
  if (LqhKeyReq::getDirtyFlag(reqInfo) &&
      LqhKeyReq::getOperation(reqInfo) == ZREAD)
  {
    jam();

    ndbrequire(LqhKeyReq::getApplicationAddressFlag(reqInfo));
    //const Uint32 apiRef   = lqhKeyReq->variableData[0];
    //const Uint32 apiOpRec = lqhKeyReq->variableData[1];
    tmp = req->variableData[1];
    requestP->m_senderData = tmp;
    requestP->m_senderRef = senderRef;
  }
  else
  {
    if (LqhKeyReq::getSameClientAndTcFlag(reqInfo) == 1)
    {
      if (LqhKeyReq::getApplicationAddressFlag(reqInfo))
        tmp = req->variableData[2];
      else
        tmp = req->variableData[0];
    }
    requestP->m_senderData = tmp;
    requestP->m_senderRef = senderRef;
  }
  requestP->m_rootResultData = tmp;
}

void
Dbspj::store_lookup(Ptr<Request> requestPtr)
{
  ndbassert(requestPtr.p->isLookup());
  Ptr<Request> tmp;
  bool found = m_lookup_request_hash.find(tmp, *requestPtr.p);
  ndbrequire(found == false);
  m_lookup_request_hash.add(requestPtr);
}

void
Dbspj::handle_early_lqhkey_ref(Signal* signal,
                               const LqhKeyReq * lqhKeyReq,
                               Uint32 err)
{
  /**
   * Error path...
   */
  ndbrequire(err);
  const Uint32 reqInfo = lqhKeyReq->requestInfo;
  const Uint32 transid[2] = { lqhKeyReq->transId1, lqhKeyReq->transId2 };

  if (LqhKeyReq::getDirtyFlag(reqInfo) &&
      LqhKeyReq::getOperation(reqInfo) == ZREAD)
  {
    jam();
    /* Dirty read sends TCKEYREF direct to client, and nothing to TC */
    ndbrequire(LqhKeyReq::getApplicationAddressFlag(reqInfo));
    const Uint32 apiRef   = lqhKeyReq->variableData[0];
    const Uint32 apiOpRec = lqhKeyReq->variableData[1];

    TcKeyRef* const tcKeyRef = reinterpret_cast<TcKeyRef*>(signal->getDataPtrSend());

    tcKeyRef->connectPtr = apiOpRec;
    tcKeyRef->transId[0] = transid[0];
    tcKeyRef->transId[1] = transid[1];
    tcKeyRef->errorCode = err;
    sendTCKEYREF(signal, apiRef, signal->getSendersBlockRef());
  }
  else
  {
    jam();
    const Uint32 returnref = signal->getSendersBlockRef();
    const Uint32 clientPtr = lqhKeyReq->clientConnectPtr;

    Uint32 TcOprec = clientPtr;
    if (LqhKeyReq::getSameClientAndTcFlag(reqInfo) == 1)
    {
      if (LqhKeyReq::getApplicationAddressFlag(reqInfo))
        TcOprec = lqhKeyReq->variableData[2];
      else
        TcOprec = lqhKeyReq->variableData[0];
    }

    LqhKeyRef* const ref = reinterpret_cast<LqhKeyRef*>(signal->getDataPtrSend());
    ref->userRef = clientPtr;
    ref->connectPtr = TcOprec;
    ref->errorCode = err;
    ref->transId1 = transid[0];
    ref->transId2 = transid[1];
    sendSignal(returnref, GSN_LQHKEYREF, signal,
               LqhKeyRef::SignalLength, JBB);
  }
}

void
Dbspj::sendTCKEYREF(Signal* signal, Uint32 ref, Uint32 routeRef)
{
  const Uint32 nodeId = refToNode(ref);
  const bool connectedToNode = getNodeInfo(nodeId).m_connected;

  if (likely(connectedToNode))
  {
    jam();
    sendSignal(ref, GSN_TCKEYREF, signal, TcKeyRef::SignalLength, JBB);
  }
  else
  {
    jam();
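    /* Not connected to the destination node: pack the TCKEYREF into a
     * section and ask 'routeRef' to forward it via ROUTE_ORD. */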
    memmove(signal->theData+25, signal->theData, 4*TcKeyRef::SignalLength);
    RouteOrd* ord = (RouteOrd*)signal->getDataPtrSend();
    ord->dstRef = ref;
    ord->srcRef = reference();
    ord->gsn = GSN_TCKEYREF;
    ord->cnt = 0;
    LinearSectionPtr ptr[3];
    ptr[0].p = signal->theData+25;
    ptr[0].sz = TcKeyRef::SignalLength;
    sendSignal(routeRef, GSN_ROUTE_ORD, signal, RouteOrd::SignalLength, JBB,
               ptr, 1);
  }
}

void
Dbspj::sendTCKEYCONF(Signal* signal, Uint32 len, Uint32 ref, Uint32 routeRef)
{
  const Uint32 nodeId = refToNode(ref);
  const bool connectedToNode = getNodeInfo(nodeId).m_connected;

  if (likely(connectedToNode))
  {
    jam();
    sendSignal(ref, GSN_TCKEYCONF, signal, len, JBB);
  }
  else
  {
    jam();
    memmove(signal->theData+25, signal->theData, 4*len);
    RouteOrd* ord = (RouteOrd*)signal->getDataPtrSend();
    ord->dstRef = ref;
    ord->srcRef = reference();
    ord->gsn = GSN_TCKEYCONF;
    ord->cnt = 0;
    LinearSectionPtr ptr[3];
    ptr[0].p = signal->theData+25;
    ptr[0].sz = len;
    sendSignal(routeRef, GSN_ROUTE_ORD, signal, RouteOrd::SignalLength, JBB,
               ptr, 1);
  }
}

/**
 * END - MODULE LQHKEYREQ
 */


/**
 * MODULE SCAN_FRAGREQ
 */
void
Dbspj::execSCAN_FRAGREQ(Signal* signal)
{
  jamEntry();

  /* Reassemble if the request was fragmented */
  if (!assembleFragments(signal))
  {
    jam();
    return;
  }

  const ScanFragReq * req = (ScanFragReq *)&signal->theData[0];

#ifdef DEBUG_SCAN_FRAGREQ
  ndbout_c("Incoming SCAN_FRAGREQ ");
  printSCAN_FRAGREQ(stdout, signal->getDataPtrSend(),
                    ScanFragReq::SignalLength + 2,
                    DBLQH);
#endif

  /**
   * #0 - ATTRINFO contains tree + parameters
   *      (unless StoredProcId is set, when only parameters are sent,
   *       but this is not yet implemented)
   * #1 - KEYINFO if first op is index scan - contains bounds for first scan
   *              if first op is lookup - contains keyinfo for lookup
   */
  SectionHandle handle = SectionHandle(this, signal);
  SegmentedSectionPtr attrPtr;
  handle.getSection(attrPtr, ScanFragReq::AttrInfoSectionNum);

  Uint32 err;
  Ptr<Request> requestPtr(0, RNIL);
  do
  {
    ArenaHead ah;
    err = DbspjErr::OutOfQueryMemory;
    if (unlikely(!m_arenaAllocator.seize(ah)))
      break;

    if (ERROR_INSERTED_CLEAR(17002))
    {
      ndbout_c("Injecting OutOfQueryMem error 17002 at line %d file %s",
                __LINE__,  __FILE__);
      jam();
      break;
    }
    if (unlikely(!m_request_pool.seize(ah, requestPtr)))
    {
      jam();
      break;
    }
    new (requestPtr.p) Request(ah);
    do_init(requestPtr.p, req, signal->getSendersBlockRef());

    Uint32 len_cnt;
    {
      SectionReader r0(attrPtr, getSectionSegmentPool());
      err = DbspjErr::ZeroLengthQueryTree;
      if (unlikely(!r0.getWord(&len_cnt)))
        break;
    }

    Uint32 len = QueryTree::getLength(len_cnt);
    Uint32 cnt = QueryTree::getNodeCnt(len_cnt);

    {
      SectionReader treeReader(attrPtr, getSectionSegmentPool());
      SectionReader paramReader(attrPtr, getSectionSegmentPool());
      paramReader.step(len); // skip over tree to parameters

      Build_context ctx;
      ctx.m_resultRef = req->resultRef;
      ctx.m_scanPrio = ScanFragReq::getScanPrio(req->requestInfo);
      ctx.m_savepointId = req->savePointId;
      ctx.m_batch_size_rows = req->batch_size_rows;
      ctx.m_start_signal = signal;
      ctx.m_senderRef = signal->getSendersBlockRef();

      err = build(ctx, requestPtr, treeReader, paramReader);
      if (unlikely(err != 0))
        break;

      /**
       * The root TreeNode in the Request takes ownership of the keyPtr
       * section when build has completed.
       * We are done with attrPtr, which is now released.
       */
      Ptr<TreeNode> rootNodePtr = ctx.m_node_list[0];
      if (handle.m_cnt > 1)
      {
        jam();
        const Uint32 keyPtrI = handle.m_ptr[ScanFragReq::KeyInfoSectionNum].i;
        rootNodePtr.p->m_send.m_keyInfoPtrI = keyPtrI;
      }
      release(attrPtr);
      handle.clear();
    }

    /**
     * Store request in list(s)/hash(es)
     */
    store_scan(requestPtr);

    if (ERROR_INSERTED_CLEAR(17013) ||
        unlikely(!requestPtr.p->isScan() || requestPtr.p->m_node_cnt != cnt))
    {
      jam();
      err = DbspjErr::InvalidRequest;
      break;
    }

    start(signal, requestPtr);
    return;
  } while (0);

  if (!requestPtr.isNull())
  {
    jam();
    cleanup(requestPtr);
  }
  releaseSections(handle);  // a NOOP, if we reached 'handle.clear()' above
  handle_early_scanfrag_ref(signal, req, err);
}

void
Dbspj::do_init(Request* requestP, const ScanFragReq* req, Uint32 senderRef)
{
  requestP->m_bits = 0;
  requestP->m_errCode = 0;
  requestP->m_state = Request::RS_BUILDING;
  requestP->m_node_cnt = 0;
  requestP->m_cnt_active = 0;
  requestP->m_rows = 0;
  requestP->m_active_nodes.clear();
  requestP->m_completed_nodes.clear();
  requestP->m_outstanding = 0;
  requestP->m_senderRef = senderRef;
  requestP->m_senderData = req->senderData;
  requestP->m_transId[0] = req->transId1;
  requestP->m_transId[1] = req->transId2;
  requestP->m_rootResultData = req->resultData;
  requestP->m_rootFragId = req->fragmentNoKeyLen;
  bzero(requestP->m_lookup_node_data, sizeof(requestP->m_lookup_node_data));
#ifdef SPJ_TRACE_TIME
  requestP->m_cnt_batches = 0;
  requestP->m_sum_rows = 0;
  requestP->m_sum_running = 0;
  requestP->m_sum_waiting = 0;
  requestP->m_save_time = NdbTick_getCurrentTicks();
#endif
}

void
Dbspj::store_scan(Ptr<Request> requestPtr)
{
  ndbassert(requestPtr.p->isScan());
  Ptr<Request> tmp;
  bool found = m_scan_request_hash.find(tmp, *requestPtr.p);
  ndbrequire(found == false);
  m_scan_request_hash.add(requestPtr);
}

void
Dbspj::handle_early_scanfrag_ref(Signal* signal,
                                 const ScanFragReq * _req,
                                 Uint32 err)
{
  ScanFragReq req = *_req;
  Uint32 senderRef = signal->getSendersBlockRef();

  ScanFragRef * ref = (ScanFragRef*)&signal->theData[0];
  ref->senderData = req.senderData;
  ref->transId1 = req.transId1;
  ref->transId2 = req.transId2;
  ref->errorCode = err;
  sendSignal(senderRef, GSN_SCAN_FRAGREF, signal,
             ScanFragRef::SignalLength, JBB);
}

/**
 * END - MODULE SCAN_FRAGREQ
 */

/**
 * MODULE GENERIC
 */
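/**
 * build() parses the serialized query tree and its parameter list in
 * lock-step, invoking the per-operation 'm_build' function for each
 * node/parameter pair, and finally sets up the row-buffering strategy
 * for the request.
 */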
Uint32
Dbspj::build(Build_context& ctx,
             Ptr<Request> requestPtr,
             SectionReader & tree,
             SectionReader & param)
{
  Uint32 tmp0, tmp1;
  Uint32 err = DbspjErr::ZeroLengthQueryTree;
  ctx.m_cnt = 0;
  ctx.m_scan_cnt = 0;

  tree.getWord(&tmp0);
  Uint32 loop = QueryTree::getNodeCnt(tmp0);

  DEBUG("::build()");
  err = DbspjErr::InvalidTreeNodeCount;
  if (loop == 0 || loop > NDB_SPJ_MAX_TREE_NODES)
  {
    jam();
    goto error;
  }

  while (ctx.m_cnt < loop)
  {
    DEBUG(" - loop " << ctx.m_cnt << " pos: " << tree.getPos().currPos);
    tree.peekWord(&tmp0);
    param.peekWord(&tmp1);
    Uint32 node_op = QueryNode::getOpType(tmp0);
    Uint32 node_len = QueryNode::getLength(tmp0);
    Uint32 param_op = QueryNodeParameters::getOpType(tmp1);
    Uint32 param_len = QueryNodeParameters::getLength(tmp1);

    err = DbspjErr::QueryNodeTooBig;
    if (unlikely(node_len >= NDB_ARRAY_SIZE(m_buffer0)))
    {
      jam();
      goto error;
    }

    err = DbspjErr::QueryNodeParametersTooBig;
    if (unlikely(param_len >= NDB_ARRAY_SIZE(m_buffer1)))
    {
      jam();
      goto error;
    }

    err = DbspjErr::InvalidTreeNodeSpecification;
    if (unlikely(tree.getWords(m_buffer0, node_len) == false))
    {
      jam();
      goto error;
    }

    err = DbspjErr::InvalidTreeParametersSpecification;
    if (unlikely(param.getWords(m_buffer1, param_len) == false))
    {
      jam();
      goto error;
    }

#if defined(DEBUG_LQHKEYREQ) || defined(DEBUG_SCAN_FRAGREQ)
    printf("node: ");
    for (Uint32 i = 0; i<node_len; i++)
      printf("0x%.8x ", m_buffer0[i]);
    printf("\n");

    printf("param: ");
    for (Uint32 i = 0; i<param_len; i++)
      printf("0x%.8x ", m_buffer1[i]);
    printf("\n");
#endif

    err = DbspjErr::UnknowQueryOperation;
    if (unlikely(node_op != param_op))
    {
      jam();
      goto error;
    }
    if (ERROR_INSERTED_CLEAR(17006))
    {
      ndbout_c("Injecting UnknowQueryOperation error 17006 at line %d file %s",
                __LINE__,  __FILE__);
      jam();
      goto error;
    }

    const OpInfo* info = getOpInfo(node_op);
    if (unlikely(info == 0))
    {
      jam();
      goto error;
    }

    QueryNode* qn = (QueryNode*)m_buffer0;
    QueryNodeParameters * qp = (QueryNodeParameters*)m_buffer1;
    qn->len = node_len;
    qp->len = param_len;
    err = (this->*(info->m_build))(ctx, requestPtr, qn, qp);
    if (unlikely(err != 0))
    {
      jam();
      goto error;
    }

    /**
     * only first node gets access to signal
     */
    ctx.m_start_signal = 0;

    ndbrequire(ctx.m_cnt < NDB_ARRAY_SIZE(ctx.m_node_list));
    ctx.m_cnt++;
  }
  requestPtr.p->m_node_cnt = ctx.m_cnt;

  if (ctx.m_scan_cnt > 1)
  {
    jam();
    requestPtr.p->m_bits |= Request::RT_MULTI_SCAN;
  }

  // Construct RowBuffers where required
  err = initRowBuffers(requestPtr);
  if (unlikely(err != 0))
  {
    jam();
    goto error;
  }

  return 0;

error:
  jam();
  return err;
}

/**
 * initRowBuffers will decide row-buffering strategy, and init
 * the RowBuffers where required.
 */
Uint32
Dbspj::initRowBuffers(Ptr<Request> requestPtr)
{
  jam();
  /**
   * Execution of a scan request requires restrictions
   * on how lookup-children issue their LQHKEYREQs:
   * A large scan result with many parallel lookup
   * siblings can easily flood the job buffers with too many
   * REQs. So we set up an 'execution plan' for how a
   * scan request should be executed:
   *
   * NOTE: It could make sense to do the same for a lookup Req.
   * However, CONF/REF for these leaf operations are not
   * returned to SPJ. Thus, there is no way to know when
   * such an operation has completed and another operation could
   * be resumed.
   *
   * As a lookup request does not have the same potential for
   * producing lots of LQHKEYREQs, we believe/hope the risk
   * of flooding job buffers for a lookup request can be ignored.
   */
  if (requestPtr.p->isScan())
  {
    jam();
    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
    Ptr<TreeNode> treeRootPtr;

    list.first(treeRootPtr);   // treeRootPtr is a scan
    ndbrequire(!treeRootPtr.isNull());
    buildExecPlan(requestPtr, treeRootPtr, NullTreeNodePtr);
  }

  /**
   * Init ROW_BUFFERS iff the Request has to buffer any rows.
   */
  if (requestPtr.p->m_bits & Request::RT_ROW_BUFFERS)
  {
    jam();

    /**
     * If the multi-scan is non-bushy (the normal case)
     *   we don't strictly need BUFFER_VAR for RT_ROW_BUFFERS
     *   but could instead pop a row-stack frame;
     *     however, this is not implemented...
     *
     * So, currently use BUFFER_VAR if 'RT_MULTI_SCAN'
     *
     * NOTE: This should easily be solvable by having a
     *       RowBuffer for each TreeNode instead
     */
    if (requestPtr.p->m_bits & Request::RT_MULTI_SCAN)
    {
      jam();
      requestPtr.p->m_rowBuffer.init(BUFFER_VAR);
    }
    else
    {
      jam();
      requestPtr.p->m_rowBuffer.init(BUFFER_STACK);
    }

    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
    Ptr<TreeNode> treeNodePtr;
    for (list.first(treeNodePtr); !treeNodePtr.isNull(); list.next(treeNodePtr))
    {
      jam();
      ndbassert(treeNodePtr.p->m_batch_size > 0);
      /**
       * Construct a List or Map RowCollection for those TreeNodes
       * requiring rows to be buffered.
       */
      if (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER_MAP)
      {
        jam();
        treeNodePtr.p->m_rows.construct (RowCollection::COLLECTION_MAP,
                                         requestPtr.p->m_rowBuffer,
                                         treeNodePtr.p->m_batch_size);
      }
      else if (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER)
      {
        jam();
        treeNodePtr.p->m_rows.construct (RowCollection::COLLECTION_LIST,
                                         requestPtr.p->m_rowBuffer,
                                         treeNodePtr.p->m_batch_size);
      }
    }
  }

  return 0;
} // Dbspj::initRowBuffers

/**
 * buildExecPlan():
 *   Decides the order/pace in which the different
 *   TreeNodes should be executed.
 *   Currently it is only used to insert sequentialization points in
 *   the execution of bushy lookup-child nodes (aka star-join).
 *   This is done in order to avoid sending too many LQHKEYREQ
 *   signals, which could overflow the job buffers.
 *
 *   For each branch of TreeNodes starting with a scan, we identify
 *   any 'bushiness' among its lookup children. We set up a left -> right
 *   execution order among these such that:
 *    - A child lookup operation cannot be REQuested before we
 *      have either executed a TRANSID_AI from the scan parent,
 *      or executed a CONF / REF from another lookup child.
 *    - When a lookup CONF or REF is executed, its TreeNode is
 *      annotated with 'resume' info which decides if/which TreeNode
 *      we should execute next.
 *
 *   This maintains a strict 1:1 fanout between incoming rows
 *   being processed and new row REQuests being produced,
 *   thus avoiding that a large scan result floods the job buffers
 *   with too many lookup requests.
 *
 * FUTURE:
 *   For join children where child execution now is T_EXEC_SEQUENTIAL,
 *   it should be relatively simple to extend SPJ to do 'inner join'.
 *   As we at these sequential points know whether the previously
 *   joined children found any matches, we can skip REQuesting
 *   rows from other children having the same parent row.
 */
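/**
 * Illustration (hypothetical plan): a scan root S with the bushy lookup
 * children L1, L2 and L3 gets the execution order S -> L1 -> L2 -> L3,
 * where L2 and L3 are marked T_EXEC_SEQUENTIAL and are resumed by the
 * CONF/REF of the preceding lookup.
 */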
void
Dbspj::buildExecPlan(Ptr<Request>  requestPtr,
                     Ptr<TreeNode> treeNodePtr,
                     Ptr<TreeNode> nextLookup)
{
  Uint32 lookupChildren[NDB_SPJ_MAX_TREE_NODES];
  Uint32 lookupChildCnt = 0;

  /**
   * Need to iterate lookup children in reverse order to set up 'next'
   * operations. As this is not possible through a ConstDataBufferIterator,
   * store any lookup children into the temp array lookupChildren[].
   * Scan children are parents of new 'scan -> lookup' branches.
   */
  {
    LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
    Local_dependency_map childList(pool, treeNodePtr.p->m_dependent_nodes);
    Dependency_map::ConstDataBufferIterator it;
    for (childList.first(it); !it.isNull(); childList.next(it))
    {
      jam();
      Ptr<TreeNode> childPtr;
      m_treenode_pool.getPtr(childPtr, *it.data);

      if (childPtr.p->m_info == &g_LookupOpInfo)
      {
        jam();
        lookupChildren[lookupChildCnt++] = *it.data;
      }
      else
      {
        // Build a new plan starting from this scan operation
        jam();
        buildExecPlan(requestPtr, childPtr, NullTreeNodePtr);
      }
    }
  }

  /**
   * Lookup children might have to wait for previous LQHKEYREQs to
   * complete before they are allowed to send their own requests.
   * (In order to not overfill the job buffers)
   */
  if (treeNodePtr.p->m_info == &g_LookupOpInfo &&
      !nextLookup.isNull())
  {
    jam();
    /**
     * Annotate that:
     *  - 'nextLookup' is not allowed to start immediately.
     *  - 'treeNode' restarts 'nextLookup' when it completes
     */
    nextLookup.p->m_bits |= TreeNode::T_EXEC_SEQUENTIAL;

    if (lookupChildCnt==0)  // isLeaf() or only scan children
    {
      jam();
      treeNodePtr.p->m_resumeEvents = TreeNode::TN_RESUME_CONF |
                                      TreeNode::TN_RESUME_REF;
      DEBUG("ExecPlan: 'REF/CONF' from node " << treeNodePtr.p->m_node_no
         << " resumes node " << nextLookup.p->m_node_no);
    }
    else
    {
      /**
       * Will REQuest from one of its child lookups if CONF,
       * so we don't resume another TreeNode in addition.
       */
      jam();
      treeNodePtr.p->m_resumeEvents = TreeNode::TN_RESUME_REF;
      DEBUG("ExecPlan: 'REF' from node " << treeNodePtr.p->m_node_no
         << " resumes node " << nextLookup.p->m_node_no);
    }
    treeNodePtr.p->m_resumePtrI = nextLookup.i;

    /**
     * When we T_EXEC_SEQUENTIAL, the TreeNode will iterate its
     * parent rows in order to create new REQs as previous ones
     * are completed (CONF or REF).
     *  - Prepare a RowIterator for the parent rows
     *  - Buffer rows to be iterated in the parent node
     */
    {
      jam();

      ndbassert(nextLookup.p->m_parentPtrI != RNIL);
      Ptr<TreeNode> parentPtr;
      m_treenode_pool.getPtr(parentPtr, nextLookup.p->m_parentPtrI);
      parentPtr.p->m_bits |= TreeNode::T_ROW_BUFFER
                           | TreeNode::T_ROW_BUFFER_MAP;
      requestPtr.p->m_bits |= Request::RT_ROW_BUFFERS;

      DEBUG("ExecPlan: rows from node " << parentPtr.p->m_node_no
         << " are buffered");
    }
  }

  /**
   * Recursively build exec. plan for any lookup child.
   */
  for (int i = lookupChildCnt-1; i >= 0; i--)
  {
    jam();
    Ptr<TreeNode> childPtr;
    m_treenode_pool.getPtr(childPtr, lookupChildren[i]);
    ndbassert(childPtr.p->m_info == &g_LookupOpInfo);

    buildExecPlan(requestPtr, childPtr, nextLookup);
    nextLookup = childPtr;
  }
} // Dbspj::buildExecPlan

Uint32
Dbspj::createNode(Build_context& ctx, Ptr<Request> requestPtr,
                  Ptr<TreeNode> & treeNodePtr)
{
  /**
   * In the future, we can have different TreeNode-allocation strategies
   *   that can be setup using the Build_context
   *
   */
  if (ERROR_INSERTED_CLEAR(17005))
  {
    ndbout_c("Injecting OutOfOperations error 17005 at line %d file %s",
             __LINE__,  __FILE__);
    jam();
    return DbspjErr::OutOfOperations;
  }
  if (m_treenode_pool.seize(requestPtr.p->m_arena, treeNodePtr))
  {
    DEBUG("createNode - seize -> ptrI: " << treeNodePtr.i);
    new (treeNodePtr.p) TreeNode(requestPtr.i);
    ctx.m_node_list[ctx.m_cnt] = treeNodePtr;
    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
    list.addLast(treeNodePtr);
    treeNodePtr.p->m_node_no = ctx.m_cnt;
    return 0;
  }
  return DbspjErr::OutOfOperations;
}

void
Dbspj::start(Signal* signal,
             Ptr<Request> requestPtr)
{
  Uint32 err = 0;
  if (requestPtr.p->m_bits & Request::RT_NEED_PREPARE)
  {
    jam();
    requestPtr.p->m_outstanding = 0;
    requestPtr.p->m_state = Request::RS_PREPARING;

    Ptr<TreeNode> nodePtr;
    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
    for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
    {
      jam();
      /**
       * Verify existence of all involved tables.
       */
      err = checkTableError(nodePtr);
      if (unlikely(err))
      {
        jam();
        break;
      }
      ndbrequire(nodePtr.p->m_info != 0);
      if (nodePtr.p->m_info->m_prepare != 0)
      {
        jam();
        (this->*(nodePtr.p->m_info->m_prepare))(signal, requestPtr, nodePtr);
      }
    }

    /**
     * preferably RT_NEED_PREPARE should only be set if blocking
     * calls are used, in which case m_outstanding should have been increased
     */
    ndbassert(err || requestPtr.p->m_outstanding);
  }
  if (unlikely(err))
  {
    jam();
    abort(signal, requestPtr, err);
    return;
  }

  checkPrepareComplete(signal, requestPtr, 0);
}

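/**
 * Account for 'cnt' completed prepare operations; when none remain
 * outstanding, either finish an aborting request or move the request
 * to RS_RUNNING and start execution at the tree root.
 */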
void
Dbspj::checkPrepareComplete(Signal * signal, Ptr<Request> requestPtr,
                            Uint32 cnt)
{
  ndbrequire(requestPtr.p->m_outstanding >= cnt);
  requestPtr.p->m_outstanding -= cnt;

  if (requestPtr.p->m_outstanding == 0)
  {
    jam();

    if (unlikely((requestPtr.p->m_state & Request::RS_ABORTING) != 0))
    {
      jam();
      batchComplete(signal, requestPtr);
      return;
    }

    Ptr<TreeNode> nodePtr;
    {
      Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
      ndbrequire(list.first(nodePtr));
    }
    Uint32 err = checkTableError(nodePtr);
    if (unlikely(err != 0))
    {
      jam();
      abort(signal, requestPtr, err);
      return;
    }

    requestPtr.p->m_state = Request::RS_RUNNING;
    ndbrequire(nodePtr.p->m_info != 0 && nodePtr.p->m_info->m_start != 0);
    (this->*(nodePtr.p->m_info->m_start))(signal, requestPtr, nodePtr);
  }
}

/**
 * Check if all outstanding work for 'Request' has completed.
 */
void
Dbspj::checkBatchComplete(Signal * signal, Ptr<Request> requestPtr,
                          Uint32 cnt)
{
  ndbrequire(requestPtr.p->m_outstanding >= cnt);
  requestPtr.p->m_outstanding -= cnt;

  if (requestPtr.p->m_outstanding == 0)
  {
    jam();
    batchComplete(signal, requestPtr);
  }
}

/**
 * Request has completed all outstanding work.
 * Signal API about completion status and cleanup
 * resources if appropriate.
 */
void
Dbspj::batchComplete(Signal* signal, Ptr<Request> requestPtr)
{
  ndbrequire(requestPtr.p->m_outstanding == 0); // "definition" of batchComplete

  bool is_complete = requestPtr.p->m_cnt_active == 0;
  bool need_complete_phase = requestPtr.p->m_bits & Request::RT_NEED_COMPLETE;

  if (requestPtr.p->isLookup())
  {
    ndbassert(requestPtr.p->m_cnt_active == 0);
  }

  if (!is_complete || (is_complete && need_complete_phase == false))
  {
    /**
     * one batch complete, and either
     *   - request not complete
     *   - or no complete-phase needed
     */
    jam();

    if ((requestPtr.p->m_state & Request::RS_ABORTING) != 0)
    {
      ndbassert(is_complete);
    }

    prepareNextBatch(signal, requestPtr);
    sendConf(signal, requestPtr, is_complete);
  }
  else if (is_complete && need_complete_phase)
  {
    jam();
    /**
     * run complete-phase
     */
    complete(signal, requestPtr);
    return;
  }

  if (requestPtr.p->m_cnt_active == 0)
  {
    jam();
    /**
     * Entire Request completed
     */
    cleanup(requestPtr);
  }
  else
  {
    jam();
    /**
     * Cleanup the TreeNode branches getting another
     * batch of result rows.
     */
    cleanupBatch(requestPtr);
  }
}
1770 
/**
 * Locate the next TreeNode(s) to retrieve more rows from.
 *
 *   Calculate the set of 'm_active_nodes' we will receive from in NEXTREQ.
 *   Add these TreeNodes to the cursor list to be iterated.
 */
void
Dbspj::prepareNextBatch(Signal* signal, Ptr<Request> requestPtr)
{
  requestPtr.p->m_cursor_nodes.init();
  requestPtr.p->m_active_nodes.clear();

  if (requestPtr.p->m_cnt_active == 0)
  {
    jam();
    return;
  }

  DEBUG("prepareNextBatch, request: " << requestPtr.i);

  if (requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT)
  {
    /**
     * If REPEAT_SCAN_RESULT we handle bushy scans by returning more *new*
     * rows from only one of the active child scans. If there are multiple
     * bushy scans not able to return their current result set in
     * a single batch, result sets from the other child scans are repeated
     * until all rows have been returned to the API client.
     *
     * Hence, the cross joined results from the bushy scans are partly
     * produced within the SPJ block on a 'batchsize granularity',
     * and partly the responsibility of the API-client, by iterating
     * the result rows within the current result batches.
     * (As opposed to non-REPEAT_SCAN_RESULT, where the client only has to
     *  care about the current batched rows - no buffering is required.)
     */
    jam();
    Ptr<TreeNode> nodePtr;
    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);

    /**
     * Locate the last 'TN_ACTIVE' TreeNode, which is the only one chosen
     * to return more *new* rows.
     */
    for (list.last(nodePtr); !nodePtr.isNull(); list.prev(nodePtr))
    {
      if (nodePtr.p->m_state == TreeNode::TN_ACTIVE)
      {
        jam();
        DEBUG("Will fetch more from 'active' m_node_no: " << nodePtr.p->m_node_no);
        /**
         * A later NEXTREQ will request a *new* batch of rows from this TreeNode.
         */
        registerActiveCursor(requestPtr, nodePtr);
        break;
      }
    }

    /**
     *  Restart/repeat other (index scan) child batches which:
     *    - Are located 'after' the nodePtr found above.
     *    - Do not have any 'active' TreeNode among their ancestors.
     *      (Those scans are started when rows from their parent nodes
     *      arrive.)
     */
    if (!nodePtr.isNull())
    {
      jam();
      DEBUG("Calculate 'active', w/ cursor on m_node_no: " << nodePtr.p->m_node_no);

      /* Restart any partial index-scans after this 'TN_ACTIVE' TreeNode */
      for (list.next(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
      {
        jam();
        if (!nodePtr.p->m_ancestors.overlaps (requestPtr.p->m_active_nodes))
        {
          jam();
          ndbrequire(nodePtr.p->m_state != TreeNode::TN_ACTIVE);
          ndbrequire(nodePtr.p->m_info != 0);
          if (nodePtr.p->m_info->m_parent_batch_repeat != 0)
          {
            jam();
            (this->*(nodePtr.p->m_info->m_parent_batch_repeat))(signal,
                                                                requestPtr,
                                                                nodePtr);
          }
        }
      }
    } // if (!nodePtr.isNull())
  }
  else  // not 'RT_REPEAT_SCAN_RESULT'
  {
    /**
     * If not REPEAT_SCAN_RESULT, multiple active TreeNodes may return their
     * remaining results simultaneously. In case of bushy-scans, these
     * concurrent result streams are cross joins of each other
     * in SQL terms. In order to produce the cross joined result, it is
     * the responsibility of the API-client to buffer these streams and
     * iterate them to produce the cross join.
     */
    jam();
    Ptr<TreeNode> nodePtr;
    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
    TreeNodeBitMask ancestors_of_active;

    for (list.last(nodePtr); !nodePtr.isNull(); list.prev(nodePtr))
    {
      /**
       * If we are active (i.e. have not consumed all rows originating
       *   from parent rows) and we are not in the set of parents
       *   for any active child:
       *
       * Then this is a position that execSCAN_NEXTREQ should continue from.
       */
      if (nodePtr.p->m_state == TreeNode::TN_ACTIVE &&
         !ancestors_of_active.get (nodePtr.p->m_node_no))
      {
        jam();
        DEBUG("Add 'active' m_node_no: " << nodePtr.p->m_node_no);
        registerActiveCursor(requestPtr, nodePtr);
        ancestors_of_active.bitOR(nodePtr.p->m_ancestors);
      }
    }
  } // if (RT_REPEAT_SCAN_RESULT)

  DEBUG("Calculated 'm_active_nodes': " << requestPtr.p->m_active_nodes.rep.data[0]);
}

void
Dbspj::registerActiveCursor(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr)
{
  Uint32 bit = treeNodePtr.p->m_node_no;
  ndbrequire(!requestPtr.p->m_active_nodes.get(bit));
  requestPtr.p->m_active_nodes.set(bit);

  Local_TreeNodeCursor_list list(m_treenode_pool, requestPtr.p->m_cursor_nodes);
#ifdef VM_TRACE
  {
    Ptr<TreeNode> nodePtr;
    for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
    {
      ndbrequire(nodePtr.i != treeNodePtr.i);
    }
  }
#endif
  list.addFirst(treeNodePtr);
}

void
Dbspj::sendConf(Signal* signal, Ptr<Request> requestPtr, bool is_complete)
{
  if (requestPtr.p->isScan())
  {
    if (unlikely((requestPtr.p->m_state & Request::RS_WAITING) != 0))
    {
      jam();
      /**
       * We aborted the request ourselves (due to node-failure?),
       *   but TC hasn't contacted us yet...so we can't reply yet...
       */
      ndbrequire(is_complete);
      ndbrequire((requestPtr.p->m_state & Request::RS_ABORTING) != 0);
      return;
    }

    if (requestPtr.p->m_errCode == 0)
    {
      jam();
      ScanFragConf * conf=
        reinterpret_cast<ScanFragConf*>(signal->getDataPtrSend());
      conf->senderData = requestPtr.p->m_senderData;
      conf->transId1 = requestPtr.p->m_transId[0];
      conf->transId2 = requestPtr.p->m_transId[1];
      conf->completedOps = requestPtr.p->m_rows;
      conf->fragmentCompleted = is_complete ? 1 : 0;
      conf->total_len = requestPtr.p->m_active_nodes.rep.data[0];

      c_Counters.incr_counter(CI_SCAN_BATCHES_RETURNED, 1);
      c_Counters.incr_counter(CI_SCAN_ROWS_RETURNED, requestPtr.p->m_rows);

#ifdef SPJ_TRACE_TIME
      const NDB_TICKS now = NdbTick_getCurrentTicks();
      const NDB_TICKS then = requestPtr.p->m_save_time;
      const Uint64 diff = NdbTick_Elapsed(then,now).microSec();

      requestPtr.p->m_sum_rows += requestPtr.p->m_rows;
      requestPtr.p->m_sum_running += Uint32(diff);
      requestPtr.p->m_cnt_batches++;
      requestPtr.p->m_save_time = now;

      if (is_complete)
      {
        Uint32 cnt = requestPtr.p->m_cnt_batches;
        ndbout_c("batches: %u avg_rows: %u avg_running: %u avg_wait: %u",
                 cnt,
                 (requestPtr.p->m_sum_rows / cnt),
                 (requestPtr.p->m_sum_running / cnt),
                 cnt == 1 ? 0 : requestPtr.p->m_sum_waiting / (cnt - 1));
      }
#endif

      /**
       * Reset for next batch.
       */
      requestPtr.p->m_rows = 0;
      if (!is_complete)
      {
        jam();
        requestPtr.p->m_state |= Request::RS_WAITING;
      }
#ifdef DEBUG_SCAN_FRAGREQ
      ndbout_c("Dbspj::sendConf() sending SCAN_FRAGCONF ");
      printSCAN_FRAGCONF(stdout, signal->getDataPtrSend(),
                         conf->total_len,
                         DBLQH);
#endif
      sendSignal(requestPtr.p->m_senderRef, GSN_SCAN_FRAGCONF, signal,
                 ScanFragConf::SignalLength, JBB);
    }
    else
    {
      jam();
      ndbrequire(is_complete);
      ScanFragRef * ref=
        reinterpret_cast<ScanFragRef*>(signal->getDataPtrSend());
      ref->senderData = requestPtr.p->m_senderData;
      ref->transId1 = requestPtr.p->m_transId[0];
      ref->transId2 = requestPtr.p->m_transId[1];
      ref->errorCode = requestPtr.p->m_errCode;

      sendSignal(requestPtr.p->m_senderRef, GSN_SCAN_FRAGREF, signal,
                 ScanFragRef::SignalLength, JBB);
    }
  }
  else
  {
    ndbassert(is_complete);
    if (requestPtr.p->m_errCode)
    {
      jam();
      Uint32 resultRef = getResultRef(requestPtr);
      TcKeyRef* ref = (TcKeyRef*)signal->getDataPtr();
      ref->connectPtr = requestPtr.p->m_senderData;
      ref->transId[0] = requestPtr.p->m_transId[0];
      ref->transId[1] = requestPtr.p->m_transId[1];
      ref->errorCode = requestPtr.p->m_errCode;
      ref->errorData = 0;

      sendTCKEYREF(signal, resultRef, requestPtr.p->m_senderRef);
    }
  }
}

Uint32
Dbspj::getResultRef(Ptr<Request> requestPtr)
{
  Ptr<TreeNode> nodePtr;
  Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
  for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
  {
    if (nodePtr.p->m_info == &g_LookupOpInfo)
    {
      jam();
      return nodePtr.p->m_lookup_data.m_api_resultRef;
    }
  }
  ndbrequire(false);
  return 0;
}

/**
 * Cleanup resources in preparation for a SCAN_NEXTREQ
 * requesting a new batch of rows.
 */
void
Dbspj::cleanupBatch(Ptr<Request> requestPtr)
{
  /**
   * There needs to be at least 1 active TreeNode, otherwise we should
   *   have taken the Request cleanup "path" in batchComplete.
   */
  ndbassert(requestPtr.p->m_cnt_active >= 1);

  /**
   * Release any buffered rows for the TreeNode branches
   * getting new rows.
   */
  if ((requestPtr.p->m_bits & Request::RT_ROW_BUFFERS) != 0)
  {
    if ((requestPtr.p->m_bits & Request::RT_MULTI_SCAN) != 0)
    {
      jam();
      /**
       * A MULTI_SCAN may selectively retrieve rows from only
       * some of the (scan-) branches in the Request.
       * Selectively release from only these branches.
       */
      releaseScanBuffers(requestPtr);
    }
    else
    {
      jam();
      /**
       * If there are no multiple scans in the request, simply release all
       * pages allocated for row buffers (all rows will be released anyway).
       */
      // Root node should be the one and only being active
      ndbassert(requestPtr.p->m_cnt_active == 1);
      ndbassert(requestPtr.p->m_active_nodes.get(0));
      releaseRequestBuffers(requestPtr);
    }
  } //RT_ROW_BUFFERS


  Ptr<TreeNode> treeNodePtr;
  Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);

  for (list.first(treeNodePtr); !treeNodePtr.isNull(); list.next(treeNodePtr))
  {
    /**
     * Re-init row buffer structures for those treeNodes getting more rows
     * in the following NEXTREQ, including all their children.
     */
    if (requestPtr.p->m_active_nodes.get(treeNodePtr.p->m_node_no) ||
        requestPtr.p->m_active_nodes.overlaps(treeNodePtr.p->m_ancestors))
    {
      jam();
      treeNodePtr.p->m_rows.init();
    }

    /**
     * Do further cleanup in treeNodes having an ancestor getting more rows.
     * (Which excludes the restarted treeNode itself.)
     */
    if (requestPtr.p->m_active_nodes.overlaps(treeNodePtr.p->m_ancestors))
    {
      jam();
      /**
       * Common TreeNode cleanup:
       * Release the list of deferred operations which may refer to
       * buffered rows released above.
       */
      LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
      {
        Local_correlation_list correlations(pool, treeNodePtr.p->m_deferred.m_correlations);
        correlations.release();
      }
      treeNodePtr.p->m_deferred.init();

      /**
       * TreeNode-type specific cleanup.
       */
      if (treeNodePtr.p->m_info->m_parent_batch_cleanup != 0)
      {
        jam();
        (this->*(treeNodePtr.p->m_info->m_parent_batch_cleanup))(requestPtr,
                                                                 treeNodePtr);
      }
    }
  }
}

void
Dbspj::releaseScanBuffers(Ptr<Request> requestPtr)
{
  Ptr<TreeNode> treeNodePtr;
  Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);

  for (list.first(treeNodePtr); !treeNodePtr.isNull(); list.next(treeNodePtr))
  {
    /**
     * Release buffered rows for all treeNodes getting more rows
     * in the following NEXTREQ, including all their children.
     */
    if (requestPtr.p->m_active_nodes.get(treeNodePtr.p->m_node_no) ||
        requestPtr.p->m_active_nodes.overlaps(treeNodePtr.p->m_ancestors))
    {
      if (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER)
      {
        jam();
        releaseNodeRows(requestPtr, treeNodePtr);
      }
    }
  }
}

void
Dbspj::releaseNodeRows(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr)
{
  /**
   * Release all rows associated with the tree node.
   */
  DEBUG("releaseNodeRows"
     << ", node: " << treeNodePtr.p->m_node_no
     << ", request: " << requestPtr.i
  );

  ndbassert(treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER);

  Uint32 cnt = 0;
  RowIterator iter;
  for (first(treeNodePtr.p->m_rows, iter); !iter.isNull(); )
  {
    jam();
    RowRef pos = iter.m_base.m_ref;
    next(iter);
    releaseRow(treeNodePtr.p->m_rows, pos);
    cnt ++;
  }
  DEBUG("RowIterator: released " << cnt << " rows!");

  if (treeNodePtr.p->m_rows.m_type == RowCollection::COLLECTION_MAP)
  {
    jam();
    // Release the (now empty) RowMap
    RowMap& map = treeNodePtr.p->m_rows.m_map;
    if (!map.isNull())
    {
      jam();
      RowRef ref;
      map.copyto(ref);
      releaseRow(treeNodePtr.p->m_rows, ref);  // Map was allocated in row memory
    }
  }
}

void
Dbspj::releaseRow(RowCollection& collection, RowRef pos)
{
  // Only valid for var-alloc, as a stack would be popped without
  // consideration of the individual rows.
  ndbassert(collection.m_base.m_rowBuffer != NULL);
  ndbassert(collection.m_base.m_rowBuffer->m_type == BUFFER_VAR);
  ndbassert(pos.m_alloc_type == BUFFER_VAR);

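  /**
   * Free the record and maintain the page list such that allocation
   * always happens from the last page: a page which becomes entirely
   * empty is released (unless it is the only page left), while a page
   * gaining more free space than the current last page is moved to the
   * tail, with 'm_var.m_free' tracking the tail page's free space.
   */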
  RowBuffer& rowBuffer = *collection.m_base.m_rowBuffer;
  Ptr<RowPage> ptr;
  m_page_pool.getPtr(ptr, pos.m_page_id);
  ((Var_page*)ptr.p)->free_record(pos.m_page_pos, Var_page::CHAIN);
  Uint32 free_space = ((Var_page*)ptr.p)->free_space;
  if (free_space == Var_page::DATA_WORDS - 1)
  {
    jam();
    LocalDLFifoList<RowPage> list(m_page_pool,
                                  rowBuffer.m_page_list);
    const bool last = list.hasNext(ptr) == false;
    list.remove(ptr);
    if (list.isEmpty())
    {
      jam();
      /**
       * Don't remove last page...
       */
      list.addLast(ptr);
      rowBuffer.m_var.m_free = free_space;
    }
    else
    {
      jam();
      if (last)
      {
        jam();
        /**
         * If we were last...set m_var.m_free to free_space of newLastPtr.
         */
        Ptr<RowPage> newLastPtr;
        ndbrequire(list.last(newLastPtr));
        rowBuffer.m_var.m_free = ((Var_page*)newLastPtr.p)->free_space;
      }
      releasePage(ptr);
    }
  }
  else if (free_space > rowBuffer.m_var.m_free)
  {
    jam();
    LocalDLFifoList<RowPage> list(m_page_pool,
                                  rowBuffer.m_page_list);
    list.remove(ptr);
    list.addLast(ptr);
    rowBuffer.m_var.m_free = free_space;
  }
}

void
Dbspj::releaseRequestBuffers(Ptr<Request> requestPtr)
{
  DEBUG("releaseRequestBuffers"
     << ", request: " << requestPtr.i
  );
  /**
   * Release all pages for the request.
   */
  {
    {
      LocalSLList<RowPage> freelist(m_page_pool, m_free_page_list);
      freelist.prependList(requestPtr.p->m_rowBuffer.m_page_list);
    }
    requestPtr.p->m_rowBuffer.reset();
  }
}

/**
 * Handle that the batch for this 'TreeNode' is complete.
 */
void
Dbspj::handleTreeNodeComplete(Signal * signal, Ptr<Request> requestPtr,
                               Ptr<TreeNode> treeNodePtr)
{
  if ((requestPtr.p->m_state & Request::RS_ABORTING) == 0)
  {
    jam();
    ndbassert(!requestPtr.p->m_completed_nodes.get(treeNodePtr.p->m_node_no));
    requestPtr.p->m_completed_nodes.set(treeNodePtr.p->m_node_no);

    /**
     * If all ancestors are complete, this has to be reported
     * as we might be waiting for this condition to start more
     * operations.
     */
    if (requestPtr.p->m_completed_nodes.contains(treeNodePtr.p->m_ancestors))
    {
      jam();
      reportAncestorsComplete(signal, requestPtr, treeNodePtr);
    }
  }
}

/**
 * Notify any children of this 'TreeNode' that all ancestor
 * TreeNodes have completed their batch.
 */
void
Dbspj::reportAncestorsComplete(Signal * signal, Ptr<Request> requestPtr,
                               Ptr<TreeNode> treeNodePtr)
{
  if (treeNodePtr.p->m_bits & TreeNode::T_REPORT_BATCH_COMPLETE)
  {
    jam();
    LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
    Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
    Dependency_map::ConstDataBufferIterator it;

    for (list.first(it); !it.isNull(); list.next(it))
    {
      jam();
      Ptr<TreeNode> childPtr;
      m_treenode_pool.getPtr(childPtr, * it.data);

      if (requestPtr.p->m_completed_nodes.contains(childPtr.p->m_ancestors) &&
          childPtr.p->m_deferred.isEmpty())
      {
        jam();

        /**
         * Does any child need to know when *my* batch is complete?
         */
        if (childPtr.p->m_bits & TreeNode::T_NEED_REPORT_BATCH_COMPLETED)
        {
          jam();
          ndbrequire(childPtr.p->m_info != 0 &&
                     childPtr.p->m_info->m_parent_batch_complete !=0 );
          (this->*(childPtr.p->m_info->m_parent_batch_complete))(signal,
                                                                 requestPtr,
                                                                 childPtr);
        }
        reportAncestorsComplete(signal, requestPtr, childPtr);
      }
    }
  }
}

void
Dbspj::abort(Signal* signal, Ptr<Request> requestPtr, Uint32 errCode)
{
  jam();

  /**
   * Need to handle online upgrade, as the protocol for
   * signaling errors for Lookup-requests changed in 7.2.5.
   * If the API-version is <= 7.2.4 we increase the severity
   * of the error to a 'NodeFailure', as this is the only
   * error code for which the API will stop further
   * 'outstanding-counting' in pre 7.2.5.
   * (Starting from 7.2.5 we will stop counting for all 'hard errors'.)
   */
  if (requestPtr.p->isLookup() &&
      !ndbd_fixed_lookup_query_abort(getNodeInfo(getResultRef(requestPtr)).m_version))
  {
    jam();
    errCode = DbspjErr::NodeFailure;
  }

  if ((requestPtr.p->m_state & Request::RS_ABORTING) != 0)
  {
    jam();
    goto checkcomplete;
  }

  requestPtr.p->m_state |= Request::RS_ABORTING;
  requestPtr.p->m_errCode = errCode;

  {
    Ptr<TreeNode> nodePtr;
    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
    for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
    {
      jam();
      ndbrequire(nodePtr.p->m_info != 0);
      if (nodePtr.p->m_info->m_abort != 0)
      {
        jam();
        (this->*(nodePtr.p->m_info->m_abort))(signal, requestPtr, nodePtr);
      }
    }
  }

checkcomplete:
  checkBatchComplete(signal, requestPtr, 0);
}

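/**
 * Handle a NODE_FAILREP: let every TreeNode having an execNODE_FAILREP
 * handler react to the set of failed nodes. The Request is aborted with
 * 'NodeFailure' if any TreeNode was affected, or (for scans) if our own
 * requesting TC was among the failed nodes.
 */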
Uint32
Dbspj::nodeFail(Signal* signal, Ptr<Request> requestPtr,
                NdbNodeBitmask nodes)
{
  Uint32 cnt = 0;
  Uint32 iter = 0;

  {
    Ptr<TreeNode> nodePtr;
    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
    for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
    {
      jam();
      ndbrequire(nodePtr.p->m_info != 0);
      if (nodePtr.p->m_info->m_execNODE_FAILREP != 0)
      {
        jam();
        iter ++;
        cnt += (this->*(nodePtr.p->m_info->m_execNODE_FAILREP))(signal,
                                                                requestPtr,
                                                                nodePtr, nodes);
      }
    }
  }

  if (cnt == 0)
  {
    jam();
    /**
     * None of the operations needed any NodeFailRep "action".
     *   Still check if our TC has died... but that is only needed
     *   in the scan case; not so for lookups.
     */
    if (requestPtr.p->isScan() &&
        nodes.get(refToNode(requestPtr.p->m_senderRef)))
    {
      jam();
      abort(signal, requestPtr, DbspjErr::NodeFailure);
    }
  }
  else
  {
    jam();
    abort(signal, requestPtr, DbspjErr::NodeFailure);
  }

  return cnt + iter;
}

void
Dbspj::complete(Signal* signal, Ptr<Request> requestPtr)
{
  /**
   * We need to run the complete-phase before sending the last SCAN_FRAGCONF.
   */
  Uint32 flags = requestPtr.p->m_state &
    (Request::RS_ABORTING | Request::RS_WAITING);

  requestPtr.p->m_state = Request::RS_COMPLETING | flags;

  // clear bit so that next batchComplete()
  // will continue to cleanup
  ndbassert((requestPtr.p->m_bits & Request::RT_NEED_COMPLETE) != 0);
  requestPtr.p->m_bits &= ~(Uint32)Request::RT_NEED_COMPLETE;
  requestPtr.p->m_outstanding = 0;
  {
    Ptr<TreeNode> nodePtr;
    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
    for (list.first(nodePtr); !nodePtr.isNull(); list.next(nodePtr))
    {
      jam();
      ndbrequire(nodePtr.p->m_info != 0);
      if (nodePtr.p->m_info->m_complete != 0)
      {
        jam();
        (this->*(nodePtr.p->m_info->m_complete))(signal, requestPtr, nodePtr);
      }
    }

    /**
     * Preferably RT_NEED_COMPLETE should only be set if blocking
     * calls are used, in which case m_outstanding should have been increased.
     *
     * BUT: scanIndex does DIH_SCAN_TAB_COMPLETE_REP which does not send a
     *      reply, so it is not really "blocking",
     *      i.e. the assert is removed.
     */
    //ndbassert(requestPtr.p->m_outstanding);
  }
  checkBatchComplete(signal, requestPtr, 0);
}

void
Dbspj::cleanup(Ptr<Request> requestPtr)
{
  ndbrequire(requestPtr.p->m_cnt_active == 0);
  {
    Ptr<TreeNode> nodePtr;
    Local_TreeNode_list list(m_treenode_pool, requestPtr.p->m_nodes);
    while (list.removeFirst(nodePtr))
    {
      jam();
      ndbrequire(nodePtr.p->m_info != 0 && nodePtr.p->m_info->m_cleanup != 0);
      (this->*(nodePtr.p->m_info->m_cleanup))(requestPtr, nodePtr);

      m_treenode_pool.release(nodePtr);
    }
  }
  if (requestPtr.p->isScan())
  {
    jam();

    if (unlikely((requestPtr.p->m_state & Request::RS_WAITING) != 0))
    {
      jam();
      requestPtr.p->m_state = Request::RS_ABORTED;
      return;
    }
    m_scan_request_hash.remove(requestPtr, *requestPtr.p);
  }
  else
  {
    jam();
    m_lookup_request_hash.remove(requestPtr, *requestPtr.p);
  }
  releaseRequestBuffers(requestPtr);
  ArenaHead ah = requestPtr.p->m_arena;
  m_request_pool.release(requestPtr);
  m_arenaAllocator.release(ah);
}

void
Dbspj::cleanup_common(Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr)
{
  jam();

  LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
  {
    Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
    list.release();
  }

  {
    Local_pattern_store pattern(pool, treeNodePtr.p->m_keyPattern);
    pattern.release();
  }

  {
    Local_pattern_store pattern(pool, treeNodePtr.p->m_attrParamPattern);
    pattern.release();
  }

  {
    Local_correlation_list correlations(pool, treeNodePtr.p->m_deferred.m_correlations);
    correlations.release();
  }

  if (treeNodePtr.p->m_send.m_keyInfoPtrI != RNIL)
  {
    jam();
    releaseSection(treeNodePtr.p->m_send.m_keyInfoPtrI);
  }

  if (treeNodePtr.p->m_send.m_attrInfoPtrI != RNIL)
  {
    jam();
    releaseSection(treeNodePtr.p->m_send.m_attrInfoPtrI);
  }
}

/**
 * Processing of signals from LQH.
 */
void
Dbspj::execLQHKEYREF(Signal* signal)
{
  jamEntry();

  const LqhKeyRef* ref = reinterpret_cast<const LqhKeyRef*>(signal->getDataPtr());

  Ptr<TreeNode> treeNodePtr;
  m_treenode_pool.getPtr(treeNodePtr, ref->connectPtr);

  Ptr<Request> requestPtr;
  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
  ndbassert(!requestPtr.p->m_completed_nodes.get(treeNodePtr.p->m_node_no));

  DEBUG("execLQHKEYREF"
     << ", node: " << treeNodePtr.p->m_node_no
     << ", request: " << requestPtr.i
     << ", errorCode: " << ref->errorCode
  );

  ndbrequire(treeNodePtr.p->m_info && treeNodePtr.p->m_info->m_execLQHKEYREF);
  (this->*(treeNodePtr.p->m_info->m_execLQHKEYREF))(signal,
                                                    requestPtr,
                                                    treeNodePtr);
}

void
Dbspj::execLQHKEYCONF(Signal* signal)
{
  jamEntry();

  const LqhKeyConf* conf = reinterpret_cast<const LqhKeyConf*>(signal->getDataPtr());
  Ptr<TreeNode> treeNodePtr;
  m_treenode_pool.getPtr(treeNodePtr, conf->opPtr);

  Ptr<Request> requestPtr;
  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
  ndbassert(!requestPtr.p->m_completed_nodes.get(treeNodePtr.p->m_node_no));

  DEBUG("execLQHKEYCONF"
     << ", node: " << treeNodePtr.p->m_node_no
     << ", request: " << requestPtr.i
  );

  ndbrequire(treeNodePtr.p->m_info && treeNodePtr.p->m_info->m_execLQHKEYCONF);
  (this->*(treeNodePtr.p->m_info->m_execLQHKEYCONF))(signal,
                                                     requestPtr,
                                                     treeNodePtr);
}

void
Dbspj::execSCAN_FRAGREF(Signal* signal)
{
  jamEntry();
  const ScanFragRef* ref = reinterpret_cast<const ScanFragRef*>(signal->getDataPtr());

  Ptr<ScanFragHandle> scanFragHandlePtr;
  m_scanfraghandle_pool.getPtr(scanFragHandlePtr, ref->senderData);
  Ptr<TreeNode> treeNodePtr;
  m_treenode_pool.getPtr(treeNodePtr, scanFragHandlePtr.p->m_treeNodePtrI);
  Ptr<Request> requestPtr;
  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
  ndbassert(!requestPtr.p->m_completed_nodes.get(treeNodePtr.p->m_node_no));

  DEBUG("execSCAN_FRAGREF"
     << ", node: " << treeNodePtr.p->m_node_no
     << ", request: " << requestPtr.i
     << ", errorCode: " << ref->errorCode
  );

  ndbrequire(treeNodePtr.p->m_info && treeNodePtr.p->m_info->m_execSCAN_FRAGREF);
  (this->*(treeNodePtr.p->m_info->m_execSCAN_FRAGREF))(signal,
                                                       requestPtr,
                                                       treeNodePtr,
                                                       scanFragHandlePtr);
}

void
Dbspj::execSCAN_HBREP(Signal* signal)
{
  jamEntry();

  Uint32 senderData = signal->theData[0];
  //Uint32 transId[2] = { signal->theData[1], signal->theData[2] };

  Ptr<ScanFragHandle> scanFragHandlePtr;
  m_scanfraghandle_pool.getPtr(scanFragHandlePtr, senderData);
  Ptr<TreeNode> treeNodePtr;
  m_treenode_pool.getPtr(treeNodePtr, scanFragHandlePtr.p->m_treeNodePtrI);
  Ptr<Request> requestPtr;
  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
  DEBUG("execSCAN_HBREP"
     << ", node: " << treeNodePtr.p->m_node_no
     << ", request: " << requestPtr.i
  );

  Uint32 ref = requestPtr.p->m_senderRef;
  signal->theData[0] = requestPtr.p->m_senderData;
  sendSignal(ref, GSN_SCAN_HBREP, signal, 3, JBB);
}

void
Dbspj::execSCAN_FRAGCONF(Signal* signal)
{
  jamEntry();

  const ScanFragConf* conf = reinterpret_cast<const ScanFragConf*>(signal->getDataPtr());

#ifdef DEBUG_SCAN_FRAGREQ
  ndbout_c("Dbspj::execSCAN_FRAGCONF() receiving SCAN_FRAGCONF ");
  printSCAN_FRAGCONF(stdout, signal->getDataPtrSend(),
                     conf->total_len,
                     DBLQH);
#endif

  Ptr<ScanFragHandle> scanFragHandlePtr;
  m_scanfraghandle_pool.getPtr(scanFragHandlePtr, conf->senderData);
  Ptr<TreeNode> treeNodePtr;
  m_treenode_pool.getPtr(treeNodePtr, scanFragHandlePtr.p->m_treeNodePtrI);
  Ptr<Request> requestPtr;
  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
  ndbassert(!requestPtr.p->m_completed_nodes.get(treeNodePtr.p->m_node_no) ||
            requestPtr.p->m_state & Request::RS_ABORTING);

  DEBUG("execSCAN_FRAGCONF"
     << ", node: " << treeNodePtr.p->m_node_no
     << ", request: " << requestPtr.i
  );

  ndbrequire(treeNodePtr.p->m_info && treeNodePtr.p->m_info->m_execSCAN_FRAGCONF);
  (this->*(treeNodePtr.p->m_info->m_execSCAN_FRAGCONF))(signal,
                                                        requestPtr,
                                                        treeNodePtr,
                                                        scanFragHandlePtr);
}

void
Dbspj::execSCAN_NEXTREQ(Signal* signal)
{
  jamEntry();
  const ScanFragNextReq * req = (ScanFragNextReq*)&signal->theData[0];

#ifdef DEBUG_SCAN_FRAGREQ
  DEBUG("Incoming SCAN_NEXTREQ");
  printSCANFRAGNEXTREQ(stdout, &signal->theData[0],
                       ScanFragNextReq::SignalLength, DBLQH);
#endif

  Request key;
  key.m_transId[0] = req->transId1;
  key.m_transId[1] = req->transId2;
  key.m_senderData = req->senderData;

  Ptr<Request> requestPtr;
  if (unlikely(!m_scan_request_hash.find(requestPtr, key)))
  {
    jam();
    ndbrequire(ScanFragNextReq::getCloseFlag(req->requestInfo));
    return;
  }
  DEBUG("execSCAN_NEXTREQ, request: " << requestPtr.i);

#ifdef SPJ_TRACE_TIME
  const NDB_TICKS now = NdbTick_getCurrentTicks();
  const NDB_TICKS then = requestPtr.p->m_save_time;
  const Uint64 diff = NdbTick_Elapsed(then,now).microSec();
  requestPtr.p->m_sum_waiting += Uint32(diff);
  requestPtr.p->m_save_time = now;
#endif

  Uint32 state = requestPtr.p->m_state;
  requestPtr.p->m_state = state & ~Uint32(Request::RS_WAITING);

  if (unlikely(state == Request::RS_ABORTED))
  {
    jam();
    batchComplete(signal, requestPtr);
    return;
  }

  if (unlikely((state & Request::RS_ABORTING) != 0))
  {
    jam();
    /**
     * Abort is already in progress...
     *   Since RS_WAITING is cleared... it will end this request.
     */
    return;
  }

  if (ScanFragNextReq::getCloseFlag(req->requestInfo)) // Requested close scan
  {
    jam();
    abort(signal, requestPtr, 0);
    return;
  }

  ndbrequire((state & Request::RS_WAITING) != 0);
  ndbrequire(requestPtr.p->m_outstanding == 0);

  {
    /**
     * Scroll all relevant cursors...
     */
    Ptr<TreeNode> treeNodePtr;
    Local_TreeNodeCursor_list list(m_treenode_pool,
                                   requestPtr.p->m_cursor_nodes);
    Uint32 cnt_active = 0;

    for (list.first(treeNodePtr); !treeNodePtr.isNull(); list.next(treeNodePtr))
    {
      if (treeNodePtr.p->m_state == TreeNode::TN_ACTIVE)
      {
        jam();
        DEBUG("SCAN_NEXTREQ on TreeNode: "
           << ", m_node_no: " << treeNodePtr.p->m_node_no
           << ", w/ m_parentPtrI: " << treeNodePtr.p->m_parentPtrI);

        ndbrequire(treeNodePtr.p->m_info != 0 &&
                   treeNodePtr.p->m_info->m_execSCAN_NEXTREQ != 0);
        (this->*(treeNodePtr.p->m_info->m_execSCAN_NEXTREQ))(signal,
                                                             requestPtr,
                                                             treeNodePtr);
        cnt_active++;
      }
      else
      {
        /**
         * Restart any other scans not being 'TN_ACTIVE'.
         * (Only effective if 'RT_REPEAT_SCAN_RESULT'.)
         */
        jam();
        ndbrequire(requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT);
        DEBUG("Restart TreeNode "
           << ", m_node_no: " << treeNodePtr.p->m_node_no
           << ", w/ m_parentPtrI: " << treeNodePtr.p->m_parentPtrI);

        ndbrequire(treeNodePtr.p->m_info != 0 &&
                   treeNodePtr.p->m_info->m_parent_batch_complete !=0 );
        (this->*(treeNodePtr.p->m_info->m_parent_batch_complete))(signal,
                                                                  requestPtr,
                                                                  treeNodePtr);
      }
      if (unlikely((requestPtr.p->m_state & Request::RS_ABORTING) != 0))
      {
        jam();
        break;
      }
    }// for all treeNodes in 'm_cursor_nodes'

    /* Expected only a single ACTIVE TreeNode among the cursors */
    ndbrequire(cnt_active == 1 ||
               !(requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT));
  }
}

void
Dbspj::execTRANSID_AI(Signal* signal)
{
  jamEntry();
  TransIdAI * req = (TransIdAI *)signal->getDataPtr();
  Uint32 ptrI = req->connectPtr;
  //Uint32 transId[2] = { req->transId[0], req->transId[1] };

  Ptr<TreeNode> treeNodePtr;
  m_treenode_pool.getPtr(treeNodePtr, ptrI);
  Ptr<Request> requestPtr;
  m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
  ndbassert(!requestPtr.p->m_completed_nodes.get(treeNodePtr.p->m_node_no));

  DEBUG("execTRANSID_AI"
     << ", node: " << treeNodePtr.p->m_node_no
     << ", request: " << requestPtr.i
  );

  ndbrequire(signal->getNoOfSections() != 0);

  SegmentedSectionPtr dataPtr;
  {
    SectionHandle handle(this, signal);
    handle.getSection(dataPtr, 0);
    handle.clear();
  }

#if defined(DEBUG_LQHKEYREQ) || defined(DEBUG_SCAN_FRAGREQ)
  printf("execTRANSID_AI: ");
  print(dataPtr, stdout);
#endif

  /**
   * build easy-access-array for row
   */
  Uint32 tmp[2+MAX_ATTRIBUTES_IN_TABLE];
  RowPtr::Header* header = CAST_PTR(RowPtr::Header, &tmp[0]);

  Uint32 cnt = buildRowHeader(header, dataPtr);
  ndbassert(header->m_len < NDB_ARRAY_SIZE(tmp));

  struct RowPtr row;
  row.m_type = RowPtr::RT_SECTION;
  row.m_src_node_ptrI = treeNodePtr.i;
  row.m_row_data.m_section.m_header = header;
  row.m_row_data.m_section.m_dataPtr.assign(dataPtr);

  getCorrelationData(row.m_row_data.m_section,
                     cnt - 1,
                     row.m_src_correlation);

  if (treeNodePtr.p->m_bits & TreeNode::T_ROW_BUFFER)
  {
    jam();
    Uint32 err;

    DEBUG("Need to storeRow"
      << ", node: " << treeNodePtr.p->m_node_no
    );

    if (ERROR_INSERTED(17120) ||
       (ERROR_INSERTED(17121) && treeNodePtr.p->m_parentPtrI != RNIL))
    {
      jam();
      CLEAR_ERROR_INSERT_VALUE;
      abort(signal, requestPtr, DbspjErr::OutOfRowMemory);
    }
    else if ((err = storeRow(treeNodePtr.p->m_rows, row)) != 0)
    {
      jam();
      abort(signal, requestPtr, err);
    }
  }

  ndbrequire(treeNodePtr.p->m_info && treeNodePtr.p->m_info->m_execTRANSID_AI);

  (this->*(treeNodePtr.p->m_info->m_execTRANSID_AI))(signal,
                                                     requestPtr,
                                                     treeNodePtr,
                                                     row);
  release(dataPtr);
}

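/**
 * Copy a received row (RT_SECTION) into the TreeNode's row buffer.
 * The buffered row appears to be laid out as [offset words][row header]
 * [packed row data], where the leading offset words are reserved for the
 * collection's own bookkeeping (e.g. the list-link of a SLFifoRowList).
 * On success, 'row' is rewritten into an RT_LINEAR RowPtr referring to
 * the buffered copy.
 */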
Uint32
Dbspj::storeRow(RowCollection& collection, RowPtr &row)
{
  ndbassert(row.m_type == RowPtr::RT_SECTION);
  SegmentedSectionPtr dataPtr = row.m_row_data.m_section.m_dataPtr;
  Uint32 * headptr = (Uint32*)row.m_row_data.m_section.m_header;
  Uint32 headlen = 1 + row.m_row_data.m_section.m_header->m_len;

  /**
   * Rows might be stored at an offset within the collection.
   */
  const Uint32 offset = collection.rowOffset();

  Uint32 totlen = 0;
  totlen += dataPtr.sz;
  totlen += headlen;
  totlen += offset;

  RowRef ref;
  Uint32* const dstptr = rowAlloc(*collection.m_base.m_rowBuffer, ref, totlen);
  if (unlikely(dstptr == 0))
  {
    jam();
    return DbspjErr::OutOfRowMemory;
  }
  memcpy(dstptr + offset, headptr, 4 * headlen);
  copy(dstptr + offset + headlen, dataPtr);

  if (collection.m_type == RowCollection::COLLECTION_LIST)
  {
    jam();
    NullRowRef.copyto_link(dstptr); // Null terminate list...
    add_to_list(collection.m_list, ref);
  }
  else
  {
    jam();
    Uint32 error = add_to_map(collection.m_map, row.m_src_correlation, ref);
    if (unlikely(error))
      return error;
  }

  /**
   * Refetch the pointer to the allocated row memory before creating the
   * RowPtr, as add_to_xxx above may have reorganized memory, causing
   * the allocated row to be moved.
   */
  const Uint32* const rowptr = get_row_ptr(ref);
  setupRowPtr(collection, row, ref, rowptr);
  return 0;
}

void
Dbspj::setupRowPtr(const RowCollection& collection,
                   RowPtr& row, RowRef ref, const Uint32 * src)
{
  const Uint32 offset = collection.rowOffset();
  const RowPtr::Header * headptr = (RowPtr::Header*)(src + offset);
  Uint32 headlen = 1 + headptr->m_len;

  row.m_type = RowPtr::RT_LINEAR;
  row.m_row_data.m_linear.m_row_ref = ref;
  row.m_row_data.m_linear.m_header = headptr;
  row.m_row_data.m_linear.m_data = (Uint32*)headptr + headlen;
}

void
Dbspj::add_to_list(SLFifoRowList & list, RowRef rowref)
{
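  /**
   * The list is an intrusive singly linked FIFO: each buffered row
   * reserves its leading word(s) for a link RowRef, while the list head
   * only records the first/last {page_id, page_pos}. Appending therefore
   * patches the link words of the current last row to refer to the new row.
   */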
  if (list.isNull())
  {
    jam();
    list.m_first_row_page_id = rowref.m_page_id;
    list.m_first_row_page_pos = rowref.m_page_pos;
  }
  else
  {
    jam();
    /**
     * add last to list
     */
    RowRef last;
    last.m_alloc_type = rowref.m_alloc_type;
    last.m_page_id = list.m_last_row_page_id;
    last.m_page_pos = list.m_last_row_page_pos;
    Uint32 * const rowptr = get_row_ptr(last);
    rowref.copyto_link(rowptr);
  }

  list.m_last_row_page_id = rowref.m_page_id;
  list.m_last_row_page_pos = rowref.m_page_pos;
}

Uint32 *
Dbspj::get_row_ptr(RowRef pos)
{
  Ptr<RowPage> ptr;
  m_page_pool.getPtr(ptr, pos.m_page_id);
  if (pos.m_alloc_type == BUFFER_STACK) // ::stackAlloc() memory
  {
    jam();
    return ptr.p->m_data + pos.m_page_pos;
  }
  else                                 // ::varAlloc() memory
  {
    jam();
    ndbassert(pos.m_alloc_type == BUFFER_VAR);
    return ((Var_page*)ptr.p)->get_ptr(pos.m_page_pos);
  }
}

inline
bool
Dbspj::first(const SLFifoRowList& list,
             SLFifoRowListIterator& iter)
{
  if (list.isNull())
  {
    jam();
    iter.setNull();
    return false;
  }

  //  const Buffer_type allocator = list.m_rowBuffer->m_type;
  iter.m_ref.m_alloc_type = list.m_rowBuffer->m_type;
  iter.m_ref.m_page_id = list.m_first_row_page_id;
  iter.m_ref.m_page_pos = list.m_first_row_page_pos;
  iter.m_row_ptr = get_row_ptr(iter.m_ref);
  return true;
}

inline
bool
Dbspj::next(SLFifoRowListIterator& iter)
{
  iter.m_ref.assign_from_link(iter.m_row_ptr);
  if (iter.m_ref.isNull())
  {
    jam();
    return false;
  }
  iter.m_row_ptr = get_row_ptr(iter.m_ref);
  return true;
}

Uint32
Dbspj::add_to_map(RowMap& map,
                  Uint32 corrVal, RowRef rowref)
{
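  /**
   * The map is a flat array of up to 'm_size' RowRefs, indexed by the
   * lower 16 bits of the correlation value. It is lazily allocated in
   * row memory on the first insert, with all positions initialized to
   * an 'empty' sentinel (page_pos == 0xFFFF, as asserted below).
   */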
  Uint32 * mapptr;
  if (map.isNull())
  {
    jam();
    ndbassert(map.m_size > 0);
    ndbassert(map.m_rowBuffer != NULL);

    Uint32 sz16 = RowMap::MAP_SIZE_PER_REF_16 * map.m_size;
    Uint32 sz32 = (sz16 + 1) / 2;
    RowRef ref;
    mapptr = rowAlloc(*map.m_rowBuffer, ref, sz32);
    if (unlikely(mapptr == 0))
    {
      jam();
      return DbspjErr::OutOfRowMemory;
    }
    map.assign(ref);
    map.m_elements = 0;
    map.clear(mapptr);
  }
  else
  {
    jam();
    RowRef ref;
    map.copyto(ref);
    mapptr = get_row_ptr(ref);
  }

  Uint32 pos = corrVal & 0xFFFF;
  ndbrequire(pos < map.m_size);
  ndbrequire(map.m_elements < map.m_size);

  if (1)
  {
    /**
     * Check that *pos* is empty.
     */
    RowRef check;
    map.load(mapptr, pos, check);
    ndbrequire(check.m_page_pos == 0xFFFF);
  }

  map.store(mapptr, pos, rowref);

  return 0;
}

inline
bool
Dbspj::first(const RowMap& map,
             RowMapIterator & iter)
{
  if (map.isNull())
  {
    jam();
    iter.setNull();
    return false;
  }

  iter.m_map_ptr = get_row_ptr(map.m_map_ref);
  iter.m_size = map.m_size;
  iter.m_ref.m_alloc_type = map.m_rowBuffer->m_type;

  // Skip empty positions; bounds check before inspecting the entry
  Uint32 pos = 0;
  while (pos < iter.m_size && RowMap::isNull(iter.m_map_ptr, pos))
    pos++;

  if (pos == iter.m_size)
  {
    jam();
    iter.setNull();
    return false;
  }
  else
  {
    jam();
    RowMap::load(iter.m_map_ptr, pos, iter.m_ref);
    iter.m_element_no = pos;
    iter.m_row_ptr = get_row_ptr(iter.m_ref);
    return true;
  }
}

inline
bool
Dbspj::next(RowMapIterator & iter)
{
  // Skip empty positions; bounds check before inspecting the entry
  Uint32 pos = iter.m_element_no + 1;
  while (pos < iter.m_size && RowMap::isNull(iter.m_map_ptr, pos))
    pos++;

  if (pos == iter.m_size)
  {
    jam();
    iter.setNull();
    return false;
  }
  else
  {
    jam();
    RowMap::load(iter.m_map_ptr, pos, iter.m_ref);
    iter.m_element_no = pos;
    iter.m_row_ptr = get_row_ptr(iter.m_ref);
    return true;
  }
}

bool
Dbspj::first(const RowCollection& collection,
             RowIterator& iter)
{
  iter.m_type = collection.m_type;
  if (iter.m_type == RowCollection::COLLECTION_LIST)
  {
    jam();
    return first(collection.m_list, iter.m_list);
  }
  else
  {
    jam();
    ndbassert(iter.m_type == RowCollection::COLLECTION_MAP);
    return first(collection.m_map, iter.m_map);
  }
}

bool
Dbspj::next(RowIterator& iter)
{
  if (iter.m_type == RowCollection::COLLECTION_LIST)
  {
    jam();
    return next(iter.m_list);
  }
  else
  {
    jam();
    ndbassert(iter.m_type == RowCollection::COLLECTION_MAP);
    return next(iter.m_map);
  }
}

inline
Uint32 *
Dbspj::stackAlloc(RowBuffer & buffer, RowRef& dst, Uint32 sz)
{
  Ptr<RowPage> ptr;
  LocalDLFifoList<RowPage> list(m_page_pool, buffer.m_page_list);

  Uint32 pos = buffer.m_stack.m_pos;
  const Uint32 SIZE = RowPage::SIZE;
  if (list.isEmpty() || (pos + sz) > SIZE)
  {
    jam();
    bool ret = allocPage(ptr);
    if (unlikely(ret == false))
    {
      jam();
      return 0;
    }

    pos = 0;
    list.addLast(ptr);
  }
  else
  {
    list.last(ptr);
  }

  dst.m_page_id = ptr.i;
  dst.m_page_pos = pos;
  dst.m_alloc_type = BUFFER_STACK;
  buffer.m_stack.m_pos = pos + sz;
  return ptr.p->m_data + pos;
}

inline
Uint32 *
Dbspj::varAlloc(RowBuffer & buffer, RowRef& dst, Uint32 sz)
{
  Ptr<RowPage> ptr;
  LocalDLFifoList<RowPage> list(m_page_pool, buffer.m_page_list);

  Uint32 free_space = buffer.m_var.m_free;
  if (list.isEmpty() || free_space < (sz + 1))
  {
    jam();
    bool ret = allocPage(ptr);
    if (unlikely(ret == false))
    {
      jam();
      return 0;
    }

    list.addLast(ptr);
    ((Var_page*)ptr.p)->init();
  }
  else
  {
    jam();
    list.last(ptr);
  }

  Var_page * vp = (Var_page*)ptr.p;
  Uint32 pos = vp->alloc_record(sz, (Var_page*)m_buffer0, Var_page::CHAIN);

  dst.m_page_id = ptr.i;
  dst.m_page_pos = pos;
  dst.m_alloc_type = BUFFER_VAR;
  buffer.m_var.m_free = vp->free_space;
  return vp->get_ptr(pos);
}

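/**
 * Dispatch to one of the two allocation strategies:
 * BUFFER_STACK bump-allocates from the end of the last page and can
 * only be released as a whole, while BUFFER_VAR uses Var_page records
 * which can be freed individually (see ::releaseRow()).
 */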
Uint32 *
Dbspj::rowAlloc(RowBuffer& rowBuffer, RowRef& dst, Uint32 sz)
{
  if (rowBuffer.m_type == BUFFER_STACK)
  {
    jam();
    return stackAlloc(rowBuffer, dst, sz);
  }
  else if (rowBuffer.m_type == BUFFER_VAR)
  {
    jam();
    return varAlloc(rowBuffer, dst, sz);
  }
  else
  {
    jam();
    ndbrequire(false);
    return NULL;
  }
}

bool
Dbspj::allocPage(Ptr<RowPage> & ptr)
{
  if (m_free_page_list.isEmpty())
  {
    jam();
    if (ERROR_INSERTED_CLEAR(17003))
    {
      jam();
      ndbout_c("Injecting failed '::allocPage', error 17003 at line %d file %s",
               __LINE__,  __FILE__);
      return false;
    }
    ptr.p = (RowPage*)m_ctx.m_mm.alloc_page(RT_SPJ_DATABUFFER,
                                            &ptr.i,
                                            Ndbd_mem_manager::NDB_ZONE_ANY);
    if (ptr.p == 0)
    {
      jam();
      return false;
    }
    return true;
  }
  else
  {
    jam();
    LocalSLList<RowPage> list(m_page_pool, m_free_page_list);
    bool ret = list.removeFirst(ptr);
    ndbrequire(ret);
    return ret;
  }
}

void
Dbspj::releasePage(Ptr<RowPage> ptr)
{
  LocalSLList<RowPage> list(m_page_pool, m_free_page_list);
  list.addFirst(ptr);
}

void
Dbspj::releaseGlobal(Signal * signal)
{
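  /**
   * Periodically trickle pages from the local free-list back to the
   * global memory manager, one page per CONTINUEB round: re-trigger
   * after 100ms while pages remain, backing off to 300ms when the
   * free-list is empty.
   */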
  Uint32 delay = 100;
  LocalSLList<RowPage> list(m_page_pool, m_free_page_list);
  if (list.isEmpty())
  {
    jam();
    delay = 300;
  }
  else
  {
    Ptr<RowPage> ptr;
    list.removeFirst(ptr);
    m_ctx.m_mm.release_page(RT_SPJ_DATABUFFER, ptr.i);
  }

  signal->theData[0] = 0;
  sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, delay, 1);
}

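/**
 * Check that the table (or index) referred to by this TreeNode still
 * exists, and that its schema version still matches the version the
 * query was compiled against. Returns 0 when OK, else a DbspjErr code.
 */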
Uint32
Dbspj::checkTableError(Ptr<TreeNode> treeNodePtr) const
{
  jam();
  if (treeNodePtr.p->m_tableOrIndexId >= c_tabrecFilesize)
  {
    jam();
    ndbassert(c_tabrecFilesize > 0);
    return DbspjErr::NoSuchTable;
  }

  TableRecordPtr tablePtr;
  tablePtr.i = treeNodePtr.p->m_tableOrIndexId;
  ptrAss(tablePtr, m_tableRecord);
  Uint32 err = tablePtr.p->checkTableError(treeNodePtr.p->m_schemaVersion);
  if (unlikely(err))
  {
    DEBUG_DICT("Dbspj::checkTableError"
              << ", m_node_no: " << treeNodePtr.p->m_node_no
              << ", tableOrIndexId: " << treeNodePtr.p->m_tableOrIndexId
              << ", error: " << err);
  }
  if (ERROR_INSERTED(17520) ||
      (ERROR_INSERTED(17521) && (rand() % 7) == 0))
  {
    jam();
    CLEAR_ERROR_INSERT_VALUE;
    ndbout_c("::checkTableError, injecting NoSuchTable error at line %d file %s",
              __LINE__,  __FILE__);
    return DbspjErr::NoSuchTable;
  }
  return err;
}

3381 /**
3382  * END - MODULE GENERIC
3383  */
3384 
3385 void
common_execTRANSID_AI(Signal * signal,Ptr<Request> requestPtr,Ptr<TreeNode> treeNodePtr,const RowPtr & rowRef)3386 Dbspj::common_execTRANSID_AI(Signal* signal,
3387                              Ptr<Request> requestPtr,
3388                              Ptr<TreeNode> treeNodePtr,
3389                              const RowPtr & rowRef)
3390 {
3391   jam();
3392 
3393   if (likely((requestPtr.p->m_state & Request::RS_ABORTING) == 0))
3394   {
3395     LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
3396     Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
3397     Dependency_map::ConstDataBufferIterator it;
3398 
3399     /**
3400      * Activate child operations in two steps:
3401      * 1) Any child operations requiring T_EXEC_SEQUENTIAL are
3402      *    prepared for exec by appending rowRefs to the deferred
3403      *    list.
3404      * 2) Start executing non-T_EXEC_SEQUENTIAL child operations.
3405      */
3406     for (list.first(it); !it.isNull(); list.next(it))
3407     {
3408       Ptr<TreeNode> childPtr;
3409       m_treenode_pool.getPtr(childPtr, * it.data);
3410 
3411       if (childPtr.p->m_bits & TreeNode::T_EXEC_SEQUENTIAL)
3412       {
3413         jam();
3414         DEBUG("T_EXEC_SEQUENTIAL --> child exec deferred");
3415 
3416         /**
3417          * Append correlation values of deferred child operations
3418          * to a list / fifo. Upon resume, we will then be able to
3419          * relocate all parent rows for which to resume operations.
3420          */
3421         LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
3422         Local_pattern_store correlations(pool, childPtr.p->m_deferred.m_correlations);
3423         if (!correlations.append(&rowRef.m_src_correlation, 1))
3424         {
3425           jam();
3426           abort(signal, requestPtr, DbspjErr::OutOfQueryMemory);
3427           return;
3428         }
3429 
3430         // As there are pending deferred operations we are not complete
3431         requestPtr.p->m_completed_nodes.clear(childPtr.p->m_node_no);
3432       }
3433     }
3434 
3435     for (list.first(it); !it.isNull(); list.next(it))
3436     {
3437       Ptr<TreeNode> childPtr;
3438       m_treenode_pool.getPtr(childPtr, * it.data);
3439       if ((childPtr.p->m_bits & TreeNode::T_EXEC_SEQUENTIAL) == 0)
3440       {
3441         jam();
3442         ndbrequire(childPtr.p->m_info!=0 && childPtr.p->m_info->m_parent_row!=0);
3443 
3444         (this->*(childPtr.p->m_info->m_parent_row))(signal,
3445                                                     requestPtr, childPtr, rowRef);
3446 
3447         /* Recheck RS_ABORTING as child operation might have aborted */
3448         if (unlikely(requestPtr.p->m_state & Request::RS_ABORTING))
3449         {
3450           jam();
3451           return;
3452         }
3453       }
3454     }
3455   }
3456 }
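/**
 * For illustration, the two passes above preserve a strict ordering for a
 * hypothetical treeNode with children {c1 : T_EXEC_SEQUENTIAL, c2 : parallel}:
 *
 *   pass 1: c1 only records rowRef.m_src_correlation in
 *           c1.m_deferred.m_correlations -- no signal is sent yet, and
 *           c1 is marked not-complete in m_completed_nodes.
 *   pass 2: c2's m_parent_row() is invoked immediately (for a lookup
 *           child this ends up in lookup_row()).
 *
 * c1 is resumed later via lookup_resume(), one deferred correlation at a
 * time, when the operation it is sequenced behind returns its CONF/REF.
 */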
3457 
3458 
3459 /**
3460  * MODULE LOOKUP
3461  */
3462 const Dbspj::OpInfo
3463 Dbspj::g_LookupOpInfo =
3464 {
3465   &Dbspj::lookup_build,
3466   0, // prepare
3467   &Dbspj::lookup_start,
3468   &Dbspj::lookup_execTRANSID_AI,
3469   &Dbspj::lookup_execLQHKEYREF,
3470   &Dbspj::lookup_execLQHKEYCONF,
3471   0, // execSCAN_FRAGREF
3472   0, // execSCAN_FRAGCONF
3473   &Dbspj::lookup_parent_row,
3474   0, // Dbspj::lookup_parent_batch_complete,
3475   0, // Dbspj::lookup_parent_batch_repeat,
3476   0, // Dbspj::lookup_parent_batch_cleanup,
3477   0, // Dbspj::lookup_execSCAN_NEXTREQ
3478   0, // Dbspj::lookup_complete
3479   &Dbspj::lookup_abort,
3480   &Dbspj::lookup_execNODE_FAILREP,
3481   &Dbspj::lookup_cleanup
3482 };
3483 
3484 Uint32
3485 Dbspj::lookup_build(Build_context& ctx,
3486                     Ptr<Request> requestPtr,
3487                     const QueryNode* qn,
3488                     const QueryNodeParameters* qp)
3489 {
3490   Uint32 err = 0;
3491   Ptr<TreeNode> treeNodePtr;
3492   const QN_LookupNode * node = (const QN_LookupNode*)qn;
3493   const QN_LookupParameters * param = (const QN_LookupParameters*)qp;
3494   do
3495   {
3496     err = DbspjErr::InvalidTreeNodeSpecification;
3497     if (unlikely(node->len < QN_LookupNode::NodeSize))
3498     {
3499       jam();
3500       break;
3501     }
3502 
3503     err = DbspjErr::InvalidTreeParametersSpecification;
3504     DEBUG("param len: " << param->len);
3505     if (unlikely(param->len < QN_LookupParameters::NodeSize))
3506     {
3507       jam();
3508       break;
3509     }
3510 
3511     err = createNode(ctx, requestPtr, treeNodePtr);
3512     if (unlikely(err != 0))
3513     {
3514       jam();
3515       break;
3516     }
3517 
3518     treeNodePtr.p->m_tableOrIndexId = node->tableId;
3519     treeNodePtr.p->m_primaryTableId = node->tableId;
3520     treeNodePtr.p->m_schemaVersion = node->tableVersion;
3521     treeNodePtr.p->m_info = &g_LookupOpInfo;
3522     Uint32 transId1 = requestPtr.p->m_transId[0];
3523     Uint32 transId2 = requestPtr.p->m_transId[1];
3524     Uint32 savePointId = ctx.m_savepointId;
3525 
3526     Uint32 treeBits = node->requestInfo;
3527     Uint32 paramBits = param->requestInfo;
3528     //ndbout_c("Dbspj::lookup_build() treeBits=%.8x paramBits=%.8x",
3529     //         treeBits, paramBits);
3530     LqhKeyReq* dst = (LqhKeyReq*)treeNodePtr.p->m_lookup_data.m_lqhKeyReq;
3531     {
3532       /**
3533        * static variables
3534        */
3535       dst->tcBlockref = reference();
3536       dst->clientConnectPtr = treeNodePtr.i;
3537 
3538       /**
3539        * TODO: reference()+treeNodePtr.i is passed twice;
3540        *   this can likely be optimized using the requestInfo-bits.
3541        * UPDATE: This can be accomplished by *not* calling setApplicationAddressFlag
3542        *         and patching LQH to then use tcBlockref/clientConnectPtr instead
3543        */
3544       dst->transId1 = transId1;
3545       dst->transId2 = transId2;
3546       dst->savePointId = savePointId;
3547       dst->scanInfo = 0;
3548       dst->attrLen = 0;
3549       /** Initially set reply ref to client; do_send will set SPJ refs if non-LEAF */
3550       dst->variableData[0] = ctx.m_resultRef;
3551       dst->variableData[1] = param->resultData;
3552       Uint32 requestInfo = 0;
3553       LqhKeyReq::setOperation(requestInfo, ZREAD);
3554       LqhKeyReq::setApplicationAddressFlag(requestInfo, 1);
3555       LqhKeyReq::setDirtyFlag(requestInfo, 1);
3556       LqhKeyReq::setSimpleFlag(requestInfo, 1);
3557       LqhKeyReq::setNormalProtocolFlag(requestInfo, 0);  // Assume T_LEAF
3558       LqhKeyReq::setCorrFactorFlag(requestInfo, 1);
3559       LqhKeyReq::setNoDiskFlag(requestInfo,
3560                                (treeBits & DABits::NI_LINKED_DISK) == 0 &&
3561                                (paramBits & DABits::PI_DISK_ATTR) == 0);
3562       dst->requestInfo = requestInfo;
3563     }
3564 
3565     if (treeBits & QN_LookupNode::L_UNIQUE_INDEX)
3566     {
3567       jam();
3568       treeNodePtr.p->m_bits |= TreeNode::T_UNIQUE_INDEX_LOOKUP;
3569     }
3570 
3571     Uint32 tableId = node->tableId;
3572     Uint32 schemaVersion = node->tableVersion;
3573 
3574     Uint32 tableSchemaVersion = tableId + ((schemaVersion << 16) & 0xFFFF0000);
3575     dst->tableSchemaVersion = tableSchemaVersion;
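    /**
     * Worked example (illustrative values only): with tableId = 0x0123 and
     * schemaVersion = 0x00050002, only the low 16 bits of the version are
     * kept, so tableSchemaVersion becomes (0x0002 << 16) | 0x0123
     * = 0x00020123: tableId in the low halfword, the truncated schema
     * version in the high halfword.
     */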
3576 
3577     ctx.m_resultData = param->resultData;
3578     treeNodePtr.p->m_lookup_data.m_api_resultRef = ctx.m_resultRef;
3579     treeNodePtr.p->m_lookup_data.m_api_resultData = param->resultData;
3580     treeNodePtr.p->m_lookup_data.m_outstanding = 0;
3581 
3582     /**
3583      * Parse stuff common to lookup/scan-frag
3584      */
3585     struct DABuffer nodeDA, paramDA;
3586     nodeDA.ptr = node->optional;
3587     nodeDA.end = nodeDA.ptr + (node->len - QN_LookupNode::NodeSize);
3588     paramDA.ptr = param->optional;
3589     paramDA.end = paramDA.ptr + (param->len - QN_LookupParameters::NodeSize);
3590     err = parseDA(ctx, requestPtr, treeNodePtr,
3591                   nodeDA, treeBits, paramDA, paramBits);
3592     if (unlikely(err != 0))
3593     {
3594       jam();
3595       break;
3596     }
3597 
3598     if (treeNodePtr.p->m_bits & TreeNode::T_ATTR_INTERPRETED)
3599     {
3600       jam();
3601       LqhKeyReq::setInterpretedFlag(dst->requestInfo, 1);
3602     }
3603 
3604     /**
3605      * Inherit batch size from parent
3606      */
3607     treeNodePtr.p->m_batch_size = 1;
3608     if (treeNodePtr.p->m_parentPtrI != RNIL)
3609     {
3610       jam();
3611       Ptr<TreeNode> parentPtr;
3612       m_treenode_pool.getPtr(parentPtr, treeNodePtr.p->m_parentPtrI);
3613       treeNodePtr.p->m_batch_size = parentPtr.p->m_batch_size;
3614     }
3615 
3616     if (ctx.m_start_signal)
3617     {
3618       jam();
3619       Signal * signal = ctx.m_start_signal;
3620       const LqhKeyReq* src = (const LqhKeyReq*)signal->getDataPtr();
3621 #if NOT_YET
3622       Uint32 instanceNo =
3623         blockToInstance(signal->header.theReceiversBlockNumber);
3624       treeNodePtr.p->m_send.m_ref = numberToRef(DBLQH,
3625                                                 instanceNo, getOwnNodeId());
3626 #else
3627       treeNodePtr.p->m_send.m_ref =
3628         numberToRef(DBLQH, getInstanceKey(src->tableSchemaVersion & 0xFFFF,
3629                                           src->fragmentData & 0xFFFF),
3630                     getOwnNodeId());
3631 #endif
3632 
3633       Uint32 hashValue = src->hashValue;
3634       Uint32 fragId = src->fragmentData;
3635       Uint32 attrLen = src->attrLen; // fragdist-key is in here
3636 
3637       /**
3638        * assertions
3639        */
3640 #ifdef VM_TRACE
3641       Uint32 requestInfo = src->requestInfo;
3642       ndbassert(LqhKeyReq::getAttrLen(attrLen) == 0);         // Only long
3643       ndbassert(LqhKeyReq::getScanTakeOverFlag(attrLen) == 0);// Not supported
3644       ndbassert(LqhKeyReq::getReorgFlag(attrLen) == ScanFragReq::REORG_ALL);       // Not supported
3645       ndbassert(LqhKeyReq::getOperation(requestInfo) == ZREAD);
3646       ndbassert(LqhKeyReq::getKeyLen(requestInfo) == 0);      // Only long
3647       ndbassert(LqhKeyReq::getMarkerFlag(requestInfo) == 0);  // Only read
3648       ndbassert(LqhKeyReq::getAIInLqhKeyReq(requestInfo) == 0);
3649       ndbassert(LqhKeyReq::getSeqNoReplica(requestInfo) == 0);
3650       ndbassert(LqhKeyReq::getLastReplicaNo(requestInfo) == 0);
3651       ndbassert(LqhKeyReq::getApplicationAddressFlag(requestInfo) != 0);
3652       ndbassert(LqhKeyReq::getSameClientAndTcFlag(requestInfo) == 0);
3653 #endif
3654 
3655 #if TODO
3656       /**
3657        * Handle various lock-modes
3658        */
3659       static Uint8 getDirtyFlag(const UintR & requestInfo);
3660       static Uint8 getSimpleFlag(const UintR & requestInfo);
3661 #endif
3662 
3663 #ifdef VM_TRACE
3664       Uint32 dst_requestInfo = dst->requestInfo;
3665       ndbassert(LqhKeyReq::getInterpretedFlag(requestInfo) ==
3666                 LqhKeyReq::getInterpretedFlag(dst_requestInfo));
3667       ndbassert(LqhKeyReq::getNoDiskFlag(requestInfo) ==
3668                 LqhKeyReq::getNoDiskFlag(dst_requestInfo));
3669 #endif
3670 
3671       dst->hashValue = hashValue;
3672       dst->fragmentData = fragId;
3673       dst->attrLen = attrLen; // fragdist is in here
3674 
3675       treeNodePtr.p->m_bits |= TreeNode::T_ONE_SHOT;
3676     }
3677     return 0;
3678   } while (0);
3679 
3680   return err;
3681 }
3682 
3683 void
3684 Dbspj::lookup_start(Signal* signal,
3685                     Ptr<Request> requestPtr,
3686                     Ptr<TreeNode> treeNodePtr)
3687 {
3688   lookup_send(signal, requestPtr, treeNodePtr);
3689 }
3690 
3691 void
3692 Dbspj::lookup_send(Signal* signal,
3693                    Ptr<Request> requestPtr,
3694                    Ptr<TreeNode> treeNodePtr)
3695 {
3696   jam();
3697   if (!ERROR_INSERTED(17521)) // Avoid emulated rnd errors
3698   {
3699     // ::checkTableError() should be handled before we reach this far
3700     ndbassert(checkTableError(treeNodePtr) == 0);
3701   }
3702 
3703   Uint32 cnt = 2;
3704   if (treeNodePtr.p->isLeaf())
3705   {
3706     jam();
3707     if (requestPtr.p->isLookup())
3708     {
3709       jam();
3710       cnt = 0;
3711     }
3712     else
3713     {
3714       jam();
3715       cnt = 1;
3716     }
3717   }
3718 
3719   LqhKeyReq* req = reinterpret_cast<LqhKeyReq*>(signal->getDataPtrSend());
3720 
3721   memcpy(req, treeNodePtr.p->m_lookup_data.m_lqhKeyReq,
3722          sizeof(treeNodePtr.p->m_lookup_data.m_lqhKeyReq));
3723   req->variableData[2] = treeNodePtr.p->m_send.m_correlation;
3724   req->variableData[3] = requestPtr.p->m_rootResultData;
3725 
3726   if (!(requestPtr.p->isLookup() && treeNodePtr.p->isLeaf()))
3727   {
3728     // Non-LEAF want reply to SPJ instead of ApiClient.
3729     LqhKeyReq::setNormalProtocolFlag(req->requestInfo, 1);
3730     req->variableData[0] = reference();
3731     req->variableData[1] = treeNodePtr.i;
3732   }
3733   else
3734   {
3735     jam();
3736     /**
3737      * Fake that TC sent this request,
3738      *   so that it can route a possible TCKEYREF
3739      */
3740     req->tcBlockref = requestPtr.p->m_senderRef;
3741   }
3742 
3743   SectionHandle handle(this);
3744 
3745   Uint32 ref = treeNodePtr.p->m_send.m_ref;
3746   Uint32 keyInfoPtrI = treeNodePtr.p->m_send.m_keyInfoPtrI;
3747   Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;
3748 
3749   Uint32 err = 0;
3750 
3751   do
3752   {
3753     if (treeNodePtr.p->m_bits & TreeNode::T_ONE_SHOT)
3754     {
3755       jam();
3756       /**
3757        * Pass sections to send
3758        */
3759       treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL;
3760       treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL;
3761     }
3762     else
3763     {
3764       if ((treeNodePtr.p->m_bits & TreeNode::T_KEYINFO_CONSTRUCTED) == 0)
3765       {
3766         jam();
3767         Uint32 tmp = RNIL;
3768         if (!dupSection(tmp, keyInfoPtrI))
3769         {
3770           jam();
3771           ndbassert(tmp == RNIL);  // Guard for memleak
3772           err = DbspjErr::OutOfSectionMemory;
3773           break;
3774         }
3775 
3776         keyInfoPtrI = tmp;
3777       }
3778       else
3779       {
3780         jam();
3781         treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL;
3782       }
3783 
3784       if ((treeNodePtr.p->m_bits & TreeNode::T_ATTRINFO_CONSTRUCTED) == 0)
3785       {
3786         jam();
3787         Uint32 tmp = RNIL;
3788 
3789         /**
3790          * Test execution terminated due to 'OutOfSectionMemory' which
3791          * may happen for different treeNodes in the request:
3792          * - 17070: Fail on any lookup_send()
3793          * - 17071: Fail on lookup_send() if 'isLeaf'
3794          * - 17072: Fail on lookup_send() if treeNode not root
3795          */
3796 
3797         if (ERROR_INSERTED(17070) ||
3798            (ERROR_INSERTED(17071) && treeNodePtr.p->isLeaf()) ||
3799            (ERROR_INSERTED(17072) && treeNodePtr.p->m_parentPtrI != RNIL))
3800         {
3801           jam();
3802           CLEAR_ERROR_INSERT_VALUE;
3803           ndbout_c("Injecting OutOfSectionMemory error at line %d file %s",
3804                    __LINE__,  __FILE__);
3805           releaseSection(keyInfoPtrI);
3806           err = DbspjErr::OutOfSectionMemory;
3807           break;
3808         }
3809 
3810         if (!dupSection(tmp, attrInfoPtrI))
3811         {
3812           jam();
3813           ndbassert(tmp == RNIL);  // Guard for memleak
3814           releaseSection(keyInfoPtrI);
3815           err = DbspjErr::OutOfSectionMemory;
3816           break;
3817         }
3818 
3819         attrInfoPtrI = tmp;
3820       }
3821       else
3822       {
3823         jam();
3824         treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL;
3825       }
3826     }
3827 
3828     getSection(handle.m_ptr[0], keyInfoPtrI);
3829     getSection(handle.m_ptr[1], attrInfoPtrI);
3830     handle.m_cnt = 2;
3831 
3832     /**
3833      * Inject error to test LQHKEYREF handling:
3834      * Tampering with tableSchemaVersion such that LQH will
3835      * return LQHKEYREF('1227: Invalid schema version')
3836      * May happen for different treeNodes in the request:
3837      * - 17030: Fail on any lookup_send()
3838      * - 17031: Fail on lookup_send() if 'isLeaf'
3839      * - 17032: Fail on lookup_send() if treeNode not root
3840      */
3841     if (ERROR_INSERTED(17030) ||
3842        (ERROR_INSERTED(17031) && treeNodePtr.p->isLeaf()) ||
3843        (ERROR_INSERTED(17032) && treeNodePtr.p->m_parentPtrI != RNIL))
3844     {
3845       jam();
3846       CLEAR_ERROR_INSERT_VALUE;
3847       req->tableSchemaVersion += (1 << 16); // Provoke 'Invalid schema version'
3848     }
3849 
3850 #if defined DEBUG_LQHKEYREQ
3851     ndbout_c("LQHKEYREQ to %x", ref);
3852     printLQHKEYREQ(stdout, signal->getDataPtrSend(),
3853                    NDB_ARRAY_SIZE(treeNodePtr.p->m_lookup_data.m_lqhKeyReq),
3854                    DBLQH);
3855     printf("KEYINFO: ");
3856     print(handle.m_ptr[0], stdout);
3857     printf("ATTRINFO: ");
3858     print(handle.m_ptr[1], stdout);
3859 #endif
3860 
3861     Uint32 Tnode = refToNode(ref);
3862     if (Tnode == getOwnNodeId())
3863     {
3864       c_Counters.incr_counter(CI_LOCAL_READS_SENT, 1);
3865     }
3866     else
3867     {
3868       c_Counters.incr_counter(CI_REMOTE_READS_SENT, 1);
3869     }
3870 
3871     /**
3872      * Test execution terminated due to 'NodeFailure' which
3873      * may happen for different treeNodes in the request:
3874      * - 17020: Fail on any lookup_send()
3875      * - 17021: Fail on lookup_send() if 'isLeaf'
3876      * - 17022: Fail on lookup_send() if treeNode not root
3877      */
3878     if (ERROR_INSERTED(17020) ||
3879        (ERROR_INSERTED(17021) && treeNodePtr.p->isLeaf()) ||
3880        (ERROR_INSERTED(17022) && treeNodePtr.p->m_parentPtrI != RNIL))
3881     {
3882       jam();
3883       CLEAR_ERROR_INSERT_VALUE;
3884       releaseSections(handle);
3885       err = DbspjErr::NodeFailure;
3886       break;
3887     }
3888     // Test for online downgrade.
3889     if (unlikely(!ndb_join_pushdown(getNodeInfo(Tnode).m_version)))
3890     {
3891       jam();
3892       releaseSections(handle);
3893       err = 4003; // Function not implemented.
3894       break;
3895     }
3896 
3897     if (unlikely(!c_alive_nodes.get(Tnode)))
3898     {
3899       jam();
3900       releaseSections(handle);
3901       err = DbspjErr::NodeFailure;
3902       break;
3903     }
3904     else if (! (treeNodePtr.p->isLeaf() && requestPtr.p->isLookup()))
3905     {
3906       jam();
3907       ndbassert(Tnode < NDB_ARRAY_SIZE(requestPtr.p->m_lookup_node_data));
3908       requestPtr.p->m_completed_nodes.clear(treeNodePtr.p->m_node_no);
3909       requestPtr.p->m_outstanding += cnt;
3910       requestPtr.p->m_lookup_node_data[Tnode] += cnt;
3911       // guard against the outstanding-counter wrapping to 0
3912       ndbrequire(! (requestPtr.p->m_lookup_node_data[Tnode] == 0));
3913     }
3914 
3915     sendSignal(ref, GSN_LQHKEYREQ, signal,
3916                NDB_ARRAY_SIZE(treeNodePtr.p->m_lookup_data.m_lqhKeyReq),
3917                JBB, &handle);
3918 
3919     treeNodePtr.p->m_lookup_data.m_outstanding += cnt;
3920     if (requestPtr.p->isLookup() && treeNodePtr.p->isLeaf())
3921     {
3922       jam();
3923       /**
3924        * Send TCKEYCONF with DirtyReadBit + Tnode,
3925        *   so that API can discover if Tnode died while waiting for result
3926        */
3927       lookup_sendLeafCONF(signal, requestPtr, treeNodePtr, Tnode);
3928     }
3929     return;
3930   }
3931   while (0);
3932 
3933   ndbrequire(err);
3934   jam();
3935   abort(signal, requestPtr, err);
3936 } //Dbspj::lookup_send
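/**
 * For reference, the 'cnt' bookkeeping above mirrors how many signals SPJ
 * expects back per LQHKEYREQ:
 *
 *   cnt == 2 : non-leaf treeNode        -> LQHKEYCONF/REF + TRANSID_AI
 *   cnt == 1 : leaf in a scan request   -> LQHKEYCONF/REF only, the row
 *              (TRANSID_AI) is routed directly to the API
 *   cnt == 0 : leaf in a lookup request -> nothing returns to SPJ; the
 *              extra TCKEYCONF from lookup_sendLeafCONF() instead lets
 *              the API do its own nodefail handling
 */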
3937 
3938 void
3939 Dbspj::lookup_execTRANSID_AI(Signal* signal,
3940                              Ptr<Request> requestPtr,
3941                              Ptr<TreeNode> treeNodePtr,
3942                              const RowPtr & rowRef)
3943 {
3944   jam();
3945 
3946   Uint32 Tnode = refToNode(signal->getSendersBlockRef());
3947   ndbrequire(!(requestPtr.p->isLookup() && treeNodePtr.p->isLeaf()));
3948 
3949   common_execTRANSID_AI(signal, requestPtr, treeNodePtr, rowRef);
3950 
3951   ndbassert(requestPtr.p->m_lookup_node_data[Tnode] >= 1);
3952   requestPtr.p->m_lookup_node_data[Tnode] -= 1;
3953 
3954   treeNodePtr.p->m_lookup_data.m_outstanding--;
3955 
3956   if (treeNodePtr.p->m_lookup_data.m_outstanding == 0
3957       && treeNodePtr.p->m_deferred.isEmpty())
3958   {
3959     jam();
3960     // We have received all rows for this treeNode in this batch.
3961     handleTreeNodeComplete(signal, requestPtr, treeNodePtr);
3962   }
3963 
3964   checkBatchComplete(signal, requestPtr, 1);
3965 }
3966 
3967 void
3968 Dbspj::lookup_execLQHKEYREF(Signal* signal,
3969                             Ptr<Request> requestPtr,
3970                             Ptr<TreeNode> treeNodePtr)
3971 {
3972   const LqhKeyRef * rep = (LqhKeyRef*)signal->getDataPtr();
3973   Uint32 errCode = rep->errorCode;
3974   Uint32 Tnode = refToNode(signal->getSendersBlockRef());
3975 
3976   c_Counters.incr_counter(CI_READS_NOT_FOUND, 1);
3977 
3978   DEBUG("lookup_execLQHKEYREF, errorCode:" << errCode);
3979 
3980   /**
3981    * If the Request is still actively running, the API needs to
3982    * be informed about the error.
3983    * The error code may either indicate a 'hard error', which should
3984    * terminate the query execution, or a 'soft error', which
3985    * should be signaled to the NDBAPI while execution continues.
3986    */
3987   if (likely((requestPtr.p->m_state & Request::RS_ABORTING) == 0))
3988   {
3989     switch(errCode){
3990     case 626: // 'Soft error' : Row not found
3991     case 899: // 'Soft error' : Interpreter_exit_nok
3992 
3993       jam();
3994       /**
3995        * Only Lookup-request need to send TCKEYREF...
3996        */
3997       if (requestPtr.p->isLookup())
3998       {
3999         jam();
4000         lookup_stop_branch(signal, requestPtr, treeNodePtr, errCode);
4001       }
4002       break;
4003 
4004     default: // 'Hard error' : abort query
4005       jam();
4006       abort(signal, requestPtr, errCode);
4007     }
4008   }
4009 
4010   Uint32 cnt = (treeNodePtr.p->isLeaf()) ? 1 : 2;
4011   ndbassert(requestPtr.p->m_lookup_node_data[Tnode] >= cnt);
4012   requestPtr.p->m_lookup_node_data[Tnode] -= cnt;
4013 
4014   treeNodePtr.p->m_lookup_data.m_outstanding -= cnt;
4015 
4016   /**
4017    * Another TreeNode awaited completion of this request
4018    * before it could resume its operation.
4019    */
4020   if (treeNodePtr.p->m_resumeEvents & TreeNode::TN_RESUME_REF)
4021   {
4022     jam();
4023     ndbassert(treeNodePtr.p->m_resumePtrI != RNIL);
4024     Ptr<TreeNode> resumeTreeNodePtr;
4025     m_treenode_pool.getPtr(resumeTreeNodePtr, treeNodePtr.p->m_resumePtrI);
4026     lookup_resume(signal, requestPtr, resumeTreeNodePtr);
4027   }
4028 
4029   if (treeNodePtr.p->m_lookup_data.m_outstanding == 0
4030       && treeNodePtr.p->m_deferred.isEmpty())
4031   {
4032     jam();
4033     // We have received all rows for this treeNode in this batch.
4034     handleTreeNodeComplete(signal, requestPtr, treeNodePtr);
4035   }
4036 
4037   checkBatchComplete(signal, requestPtr, cnt);
4038 }
4039 
4040 /**
4041  * lookup_stop_branch() will send required signals to the API
4042  * to inform that the query branch starting with 'treeNodePtr'
4043  * will not be executed due to 'errCode'.
4044  *
4045  * NOTE: 'errCode' is expected to be a 'soft error', like
4046  *       'row not found', and is *not* intended to abort
4047  *       the entire query.
4048  */
4049 void
4050 Dbspj::lookup_stop_branch(Signal* signal,
4051                           Ptr<Request> requestPtr,
4052                           Ptr<TreeNode> treeNodePtr,
4053                           Uint32 errCode)
4054 {
4055   ndbassert(requestPtr.p->isLookup());
4056   DEBUG("::lookup_stop_branch"
4057      << ", node: " << treeNodePtr.p->m_node_no
4058   );
4059 
4060   /**
4061    * If this is a "leaf" node, either on its own, or
4062    * indirectly through a unique index lookup:
4063    * Ordinary operation would have emitted the extra TCKEYCONF
4064    * required for nodefail handling.
4065    * (In case of nodefails during final leaf REQs.)
4066    * As the API can't, or at least does not try to, tell whether a
4067    * leaf operation is REFed by SPJ or LQH, we still have to
4068    * send this extra CONF as required by the protocol.
4069    */
4070   if (treeNodePtr.p->isLeaf())
4071   {
4072     jam();
4073     DEBUG("  Leaf-lookup: sending extra 'CONF' for nodefail handling");
4074     lookup_sendLeafCONF(signal, requestPtr, treeNodePtr, getOwnNodeId());
4075   }
4076 
4077   else if (treeNodePtr.p->m_bits & TreeNode::T_UNIQUE_INDEX_LOOKUP)
4078   {
4079     /**
4080      * UNIQUE_INDEX lookups are represented with an additional
4081      * child which does the lookup from the UQ-index into the table
4082      * itself. We have to check whether this child is a 'leaf'.
4083      */
4084     LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
4085     Local_dependency_map list(pool, treeNodePtr.p->m_dependent_nodes);
4086     Dependency_map::ConstDataBufferIterator it;
4087     ndbrequire(list.first(it));
4088     ndbrequire(list.getSize() == 1); // should only be 1 child
4089     Ptr<TreeNode> childPtr;
4090     m_treenode_pool.getPtr(childPtr, * it.data);
4091     if (childPtr.p->m_bits & TreeNode::T_LEAF)
4092     {
4093       jam();
4094       DEBUG("  UNIQUE_INDEX-Leaf-lookup: sending extra 'CONF' "
4095             "for nodefail handling");
4096       lookup_sendLeafCONF(signal, requestPtr, childPtr, getOwnNodeId());
4097     }
4098   }
4099 
4100   /**
4101    * Then produce the REF(errCode) which terminates this
4102    * tree branch.
4103    */
4104   Uint32 resultRef = treeNodePtr.p->m_lookup_data.m_api_resultRef;
4105   Uint32 resultData = treeNodePtr.p->m_lookup_data.m_api_resultData;
4106   TcKeyRef* ref = (TcKeyRef*)signal->getDataPtr();
4107   ref->connectPtr = resultData;
4108   ref->transId[0] = requestPtr.p->m_transId[0];
4109   ref->transId[1] = requestPtr.p->m_transId[1];
4110   ref->errorCode = errCode;
4111   ref->errorData = 0;
4112 
4113   DEBUG("  send TCKEYREF");
4114   sendTCKEYREF(signal, resultRef, requestPtr.p->m_senderRef);
4115 }
4116 
4117 /**
4118  * Lookup leaves in lookup requests will not receive CONF/REF
4119  * back to SPJ when the LQH request has completed. Instead we
4120  * cleanup() the request when the last leaf-node KEYREQ
4121  * has been sent. If any of the REQuested datanodes fail
4122  * after this, SPJ will not detect it and will be unable to
4123  * send the appropriate signals to the API to wake it from its
4124  * 'wait' state.
4125  * To get around this, we instead send an extra CONF
4126  * to the API which informs it about which 'node' it should
4127  * expect a result from. The API can then discover if this
4128  * 'node' died while waiting for results.
4129  */
4130 void
4131 Dbspj::lookup_sendLeafCONF(Signal* signal,
4132                            Ptr<Request> requestPtr,
4133                            Ptr<TreeNode> treeNodePtr,
4134                            Uint32 node)
4135 {
4136   ndbassert(treeNodePtr.p->isLeaf());
4137 
4138   const Uint32 resultRef = treeNodePtr.p->m_lookup_data.m_api_resultRef;
4139   const Uint32 resultData = treeNodePtr.p->m_lookup_data.m_api_resultData;
4140   TcKeyConf* const conf = (TcKeyConf*)signal->getDataPtr();
4141   conf->apiConnectPtr = RNIL;
4142   conf->confInfo = 0;
4143   conf->gci_hi = 0;
4144   TcKeyConf::setNoOfOperations(conf->confInfo, 1);
4145   conf->transId1 = requestPtr.p->m_transId[0];
4146   conf->transId2 = requestPtr.p->m_transId[1];
4147   conf->operations[0].apiOperationPtr = resultData;
4148   conf->operations[0].attrInfoLen =
4149     TcKeyConf::DirtyReadBit | node;
4150   const Uint32 sigLen = TcKeyConf::StaticLength + TcKeyConf::OperationLength;
4151   sendTCKEYCONF(signal, sigLen, resultRef, requestPtr.p->m_senderRef);
4152 }
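/**
 * Worked example (illustrative node id): for a leaf REQ sent to data
 * node 3, the single operation above carries
 *
 *   conf->operations[0].attrInfoLen = TcKeyConf::DirtyReadBit | 3;
 *
 * i.e. the attrInfoLen field is overloaded: DirtyReadBit marks the
 * encoding, and the low bits hold the node id the API should await a
 * result from, not an actual attrinfo length.
 */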
4153 
4154 
4155 void
4156 Dbspj::lookup_execLQHKEYCONF(Signal* signal,
4157                              Ptr<Request> requestPtr,
4158                              Ptr<TreeNode> treeNodePtr)
4159 {
4160   ndbrequire(!(requestPtr.p->isLookup() && treeNodePtr.p->isLeaf()));
4161 
4162   Uint32 Tnode = refToNode(signal->getSendersBlockRef());
4163 
4164   if (treeNodePtr.p->m_bits & TreeNode::T_USER_PROJECTION)
4165   {
4166     jam();
4167     requestPtr.p->m_rows++;
4168   }
4169 
4170   ndbassert(requestPtr.p->m_lookup_node_data[Tnode] >= 1);
4171   requestPtr.p->m_lookup_node_data[Tnode] -= 1;
4172 
4173   treeNodePtr.p->m_lookup_data.m_outstanding--;
4174 
4175   /**
4176    * Another TreeNode awaited completion of this request
4177    * before it could resume its operation.
4178    */
4179   if (treeNodePtr.p->m_resumeEvents & TreeNode::TN_RESUME_CONF)
4180   {
4181     jam();
4182     ndbassert(treeNodePtr.p->m_resumePtrI != RNIL);
4183     Ptr<TreeNode> resumeTreeNodePtr;
4184     m_treenode_pool.getPtr(resumeTreeNodePtr, treeNodePtr.p->m_resumePtrI);
4185     lookup_resume(signal, requestPtr, resumeTreeNodePtr);
4186   }
4187 
4188   if (treeNodePtr.p->m_lookup_data.m_outstanding == 0
4189       && treeNodePtr.p->m_deferred.isEmpty())
4190   {
4191     jam();
4192     // We have received all rows for this treeNode in this batch.
4193     handleTreeNodeComplete(signal, requestPtr, treeNodePtr);
4194   }
4195 
4196   checkBatchComplete(signal, requestPtr, 1);
4197 }
4198 
4199 void
4200 Dbspj::lookup_parent_row(Signal* signal,
4201                          Ptr<Request> requestPtr,
4202                          Ptr<TreeNode> treeNodePtr,
4203                          const RowPtr & rowRef)
4204 {
4205   jam();
4206 
4207   DEBUG("::lookup_parent_row"
4208      << ", node: " << treeNodePtr.p->m_node_no);
4209 
4210   ndbassert((treeNodePtr.p->m_bits & TreeNode::T_EXEC_SEQUENTIAL) == 0);
4211   lookup_row(signal, requestPtr, treeNodePtr, rowRef);
4212 } // Dbspj::lookup_parent_row()
4213 
4214 /**
4215  * lookup_resume() is a delayed lookup_parent_row.
4216  * It will locate the next parent row now allowed to execute,
4217  * and create a child lookup request for that row.
4218  */
4219 void
4220 Dbspj::lookup_resume(Signal* signal,
4221                      Ptr<Request> requestPtr,
4222                      Ptr<TreeNode> treeNodePtr)
4223 {
4224   jam();
4225   DEBUG("::lookup_resume"
4226      << ", node: " << treeNodePtr.p->m_node_no
4227   );
4228 
4229   ndbassert(treeNodePtr.p->m_bits & TreeNode::T_EXEC_SEQUENTIAL);
4230   ndbassert(treeNodePtr.p->m_parentPtrI != RNIL);
4231   ndbassert(!treeNodePtr.p->m_deferred.isEmpty());
4232   ndbassert(!requestPtr.p->m_completed_nodes.get(treeNodePtr.p->m_node_no));
4233 
4234   if (unlikely(requestPtr.p->m_state & Request::RS_ABORTING))
4235   {
4236     jam();
4237     return;
4238   }
4239 
4240   Uint32 corrVal;
4241   {
4242     LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
4243     Local_pattern_store correlations(pool, treeNodePtr.p->m_deferred.m_correlations);
4244 
4245     Local_pattern_store::DataBufferIterator it;
4246     const bool valid = correlations.position(it, (Uint32)(treeNodePtr.p->m_deferred.m_pos++));
4247     (void)valid; ndbassert(valid);
4248     corrVal = *it.data;
4249   }
4250 
4251   Ptr<TreeNode> parentPtr;
4252   m_treenode_pool.getPtr(parentPtr, treeNodePtr.p->m_parentPtrI);
4253 
4254   // Set up RowPtr & RowRef for this parent row
4255   RowPtr row;
4256   row.m_src_node_ptrI = parentPtr.i;
4257   row.m_src_correlation = corrVal;
4258 
4259   ndbassert(parentPtr.p->m_rows.m_type == RowCollection::COLLECTION_MAP);
4260   RowRef ref;
4261   parentPtr.p->m_rows.m_map.copyto(ref);
4262   const Uint32* const mapptr = get_row_ptr(ref);
4263 
4264   // Relocate parent row from correlation value.
4265   const Uint32 rowId = (corrVal & 0xFFFF);
4266   parentPtr.p->m_rows.m_map.load(mapptr, rowId, ref);
4267 
4268   const Uint32* const rowptr = get_row_ptr(ref);
4269   setupRowPtr(parentPtr.p->m_rows, row, ref, rowptr);
4270 
4271   lookup_row(signal, requestPtr, treeNodePtr, row);
4272 } // Dbspj::lookup_resume()
4273 
4274 void
4275 Dbspj::lookup_row(Signal* signal,
4276                          Ptr<Request> requestPtr,
4277                          Ptr<TreeNode> treeNodePtr,
4278                          const RowPtr & rowRef)
4279 {
4280   jam();
4281 
4282   /**
4283    * Here we need to...
4284    *   1) construct a key
4285    *   2) compute hash     (normally TC)
4286    *   3) get node for row (normally TC)
4287    */
4288   Uint32 err = 0;
4289   const Uint32 tableId = treeNodePtr.p->m_tableOrIndexId;
4290   const Uint32 corrVal = rowRef.m_src_correlation;
4291 
4292   DEBUG("::lookup_row"
4293      << ", node: " << treeNodePtr.p->m_node_no);
4294 
4295   do
4296   {
4297     err = checkTableError(treeNodePtr);
4298     if (unlikely(err != 0))
4299     {
4300       jam();
4301       break;
4302     }
4303 
4304     /**
4305      * Test execution terminated due to 'OutOfQueryMemory' which
4306      * may happen multiple places below:
4307      * - 17040: Fail on any lookup_parent_row()
4308      * - 17041: Fail on lookup_parent_row() if 'isLeaf'
4309      * - 17042: Fail on lookup_parent_row() if treeNode not root
4310      */
4311     if (ERROR_INSERTED(17040) ||
4312        (ERROR_INSERTED(17041) && treeNodePtr.p->isLeaf()) ||
4313        (ERROR_INSERTED(17042) && treeNodePtr.p->m_parentPtrI != RNIL))
4314     {
4315       jam();
4316       CLEAR_ERROR_INSERT_VALUE;
4317       err = DbspjErr::OutOfQueryMemory;
4318       break;
4319     }
4320 
4321     Uint32 ptrI = RNIL;
4322     if (treeNodePtr.p->m_bits & TreeNode::T_KEYINFO_CONSTRUCTED)
4323     {
4324       jam();
4325       DEBUG("parent_row w/ T_KEYINFO_CONSTRUCTED");
4326       /**
4327        * Get key-pattern
4328        */
4329       LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
4330       Local_pattern_store pattern(pool, treeNodePtr.p->m_keyPattern);
4331 
4332       bool keyIsNull;
4333       err = expand(ptrI, pattern, rowRef, keyIsNull);
4334       if (unlikely(err != 0))
4335       {
4336         jam();
4337         releaseSection(ptrI);
4338         break;
4339       }
4340 
4341       if (keyIsNull)
4342       {
4343         /**
4344          * When the key contains NULL values, an EQ-match is impossible!
4345          * The entire lookup request can therefore be eliminated, as it
4346          * is known to be REFused with errorCode = 626 (Row not found).
4347          *
4348          * Scan requests can simply ignore these child LQHKEYREQs,
4349          * as REFs are not needed, either by the API protocol
4350          * or in order to handle TN_RESUME_REF.
4351          *
4352          * Lookup requests have to send the same KEYREFs as would have
4353          * been produced by LQH.
4354          */
4355         jam();
4356         DEBUG("Key contains NULL values: Ignore impossible KEYREQ");
4357         releaseSection(ptrI);
4358         ptrI = RNIL;
4359 
4360         /* Send KEYREF(errCode=626) as required by lookup request protocol */
4361         if (requestPtr.p->isLookup())
4362         {
4363           jam();
4364           lookup_stop_branch(signal, requestPtr, treeNodePtr, 626);
4365         }
4366 
4367         /**
4368          * Another TreeNode awaited completion of this treeNode
4369          * or sub-branch before it could resume its operation.
4370          */
4371         if ((treeNodePtr.p->m_resumeEvents & TreeNode::TN_RESUME_REF))
4372         {
4373           jam();
4374           DEBUG("handling TN_RESUME_REF");
4375           ndbassert(treeNodePtr.p->m_resumePtrI != RNIL);
4376           Ptr<TreeNode> resumeTreeNodePtr;
4377           m_treenode_pool.getPtr(resumeTreeNodePtr, treeNodePtr.p->m_resumePtrI);
4378           lookup_resume(signal, requestPtr, resumeTreeNodePtr);
4379         }
4380 
4381         return;  // Bailout, KEYREQ would have returned KEYREF(626) anyway
4382       } // keyIsNull
4383 
4384       ndbassert(ptrI != RNIL);
4385       treeNodePtr.p->m_send.m_keyInfoPtrI = ptrI;
4386     } //T_KEYINFO_CONSTRUCTED
4387 
4388     BuildKeyReq tmp;
4389     err = computeHash(signal, tmp, tableId, treeNodePtr.p->m_send.m_keyInfoPtrI);
4390     if (unlikely(err != 0))
4391       break;
4392 
4393     err = getNodes(signal, tmp, tableId);
4394     if (unlikely(err != 0))
4395       break;
4396 
4397     Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;
4398     if (treeNodePtr.p->m_bits & TreeNode::T_ATTRINFO_CONSTRUCTED)
4399     {
4400       jam();
4401       Uint32 tmp = RNIL;
4402 
4403       /**
4404        * Test execution terminated due to 'OutOfSectionMemory' which
4405        * may happen for different treeNodes in the request:
4406        * - 17080: Fail on lookup_parent_row()
4407        * - 17081: Fail on lookup_parent_row() if 'isLeaf'
4408        * - 17082: Fail on lookup_parent_row() if treeNode not root
4409        */
4410 
4411       if (ERROR_INSERTED(17080) ||
4412          (ERROR_INSERTED(17081) && treeNodePtr.p->isLeaf()) ||
4413          (ERROR_INSERTED(17082) && treeNodePtr.p->m_parentPtrI != RNIL))
4414       {
4415         jam();
4416         CLEAR_ERROR_INSERT_VALUE;
4417         ndbout_c("Injecting OutOfSectionMemory error at line %d file %s",
4418                  __LINE__,  __FILE__);
4419         err = DbspjErr::OutOfSectionMemory;
4420         break;
4421       }
4422 
4423       if (!dupSection(tmp, attrInfoPtrI))
4424       {
4425         jam();
4426         ndbassert(tmp == RNIL);  // Guard for memleak
4427         err = DbspjErr::OutOfSectionMemory;
4428         break;
4429       }
4430 
4431       Uint32 org_size;
4432       {
4433         SegmentedSectionPtr ptr;
4434         getSection(ptr, tmp);
4435         org_size = ptr.sz;
4436       }
4437 
4438       bool hasNull;
4439       LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
4440       Local_pattern_store pattern(pool, treeNodePtr.p->m_attrParamPattern);
4441       err = expand(tmp, pattern, rowRef, hasNull);
4442       if (unlikely(err != 0))
4443       {
4444         jam();
4445         releaseSection(tmp);
4446         break;
4447       }
4448 //    ndbrequire(!hasNull);
4449 
4450       /**
4451        * Update size of the subroutine section, which contains arguments
4452        */
4453       SegmentedSectionPtr ptr;
4454       getSection(ptr, tmp);
4455       Uint32 new_size = ptr.sz;
4456       Uint32 * sectionptrs = ptr.p->theData;
4457       sectionptrs[4] = new_size - org_size;
4458 
4459       treeNodePtr.p->m_send.m_attrInfoPtrI = tmp;
4460     }
4461 
4462     /**
4463      * Now send...
4464      */
4465 
4466     /**
4467      * TODO merge better with lookup_start (refactor)
4468      */
4469     {
4470       /* We set the upper half word of m_correlation to the tuple ID
4471        * of the parent, such that the API can match this tuple with its
4472        * parent.
4473        * Then we re-use the tuple ID of the parent as the
4474        * tuple ID for this tuple also. Since the tuple ID
4475        * is unique within this batch and SPJ block for the parent operation,
4476        * it must also be unique for this operation.
4477        * This ensures that lookup operations with no user projection will
4478        * work, since such operations will have the same tuple ID as their
4479        * parents. The API will then be able to match a tuple with its
4480        * grandparent, even if it gets no tuple for the parent operation. */
4481       treeNodePtr.p->m_send.m_correlation =
4482         (corrVal << 16) + (corrVal & 0xffff);
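      /**
       * Worked example (illustrative value only): if the low halfword of
       * the parent's correlation, i.e. its tuple ID, is 0x0005, then
       * m_correlation becomes (0x0005 << 16) + 0x0005 = 0x00050005:
       * the parent's tuple ID in the upper half, re-used as this
       * tuple's own ID in the lower half.
       */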
4483 
4484       treeNodePtr.p->m_send.m_ref = tmp.receiverRef;
4485       LqhKeyReq * dst = (LqhKeyReq*)treeNodePtr.p->m_lookup_data.m_lqhKeyReq;
4486       dst->hashValue = tmp.hashInfo[0];
4487       dst->fragmentData = tmp.fragId;
4488       Uint32 attrLen = 0;
4489       LqhKeyReq::setDistributionKey(attrLen, tmp.fragDistKey);
4490       dst->attrLen = attrLen;
4491       lookup_send(signal, requestPtr, treeNodePtr);
4492 
4493       if (treeNodePtr.p->m_bits & TreeNode::T_ATTRINFO_CONSTRUCTED)
4494       {
4495         jam();
4496         // restore
4497         treeNodePtr.p->m_send.m_attrInfoPtrI = attrInfoPtrI;
4498       }
4499     }
4500     return;
4501   } while (0);
4502 
4503   // If we fail it will always be a 'hard error' -> abort
4504   ndbrequire(err);
4505   jam();
4506   abort(signal, requestPtr, err);
4507 }
4508 
4509 void
4510 Dbspj::lookup_abort(Signal* signal,
4511                     Ptr<Request> requestPtr,
4512                     Ptr<TreeNode> treeNodePtr)
4513 {
4514   jam();
4515 }
4516 
4517 Uint32
4518 Dbspj::lookup_execNODE_FAILREP(Signal* signal,
4519                                Ptr<Request> requestPtr,
4520                                Ptr<TreeNode> treeNodePtr,
4521                                NdbNodeBitmask mask)
4522 {
4523   jam();
4524   Uint32 node = 0;
4525   Uint32 sum = 0;
4526   while (requestPtr.p->m_outstanding &&
4527          ((node = mask.find(node + 1)) != NdbNodeBitmask::NotFound))
4528   {
4529     Uint32 cnt = requestPtr.p->m_lookup_node_data[node];
4530     sum += cnt;
4531     requestPtr.p->m_lookup_node_data[node] = 0;
4532   }
4533 
4534   if (sum)
4535   {
4536     jam();
4537     ndbrequire(requestPtr.p->m_outstanding >= sum);
4538     requestPtr.p->m_outstanding -= sum;
4539   }
4540 
4541   return sum;
4542 }
4543 
4544 void
4545 Dbspj::lookup_cleanup(Ptr<Request> requestPtr,
4546                       Ptr<TreeNode> treeNodePtr)
4547 {
4548   cleanup_common(requestPtr, treeNodePtr);
4549 }
4550 
4551 
4552 Uint32
4553 Dbspj::handle_special_hash(Uint32 tableId, Uint32 dstHash[4],
4554                            const Uint64* src,
4555                            Uint32 srcLen,       // Len in #32bit words
4556                            const KeyDescriptor* desc)
4557 {
4558   const Uint32 MAX_KEY_SIZE_IN_LONG_WORDS=
4559     (MAX_KEY_SIZE_IN_WORDS + 1) / 2;
4560   Uint64 alignedWorkspace[MAX_KEY_SIZE_IN_LONG_WORDS * MAX_XFRM_MULTIPLY];
4561   const bool hasVarKeys = desc->noOfVarKeys > 0;
4562   const bool hasCharAttr = desc->hasCharAttr;
4563   const bool compute_distkey = desc->noOfDistrKeys > 0;
4564 
4565   const Uint64 *hashInput = 0;
4566   Uint32 inputLen = 0;
4567   Uint32 keyPartLen[MAX_ATTRIBUTES_IN_INDEX];
4568   Uint32 * keyPartLenPtr;
4569 
4570   /* Normalise KeyInfo into workspace if necessary */
4571   if (hasCharAttr || (compute_distkey && hasVarKeys))
4572   {
4573     hashInput = alignedWorkspace;
4574     keyPartLenPtr = keyPartLen;
4575     inputLen = xfrm_key(tableId,
4576                         (Uint32*)src,
4577                         (Uint32*)alignedWorkspace,
4578                         sizeof(alignedWorkspace) >> 2,
4579                         keyPartLenPtr);
4580     if (unlikely(inputLen == 0))
4581     {
4582       return 290;  // 'Corrupt key in TC, unable to xfrm'
4583     }
4584   }
4585   else
4586   {
4587     /* Keyinfo already suitable for hash */
4588     hashInput = src;
4589     inputLen = srcLen;
4590     keyPartLenPtr = 0;
4591   }
4592 
4593   /* Calculate primary key hash */
4594   md5_hash(dstHash, hashInput, inputLen);
4595 
4596   /* If the distribution key != primary key then we have to
4597    * form a distribution key from the primary key and calculate
4598    * a separate distribution hash based on this
4599    */
4600   if (compute_distkey)
4601   {
4602     jam();
4603 
4604     Uint32 distrKeyHash[4];
4605     /* Reshuffle primary key columns to get just distribution key */
4606     Uint32 len = create_distr_key(tableId, (Uint32*)hashInput, (Uint32*)alignedWorkspace, keyPartLenPtr);
4607     /* Calculate distribution key hash */
4608     md5_hash(distrKeyHash, alignedWorkspace, len);
4609 
4610     /* Just one word used for distribution */
4611     dstHash[1] = distrKeyHash[1];
4612   }
4613   return 0;
4614 }
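/**
 * Note on how the hash words are consumed downstream: dstHash[0] is later
 * sent to LQH as LqhKeyReq::hashValue (see lookup_row()), while dstHash[1]
 * is the single word fed into fragment selection by getNodes(). That is
 * why a separate distribution key only needs to overwrite dstHash[1].
 */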
4615 
4616 Uint32
4617 Dbspj::computeHash(Signal* signal,
4618                    BuildKeyReq& dst, Uint32 tableId, Uint32 ptrI)
4619 {
4620   /**
4621    * Essentially the same code as in Dbtc::hash().
4622    * The code for user defined partitioning has been removed though.
4623    */
4624   SegmentedSectionPtr ptr;
4625   getSection(ptr, ptrI);
4626 
4627   /* NOTE:  md5_hash below requires 64-bit alignment
4628    */
4629   const Uint32 MAX_KEY_SIZE_IN_LONG_WORDS=
4630     (MAX_KEY_SIZE_IN_WORDS + 1) / 2;
4631   Uint64 tmp64[MAX_KEY_SIZE_IN_LONG_WORDS];
4632   Uint32 *tmp32 = (Uint32*)tmp64;
4633   ndbassert(ptr.sz <= MAX_KEY_SIZE_IN_WORDS);
4634   copy(tmp32, ptr);
4635 
4636   const KeyDescriptor* desc = g_key_descriptor_pool.getPtr(tableId);
4637   ndbrequire(desc != NULL);
4638 
4639   bool need_special_hash = desc->hasCharAttr | (desc->noOfDistrKeys > 0);
4640   if (need_special_hash)
4641   {
4642     jam();
4643     return handle_special_hash(tableId, dst.hashInfo, tmp64, ptr.sz, desc);
4644   }
4645   else
4646   {
4647     jam();
4648     md5_hash(dst.hashInfo, tmp64, ptr.sz);
4649     return 0;
4650   }
4651 }
4652 
4653 /**
4654  * This function differs from computeHash in that *ptrI*
4655  * only contains the partition key (packed) and not the full primary key
4656  */
4657 Uint32
4658 Dbspj::computePartitionHash(Signal* signal,
4659                             BuildKeyReq& dst, Uint32 tableId, Uint32 ptrI)
4660 {
4661   SegmentedSectionPtr ptr;
4662   getSection(ptr, ptrI);
4663 
4664   /* NOTE:  md5_hash below requires 64-bit alignment
4665    */
4666   const Uint32 MAX_KEY_SIZE_IN_LONG_WORDS=
4667     (MAX_KEY_SIZE_IN_WORDS + 1) / 2;
4668   Uint64 _space[MAX_KEY_SIZE_IN_LONG_WORDS];
4669   Uint64 *tmp64 = _space;
4670   Uint32 *tmp32 = (Uint32*)tmp64;
4671   Uint32 sz = ptr.sz;
4672   ndbassert(ptr.sz <= MAX_KEY_SIZE_IN_WORDS);
4673   copy(tmp32, ptr);
4674 
4675   const KeyDescriptor* desc = g_key_descriptor_pool.getPtr(tableId);
4676   ndbrequire(desc != NULL);
4677 
4678   bool need_xfrm = desc->hasCharAttr || desc->noOfVarKeys;
4679   if (need_xfrm)
4680   {
4681     jam();
4682     /**
4683      * xfrm distribution key
4684      */
4685     Uint32 srcPos = 0;
4686     Uint32 dstPos = 0;
4687     Uint32 * src = tmp32;
4688     Uint32 * dst = signal->theData+24;
4689     for (Uint32 i = 0; i < desc->noOfKeyAttr; i++)
4690     {
4691       const KeyDescriptor::KeyAttr& keyAttr = desc->keyAttr[i];
4692       if (AttributeDescriptor::getDKey(keyAttr.attributeDescriptor))
4693       {
4694         Uint32 attrLen =
4695         xfrm_attr(keyAttr.attributeDescriptor, keyAttr.charsetInfo,
4696                   src, srcPos, dst, dstPos,
4697                   NDB_ARRAY_SIZE(signal->theData) - 24);
4698         if (unlikely(attrLen == 0))
4699         {
4700           DEBUG_CRASH();
4701           return 290;  // 'Corrupt key in TC, unable to xfrm'
4702         }
4703       }
4704     }
4705     tmp64 = (Uint64*)dst;
4706     sz = dstPos;
4707   }
4708 
4709   md5_hash(dst.hashInfo, tmp64, sz);
4710   return 0;
4711 }
4712 
4713 Uint32
4714 Dbspj::getNodes(Signal* signal, BuildKeyReq& dst, Uint32 tableId)
4715 {
4716   DiGetNodesReq * req = (DiGetNodesReq *)&signal->theData[0];
4717   req->tableId = tableId;
4718   req->hashValue = dst.hashInfo[1];
4719   req->distr_key_indicator = 0; // userDefinedPartitioning not supported!
4720   req->jamBufferPtr = jamBuffer();
4721 
4722 #if 1
4723   EXECUTE_DIRECT(DBDIH, GSN_DIGETNODESREQ, signal,
4724                  DiGetNodesReq::SignalLength, 0);
4725 #else
4726   sendSignal(DBDIH_REF, GSN_DIGETNODESREQ, signal,
4727              DiGetNodesReq::SignalLength, JBB);
4728   jamEntry();
4729 
4730 #endif
4731 
4732   DiGetNodesConf * conf = (DiGetNodesConf *)&signal->theData[0];
4733   const Uint32 err = signal->theData[0] ? signal->theData[1] : 0;
4734   Uint32 Tdata2 = conf->reqinfo;
4735   Uint32 nodeId = conf->nodes[0];
4736   Uint32 instanceKey = (Tdata2 >> 24) & 127;
4737 
4738   DEBUG("HASH to nodeId:" << nodeId << ", instanceKey:" << instanceKey);
4739 
4740   jamEntry();
4741   if (unlikely(err != 0))
4742   {
4743     jam();
4744     goto error;
4745   }
4746   dst.fragId = conf->fragId;
4747   dst.fragDistKey = (Tdata2 >> 16) & 255;
4748   dst.receiverRef = numberToRef(DBLQH, instanceKey, nodeId);
4749 
4750   return 0;
4751 
4752 error:
4753   return err;
4754 }
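/**
 * Worked example (illustrative value) of the 'reqinfo' unpacking above:
 * with conf->reqinfo = 0x05A20000,
 *
 *   fragDistKey = (0x05A20000 >> 16) & 255 = 0xA2   // 8-bit distr key
 *   instanceKey = (0x05A20000 >> 24) & 127 = 0x05   // LQH instance key
 *
 * and the receiver becomes numberToRef(DBLQH, 0x05, nodeId), i.e. that
 * LQH instance on the node owning the fragment's primary replica.
 */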
4755 
4756 /**
4757  * END - MODULE LOOKUP
4758  */
4759 
4760 /**
4761  * MODULE SCAN FRAG
4762  *
4763  * NOTE: This may only be the root node
4764  */
4765 const Dbspj::OpInfo
4766 Dbspj::g_ScanFragOpInfo =
4767 {
4768   &Dbspj::scanFrag_build,
4769   0, // prepare
4770   &Dbspj::scanFrag_start,
4771   &Dbspj::scanFrag_execTRANSID_AI,
4772   0, // execLQHKEYREF
4773   0, // execLQHKEYCONF
4774   &Dbspj::scanFrag_execSCAN_FRAGREF,
4775   &Dbspj::scanFrag_execSCAN_FRAGCONF,
4776   0, // parent row
4777   0, // parent batch complete
4778   0, // parent batch repeat
4779   0, // Dbspj::scanFrag_parent_batch_cleanup,
4780   &Dbspj::scanFrag_execSCAN_NEXTREQ,
4781   0, // Dbspj::scanFrag_complete
4782   &Dbspj::scanFrag_abort,
4783   0, // execNODE_FAILREP,
4784   &Dbspj::scanFrag_cleanup
4785 };
4786 
4787 Uint32
4788 Dbspj::scanFrag_build(Build_context& ctx,
4789                       Ptr<Request> requestPtr,
4790                       const QueryNode* qn,
4791                       const QueryNodeParameters* qp)
4792 {
4793   Uint32 err = 0;
4794   Ptr<TreeNode> treeNodePtr;
4795   const QN_ScanFragNode * node = (const QN_ScanFragNode*)qn;
4796   const QN_ScanFragParameters * param = (const QN_ScanFragParameters*)qp;
4797 
4798   do
4799   {
4800     err = DbspjErr::InvalidTreeNodeSpecification;
4801     DEBUG("scanFrag_build: len=" << node->len);
4802     if (unlikely(node->len < QN_ScanFragNode::NodeSize))
4803     {
4804       jam();
4805       break;
4806     }
4807 
4808     err = DbspjErr::InvalidTreeParametersSpecification;
4809     DEBUG("param len: " << param->len);
4810     if (unlikely(param->len < QN_ScanFragParameters::NodeSize))
4811     {
4812       jam();
4813       break;
4814     }
4815 
4816     err = createNode(ctx, requestPtr, treeNodePtr);
4817     if (unlikely(err != 0))
4818     {
4819       jam();
4820       break;
4821     }
4822 
4823     treeNodePtr.p->m_info = &g_ScanFragOpInfo;
4824     treeNodePtr.p->m_tableOrIndexId = node->tableId;
4825     treeNodePtr.p->m_primaryTableId = node->tableId;
4826     treeNodePtr.p->m_schemaVersion = node->tableVersion;
4827     treeNodePtr.p->m_scanfrag_data.m_scanFragHandlePtrI = RNIL;
4828     Ptr<ScanFragHandle> scanFragHandlePtr;
4829     if (ERROR_INSERTED_CLEAR(17004))
4830     {
4831       jam();
4832       ndbout_c("Injecting OutOfQueryMemory error 17004 at line %d file %s",
4833                __LINE__,  __FILE__);
4834       err = DbspjErr::OutOfQueryMemory;
4835       break;
4836     }
4837     if (unlikely(m_scanfraghandle_pool.seize(requestPtr.p->m_arena,
4838                                              scanFragHandlePtr) != true))
4839     {
4840       err = DbspjErr::OutOfQueryMemory;
4841       jam();
4842       break;
4843     }
4844 
4845     scanFragHandlePtr.p->m_treeNodePtrI = treeNodePtr.i;
4846     scanFragHandlePtr.p->m_state = ScanFragHandle::SFH_NOT_STARTED;
4847     treeNodePtr.p->m_scanfrag_data.m_scanFragHandlePtrI = scanFragHandlePtr.i;
4848 
4849     requestPtr.p->m_bits |= Request::RT_SCAN;
4850     treeNodePtr.p->m_bits |= TreeNode::T_ATTR_INTERPRETED;
4851     treeNodePtr.p->m_batch_size = ctx.m_batch_size_rows;
4852 
4853     ScanFragReq* dst = (ScanFragReq*)treeNodePtr.p->m_scanfrag_data.m_scanFragReq;
4854     dst->senderData = scanFragHandlePtr.i;
4855     dst->resultRef = reference();
4856     dst->resultData = treeNodePtr.i;
4857     dst->savePointId = ctx.m_savepointId;
4858 
4859     Uint32 transId1 = requestPtr.p->m_transId[0];
4860     Uint32 transId2 = requestPtr.p->m_transId[1];
4861     dst->transId1 = transId1;
4862     dst->transId2 = transId2;
4863 
4864     Uint32 treeBits = node->requestInfo;
4865     Uint32 paramBits = param->requestInfo;
4866     //ndbout_c("Dbspj::scanFrag_build() treeBits=%.8x paramBits=%.8x",
4867     //         treeBits, paramBits);
4868     Uint32 requestInfo = 0;
4869     ScanFragReq::setReadCommittedFlag(requestInfo, 1);
4870     ScanFragReq::setScanPrio(requestInfo, ctx.m_scanPrio);
4871     ScanFragReq::setCorrFactorFlag(requestInfo, 1);
4872     ScanFragReq::setNoDiskFlag(requestInfo,
4873                                (treeBits & DABits::NI_LINKED_DISK) == 0 &&
4874                                (paramBits & DABits::PI_DISK_ATTR) == 0);
4875     dst->requestInfo = requestInfo;
4876     dst->tableId = node->tableId;
4877     dst->schemaVersion = node->tableVersion;
4878 
4879     ctx.m_resultData = param->resultData;
4880 
4881     /**
4882      * Parse stuff common to lookup/scan-frag
4883      */
4884     struct DABuffer nodeDA, paramDA;
4885     nodeDA.ptr = node->optional;
4886     nodeDA.end = nodeDA.ptr + (node->len - QN_ScanFragNode::NodeSize);
4887     paramDA.ptr = param->optional;
4888     paramDA.end = paramDA.ptr + (param->len - QN_ScanFragParameters::NodeSize);
4889     err = parseDA(ctx, requestPtr, treeNodePtr,
4890                   nodeDA, treeBits, paramDA, paramBits);
4891     if (unlikely(err != 0))
4892     {
4893       jam();
4894       break;
4895     }
4896 
4897     ctx.m_scan_cnt++;
4898     ctx.m_scans.set(treeNodePtr.p->m_node_no);
4899 
4900     if (ctx.m_start_signal)
4901     {
4902       jam();
4903       Signal* signal = ctx.m_start_signal;
4904       const ScanFragReq* src = (const ScanFragReq*)(signal->getDataPtr());
4905 
4906 #if NOT_YET
4907       Uint32 instanceNo =
4908         blockToInstance(signal->header.theReceiversBlockNumber);
4909       treeNodePtr.p->m_send.m_ref = numberToRef(DBLQH,
4910                                                 instanceNo, getOwnNodeId());
4911 #else
4912       treeNodePtr.p->m_send.m_ref =
4913         numberToRef(DBLQH, getInstanceKey(src->tableId,
4914                                           src->fragmentNoKeyLen),
4915                     getOwnNodeId());
4916 #endif
4917 
4918       Uint32 fragId = src->fragmentNoKeyLen;
4919       Uint32 requestInfo = src->requestInfo;
4920       Uint32 batch_size_bytes = src->batch_size_bytes;
4921       Uint32 batch_size_rows = src->batch_size_rows;
4922 
4923 #ifdef VM_TRACE
4924       Uint32 savePointId = src->savePointId;
4925       Uint32 tableId = src->tableId;
4926       Uint32 schemaVersion = src->schemaVersion;
4927       Uint32 transId1 = src->transId1;
4928       Uint32 transId2 = src->transId2;
4929 #endif
4930       ndbassert(ScanFragReq::getLockMode(requestInfo) == 0);
4931       ndbassert(ScanFragReq::getHoldLockFlag(requestInfo) == 0);
4932       ndbassert(ScanFragReq::getKeyinfoFlag(requestInfo) == 0);
4933       ndbassert(ScanFragReq::getReadCommittedFlag(requestInfo) == 1);
4934       ndbassert(ScanFragReq::getLcpScanFlag(requestInfo) == 0);
4935       //ScanFragReq::getAttrLen(requestInfo); // ignore
4936       ndbassert(ScanFragReq::getReorgFlag(requestInfo) == ScanFragReq::REORG_ALL);
4937 
4938       Uint32 tupScanFlag = ScanFragReq::getTupScanFlag(requestInfo);
4939       Uint32 rangeScanFlag = ScanFragReq::getRangeScanFlag(requestInfo);
4940       Uint32 descendingFlag = ScanFragReq::getDescendingFlag(requestInfo);
4941       Uint32 scanPrio = ScanFragReq::getScanPrio(requestInfo);
4942 
4943       Uint32 dst_requestInfo = dst->requestInfo;
4944 
4945       ScanFragReq::setTupScanFlag(dst_requestInfo,tupScanFlag);
4946       ScanFragReq::setRangeScanFlag(dst_requestInfo,rangeScanFlag);
4947       ScanFragReq::setDescendingFlag(dst_requestInfo,descendingFlag);
4948       ScanFragReq::setScanPrio(dst_requestInfo,scanPrio);
4949 
4950       /**
4951        * 'NoDiskFlag' should agree with information in treeNode
4952        */
4953       ndbassert(ScanFragReq::getNoDiskFlag(requestInfo) ==
4954                 ScanFragReq::getNoDiskFlag(dst_requestInfo));
4955 
4956       dst->fragmentNoKeyLen = fragId;
4957       dst->requestInfo = dst_requestInfo;
4958       dst->batch_size_bytes = batch_size_bytes;
4959       dst->batch_size_rows = batch_size_rows;
4960 
4961 #ifdef VM_TRACE
4962       ndbassert(dst->savePointId == savePointId);
4963       ndbassert(dst->tableId == tableId);
4964       ndbassert(dst->schemaVersion == schemaVersion);
4965       ndbassert(dst->transId1 == transId1);
4966       ndbassert(dst->transId2 == transId2);
4967 #endif
4968 
4969       treeNodePtr.p->m_bits |= TreeNode::T_ONE_SHOT;
4970 
4971       if (rangeScanFlag)
4972       {
4973         c_Counters.incr_counter(CI_RANGE_SCANS_RECEIVED, 1);
4974       }
4975       else
4976       {
4977         c_Counters.incr_counter(CI_TABLE_SCANS_RECEIVED, 1);
4978       }
4979     }
4980     else
4981     {
4982       ndbrequire(false);
4983     }
4984 
4985     return 0;
4986   } while (0);
4987 
4988   return err;
4989 }
4990 
4991 void
4992 Dbspj::scanFrag_start(Signal* signal,
4993                       Ptr<Request> requestPtr,
4994                       Ptr<TreeNode> treeNodePtr)
4995 {
4996   scanFrag_send(signal, requestPtr, treeNodePtr);
4997 }
4998 
4999 void
5000 Dbspj::scanFrag_send(Signal* signal,
5001                      Ptr<Request> requestPtr,
5002                      Ptr<TreeNode> treeNodePtr)
5003 {
5004   jam();
5005   if (!ERROR_INSERTED(17521)) // Avoid emulated rnd errors
5006   {
5007     // ::checkTableError() should be handled before we reach this far
5008     ndbassert(checkTableError(treeNodePtr) == 0);
5009   }
5010 
5011   Ptr<ScanFragHandle> scanFragHandlePtr;
5012   m_scanfraghandle_pool.getPtr(scanFragHandlePtr, treeNodePtr.p->
5013                                m_scanfrag_data.m_scanFragHandlePtrI);
5014 
5015   ScanFragReq* req = reinterpret_cast<ScanFragReq*>(signal->getDataPtrSend());
5016 
5017   memcpy(req, treeNodePtr.p->m_scanfrag_data.m_scanFragReq,
5018          sizeof(treeNodePtr.p->m_scanfrag_data.m_scanFragReq));
5019   req->variableData[0] = treeNodePtr.p->m_send.m_correlation;
5020   req->variableData[1] = requestPtr.p->m_rootResultData;
5021 
5022   SectionHandle handle(this);
5023 
5024   Uint32 ref = treeNodePtr.p->m_send.m_ref;
5025   Uint32 keyInfoPtrI = treeNodePtr.p->m_send.m_keyInfoPtrI;
5026   Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;
5027 
5028   /**
5029    * ScanFrag may only be used as the root node, i.e. T_ONE_SHOT
5030    */
5031   ndbrequire(treeNodePtr.p->m_bits & TreeNode::T_ONE_SHOT);
5032 
5033   /**
5034    * Pass sections to send
5035    */
5036   treeNodePtr.p->m_send.m_attrInfoPtrI = RNIL;
5037   treeNodePtr.p->m_send.m_keyInfoPtrI = RNIL;
5038 
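  // ATTRINFO always travels as section 0; KEYINFO is attached as
  // section 1 only when a key is present (handle.m_cnt tells which).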
5039   getSection(handle.m_ptr[0], attrInfoPtrI);
5040   handle.m_cnt = 1;
5041 
5042   if (keyInfoPtrI != RNIL)
5043   {
5044     jam();
5045     getSection(handle.m_ptr[1], keyInfoPtrI);
5046     handle.m_cnt = 2;
5047   }
5048 
5049 #ifdef DEBUG_SCAN_FRAGREQ
5050   ndbout_c("SCAN_FRAGREQ to %x", ref);
5051   printSCAN_FRAGREQ(stdout, signal->getDataPtrSend(),
5052                     NDB_ARRAY_SIZE(treeNodePtr.p->m_scanfrag_data.m_scanFragReq),
5053                     DBLQH);
5054   printf("ATTRINFO: ");
5055   print(handle.m_ptr[0], stdout);
5056   if (handle.m_cnt > 1)
5057   {
5058     printf("KEYINFO: ");
5059     print(handle.m_ptr[1], stdout);
5060   }
5061 #endif
5062 
5063   if (ScanFragReq::getRangeScanFlag(req->requestInfo))
5064   {
5065     c_Counters.incr_counter(CI_LOCAL_RANGE_SCANS_SENT, 1);
5066   }
5067   else
5068   {
5069     c_Counters.incr_counter(CI_LOCAL_TABLE_SCANS_SENT, 1);
5070   }
5071 
5072   if (ERROR_INSERTED_CLEAR(17100))
5073   {
5074     jam();
5075     ndbout_c("Injecting invalid schema version error at line %d file %s",
5076              __LINE__,  __FILE__);
5077     // Provoke 'Invalid schema version' in order to receive SCAN_FRAGREF
5078     req->schemaVersion++;
5079   }
5080 
5081   ndbrequire(refToNode(ref) == getOwnNodeId());
5082   {
5083     FragmentSendInfo fragSendInfo;
5084     // See comment about fragmented send pattern in ::scanIndex_send()
5085     sendFirstFragment(fragSendInfo,
5086                       ref,
5087                       GSN_SCAN_FRAGREQ,
5088                       signal,
5089                       NDB_ARRAY_SIZE(treeNodePtr.p->m_scanfrag_data.m_scanFragReq),
5090                       JBB,
5091                       &handle, false);
5092 
5093     while (fragSendInfo.m_status != FragmentSendInfo::SendComplete) // SendNotComplete
5094     {
5095       jam();
5096       // Send remaining fragments
5097       sendNextSegmentedFragment(signal, fragSendInfo);
5098     }
5099   }
5100 
5101   requestPtr.p->m_completed_nodes.clear(treeNodePtr.p->m_node_no);
5102   requestPtr.p->m_outstanding++;
5103   requestPtr.p->m_cnt_active++;
5104   treeNodePtr.p->m_state = TreeNode::TN_ACTIVE;
5105 
5106   scanFragHandlePtr.p->m_state = ScanFragHandle::SFH_SCANNING;
5107   treeNodePtr.p->m_scanfrag_data.m_rows_received = 0;
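  // ~Uint32(0) is a sentinel meaning 'row count not yet known'; the
  // actual count arrives with SCAN_FRAGCONF, see scanFrag_execSCAN_FRAGCONF().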
5108   treeNodePtr.p->m_scanfrag_data.m_rows_expecting = ~Uint32(0);
5109 }
5110 
5111 void
5112 Dbspj::scanFrag_execTRANSID_AI(Signal* signal,
5113                                Ptr<Request> requestPtr,
5114                                Ptr<TreeNode> treeNodePtr,
5115                                const RowPtr & rowRef)
5116 {
5117   jam();
5118   treeNodePtr.p->m_scanfrag_data.m_rows_received++;
5119 
5120   common_execTRANSID_AI(signal, requestPtr, treeNodePtr, rowRef);
5121 
5122   ndbassert(treeNodePtr.p->m_resumePtrI == RNIL);
5123 
5124   if (treeNodePtr.p->m_scanfrag_data.m_rows_received ==
5125       treeNodePtr.p->m_scanfrag_data.m_rows_expecting)
5126   {
5127     jam();
5128     handleTreeNodeComplete(signal, requestPtr, treeNodePtr);
5129 
5130     checkBatchComplete(signal, requestPtr, 1);
5131     return;
5132   }
5133 }
5134 
5135 void
5136 Dbspj::scanFrag_execSCAN_FRAGREF(Signal* signal,
5137                                  Ptr<Request> requestPtr,
5138                                  Ptr<TreeNode> treeNodePtr,
5139                                  Ptr<ScanFragHandle> scanFragHandlePtr)
5140 {
5141   jam();
5142 
5143   const ScanFragRef* rep =
5144     reinterpret_cast<const ScanFragRef*>(signal->getDataPtr());
5145   Uint32 errCode = rep->errorCode;
5146 
5147   DEBUG("scanFrag_execSCAN_FRAGREF, rep->senderData:" << rep->senderData
5148         << ", requestPtr.p->m_senderData:" << requestPtr.p->m_senderData);
5149   scanFragHandlePtr.p->m_state = ScanFragHandle::SFH_COMPLETE;
5150   ndbrequire(treeNodePtr.p->m_state == TreeNode::TN_ACTIVE);
5151   ndbrequire(requestPtr.p->m_cnt_active);
5152   requestPtr.p->m_cnt_active--;
5153   ndbrequire(requestPtr.p->m_outstanding);
5154   requestPtr.p->m_outstanding--;
5155   treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
5156 
5157   abort(signal, requestPtr, errCode);
5158 }
5159 
5160 
5161 void
5162 Dbspj::scanFrag_execSCAN_FRAGCONF(Signal* signal,
5163                                   Ptr<Request> requestPtr,
5164                                   Ptr<TreeNode> treeNodePtr,
5165                                   Ptr<ScanFragHandle> scanFragHandlePtr)
5166 {
5167   const ScanFragConf * conf =
5168     reinterpret_cast<const ScanFragConf*>(signal->getDataPtr());
5169   Uint32 rows = conf->completedOps;
5170   Uint32 done = conf->fragmentCompleted;
5171 
5172   Uint32 state = scanFragHandlePtr.p->m_state;
5173   if (state == ScanFragHandle::SFH_WAIT_CLOSE && done == 0)
5174   {
5175     jam();
5176     /**
5177      * We sent an explicit close request...ignore this...a close will come later
5178      */
5179     return;
5180   }
5181 
5182   ndbrequire(done <= 2); // 0, 1, 2 (=ZSCAN_FRAG_CLOSED)
5183 
5184   ndbassert(treeNodePtr.p->m_scanfrag_data.m_rows_expecting == ~Uint32(0));
5185   treeNodePtr.p->m_scanfrag_data.m_rows_expecting = rows;
5186   if (treeNodePtr.p->isLeaf())
5187   {
5188     /**
5189      * If this is a leaf node, then no rows will be sent to the SPJ block,
5190      * as there are no child operations to instantiate.
5191      */
5192     treeNodePtr.p->m_scanfrag_data.m_rows_received = rows;
5193   }
5194 
5195   requestPtr.p->m_rows += rows;
5196   if (done)
5197   {
5198     jam();
5199 
5200     ndbrequire(requestPtr.p->m_cnt_active);
5201     requestPtr.p->m_cnt_active--;
5202     treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
5203     scanFragHandlePtr.p->m_state = ScanFragHandle::SFH_COMPLETE;
5204   }
5205   else
5206   {
5207     jam();
5208     scanFragHandlePtr.p->m_state = ScanFragHandle::SFH_WAIT_NEXTREQ;
5209   }
5210 
5211   if (treeNodePtr.p->m_scanfrag_data.m_rows_expecting ==
5212       treeNodePtr.p->m_scanfrag_data.m_rows_received ||
5213       (state == ScanFragHandle::SFH_WAIT_CLOSE))
5214   {
5215     jam();
5216     handleTreeNodeComplete(signal, requestPtr, treeNodePtr);
5217 
5218     checkBatchComplete(signal, requestPtr, 1);
5219     return;
5220   }
5221 }
5222 
5223 void
5224 Dbspj::scanFrag_execSCAN_NEXTREQ(Signal* signal,
5225                                  Ptr<Request> requestPtr,
5226                                  Ptr<TreeNode> treeNodePtr)
5227 {
5228   jam();
5229   Uint32 err = checkTableError(treeNodePtr);
5230   if (unlikely(err))
5231   {
5232     jam();
5233     abort(signal, requestPtr, err);
5234     return;
5235   }
5236 
5237   Ptr<ScanFragHandle> scanFragHandlePtr;
5238   m_scanfraghandle_pool.getPtr(scanFragHandlePtr, treeNodePtr.p->
5239                                m_scanfrag_data.m_scanFragHandlePtrI);
5240 
5241   const ScanFragReq * org =
5242     (ScanFragReq*)treeNodePtr.p->m_scanfrag_data.m_scanFragReq;
5243 
5244   ScanFragNextReq* req =
5245     reinterpret_cast<ScanFragNextReq*>(signal->getDataPtrSend());
5246   req->senderData = treeNodePtr.p->m_scanfrag_data.m_scanFragHandlePtrI;
5247   req->requestInfo = 0;
5248   req->transId1 = requestPtr.p->m_transId[0];
5249   req->transId2 = requestPtr.p->m_transId[1];
5250   req->batch_size_rows = org->batch_size_rows;
5251   req->batch_size_bytes = org->batch_size_bytes;
5252 
5253   DEBUG("scanFrag_execSCAN_NEXTREQ to: " << hex << treeNodePtr.p->m_send.m_ref
5254         << ", senderData: " << req->senderData);
5255 #ifdef DEBUG_SCAN_FRAGREQ
5256   printSCANFRAGNEXTREQ(stdout, &signal->theData[0],
5257                        ScanFragNextReq::SignalLength, DBLQH);
5258 #endif
5259 
5260   sendSignal(treeNodePtr.p->m_send.m_ref,
5261              GSN_SCAN_NEXTREQ,
5262              signal,
5263              ScanFragNextReq::SignalLength,
5264              JBB);
5265 
5266   treeNodePtr.p->m_scanfrag_data.m_rows_received = 0;
5267   treeNodePtr.p->m_scanfrag_data.m_rows_expecting = ~Uint32(0);
5268   requestPtr.p->m_outstanding++;
5269   requestPtr.p->m_completed_nodes.clear(treeNodePtr.p->m_node_no);
5270   scanFragHandlePtr.p->m_state = ScanFragHandle::SFH_SCANNING;
5271 }//Dbspj::scanFrag_execSCAN_NEXTREQ()
5272 
5273 void
5274 Dbspj::scanFrag_abort(Signal* signal,
5275                       Ptr<Request> requestPtr,
5276                       Ptr<TreeNode> treeNodePtr)
5277 {
5278   jam();
5279 
5280   if (treeNodePtr.p->m_state == TreeNode::TN_ACTIVE)
5281   {
5282     jam();
5283     Ptr<ScanFragHandle> scanFragHandlePtr;
5284     m_scanfraghandle_pool.getPtr(scanFragHandlePtr, treeNodePtr.p->
5285                                  m_scanfrag_data.m_scanFragHandlePtrI);
5286 
5287     switch(scanFragHandlePtr.p->m_state){
5288     case ScanFragHandle::SFH_NOT_STARTED:
5289     case ScanFragHandle::SFH_COMPLETE:
5290       ndbrequire(false); // we shouldn't be TN_ACTIVE then...
5291 
5292     case ScanFragHandle::SFH_WAIT_CLOSE:
5293       jam();
5294       // close already sent
5295       return;
5296     case ScanFragHandle::SFH_WAIT_NEXTREQ:
5297       jam();
5298       // we were idle
5299       requestPtr.p->m_outstanding++;
5300       break;
5301     case ScanFragHandle::SFH_SCANNING:
5302       jam();
5303       break;
5304     }
5305 
5306     treeNodePtr.p->m_scanfrag_data.m_rows_expecting = ~Uint32(0);
5307     scanFragHandlePtr.p->m_state = ScanFragHandle::SFH_WAIT_CLOSE;
5308 
5309     ScanFragNextReq* req =
5310       reinterpret_cast<ScanFragNextReq*>(signal->getDataPtrSend());
5311     req->senderData = treeNodePtr.p->m_scanfrag_data.m_scanFragHandlePtrI;
5312     req->requestInfo = 0;
5313     ScanFragNextReq::setCloseFlag(req->requestInfo, 1);
5314     req->transId1 = requestPtr.p->m_transId[0];
5315     req->transId2 = requestPtr.p->m_transId[1];
5316     req->batch_size_rows = 0;
5317     req->batch_size_bytes = 0;
5318 
5319     sendSignal(treeNodePtr.p->m_send.m_ref,
5320                GSN_SCAN_NEXTREQ,
5321                signal,
5322                ScanFragNextReq::SignalLength,
5323                JBB);
5324   }
5325 }
5326 
5327 
5328 void
5329 Dbspj::scanFrag_cleanup(Ptr<Request> requestPtr,
5330                         Ptr<TreeNode> treeNodePtr)
5331 {
5332   Uint32 ptrI = treeNodePtr.p->m_scanfrag_data.m_scanFragHandlePtrI;
5333   if (ptrI != RNIL)
5334   {
5335     m_scanfraghandle_pool.release(ptrI);
5336   }
5337   cleanup_common(requestPtr, treeNodePtr);
5338 }
5339 
5340 /**
5341  * END - MODULE SCAN FRAG
5342  */
5343 
5344 /**
5345  * MODULE SCAN INDEX
5346  *
5347  * NOTE: This may not be the root node
5348  */
5349 const Dbspj::OpInfo
5350 Dbspj::g_ScanIndexOpInfo =
5351 {
5352   &Dbspj::scanIndex_build,
5353   &Dbspj::scanIndex_prepare,
5354   0, // start
5355   &Dbspj::scanIndex_execTRANSID_AI,
5356   0, // execLQHKEYREF
5357   0, // execLQHKEYCONF
5358   &Dbspj::scanIndex_execSCAN_FRAGREF,
5359   &Dbspj::scanIndex_execSCAN_FRAGCONF,
5360   &Dbspj::scanIndex_parent_row,
5361   &Dbspj::scanIndex_parent_batch_complete,
5362   &Dbspj::scanIndex_parent_batch_repeat,
5363   &Dbspj::scanIndex_parent_batch_cleanup,
5364   &Dbspj::scanIndex_execSCAN_NEXTREQ,
5365   &Dbspj::scanIndex_complete,
5366   &Dbspj::scanIndex_abort,
5367   &Dbspj::scanIndex_execNODE_FAILREP,
5368   &Dbspj::scanIndex_cleanup
5369 };
5370 
5371 Uint32
5372 Dbspj::scanIndex_build(Build_context& ctx,
5373                        Ptr<Request> requestPtr,
5374                        const QueryNode* qn,
5375                        const QueryNodeParameters* qp)
5376 {
5377   Uint32 err = 0;
5378   Ptr<TreeNode> treeNodePtr;
5379   const QN_ScanIndexNode * node = (const QN_ScanIndexNode*)qn;
5380   const QN_ScanIndexParameters * param = (const QN_ScanIndexParameters*)qp;
5381 
5382   do
5383   {
5384     err = DbspjErr::InvalidTreeNodeSpecification;
5385     DEBUG("scanIndex_build: len=" << node->len);
5386     if (unlikely(node->len < QN_ScanIndexNode::NodeSize))
5387     {
5388       jam();
5389       break;
5390     }
5391 
5392     err = DbspjErr::InvalidTreeParametersSpecification;
5393     DEBUG("param len: " << param->len);
5394     if (unlikely(param->len < QN_ScanIndexParameters::NodeSize))
5395     {
5396       jam();
5397       break;
5398     }
5399 
5400     err = createNode(ctx, requestPtr, treeNodePtr);
5401     if (unlikely(err != 0))
5402     {
5403       jam();
5404       break;
5405     }
5406 
5407     Uint32 batchSize = param->batchSize;
5408 
5409     requestPtr.p->m_bits |= Request::RT_SCAN;
5410     requestPtr.p->m_bits |= Request::RT_NEED_PREPARE;
5411     requestPtr.p->m_bits |= Request::RT_NEED_COMPLETE;
5412 
5413     Uint32 indexId = node->tableId;
5414     Uint32 tableId = g_key_descriptor_pool.getPtr(indexId)->primaryTableId;
5415 
5416     treeNodePtr.p->m_info = &g_ScanIndexOpInfo;
5417     treeNodePtr.p->m_tableOrIndexId = indexId;
5418     treeNodePtr.p->m_primaryTableId = tableId;
5419     treeNodePtr.p->m_schemaVersion = node->tableVersion;
5420     treeNodePtr.p->m_bits |= TreeNode::T_ATTR_INTERPRETED;
5421     treeNodePtr.p->m_bits |= TreeNode::T_NEED_REPORT_BATCH_COMPLETED;
5422     treeNodePtr.p->m_batch_size =
5423       batchSize & ~(0xFFFFFFFF << QN_ScanIndexParameters::BatchRowBits);
5424 
5425     ScanFragReq* dst = (ScanFragReq*)treeNodePtr.p->m_scanindex_data.m_scanFragReq;
5426     dst->senderData = treeNodePtr.i;
5427     dst->resultRef = reference();
5428     dst->resultData = treeNodePtr.i;
5429     dst->savePointId = ctx.m_savepointId;
5430     dst->batch_size_rows  =
5431       batchSize & ~(0xFFFFFFFF << QN_ScanIndexParameters::BatchRowBits);
5432     dst->batch_size_bytes = batchSize >> QN_ScanIndexParameters::BatchRowBits;
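    /**
     * Annotation: 'batchSize' packs both batch limits into one word; the
     * max row count sits in the low BatchRowBits bits, the max byte count
     * in the remaining high bits. As an illustration, if BatchRowBits
     * were 16, batchSize 0x00400080 would decode to 128 rows / 64 bytes.
     */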
5433 
5434     Uint32 transId1 = requestPtr.p->m_transId[0];
5435     Uint32 transId2 = requestPtr.p->m_transId[1];
5436     dst->transId1 = transId1;
5437     dst->transId2 = transId2;
5438 
5439     Uint32 treeBits = node->requestInfo;
5440     Uint32 paramBits = param->requestInfo;
5441     Uint32 requestInfo = 0;
5442     ScanFragReq::setRangeScanFlag(requestInfo, 1);
5443     ScanFragReq::setReadCommittedFlag(requestInfo, 1);
5444     ScanFragReq::setScanPrio(requestInfo, ctx.m_scanPrio);
5445     ScanFragReq::setNoDiskFlag(requestInfo,
5446                                (treeBits & DABits::NI_LINKED_DISK) == 0 &&
5447                                (paramBits & DABits::PI_DISK_ATTR) == 0);
5448     ScanFragReq::setCorrFactorFlag(requestInfo, 1);
5449     dst->requestInfo = requestInfo;
5450     dst->tableId = node->tableId;
5451     dst->schemaVersion = node->tableVersion;
5452 
5453     ctx.m_resultData = param->resultData;
5454 
5455     /**
5456      * Parse stuff
5457      */
5458     struct DABuffer nodeDA, paramDA;
5459     nodeDA.ptr = node->optional;
5460     nodeDA.end = nodeDA.ptr + (node->len - QN_ScanIndexNode::NodeSize);
5461     paramDA.ptr = param->optional;
5462     paramDA.end = paramDA.ptr + (param->len - QN_ScanIndexParameters::NodeSize);
5463 
5464     err = parseScanIndex(ctx, requestPtr, treeNodePtr,
5465                          nodeDA, treeBits, paramDA, paramBits);
5466 
5467     if (unlikely(err != 0))
5468     {
5469       jam();
5470       break;
5471     }
5472 
5473     /**
5474      * Since we T_NEED_REPORT_BATCH_COMPLETED, all ancestors
5475      *   have to T_REPORT_BATCH_COMPLETE to its siblings
5476      */
5477     Ptr<TreeNode> nodePtr;
5478     nodePtr.i = treeNodePtr.p->m_parentPtrI;
5479     while (nodePtr.i != RNIL)
5480     {
5481       jam();
5482       m_treenode_pool.getPtr(nodePtr);
5483       nodePtr.p->m_bits |= TreeNode::T_REPORT_BATCH_COMPLETE;
5484       nodePtr.i = nodePtr.p->m_parentPtrI;
5485     }
5486 
5487     /**
5488      * If there exist other scan TreeNodes which are not among
5489      * my ancestors, results from this scanIndex may be repeated
5490      * as part of an X-scan.
5491      *
5492      * NOTE: The scan nodes along the left-deep ancestor chain
5493      * are not 'repeatable', as they drive the repeated X-scan
5494      * and are thus not repeated themselves.
5495      */
5496     if (requestPtr.p->m_bits & Request::RT_REPEAT_SCAN_RESULT &&
5497        !treeNodePtr.p->m_ancestors.contains(ctx.m_scans))
5498     {
5499       treeNodePtr.p->m_bits |= TreeNode::T_SCAN_REPEATABLE;
5500     }
5501 
5502     ctx.m_scan_cnt++;
5503     ctx.m_scans.set(treeNodePtr.p->m_node_no);
5504 
5505     return 0;
5506   } while (0);
5507 
5508   return err;
5509 }
5510 
5511 Uint32
5512 Dbspj::parseScanIndex(Build_context& ctx,
5513                       Ptr<Request> requestPtr,
5514                       Ptr<TreeNode> treeNodePtr,
5515                       DABuffer tree, Uint32 treeBits,
5516                       DABuffer param, Uint32 paramBits)
5517 {
5518   Uint32 err = 0;
5519 
5520   typedef QN_ScanIndexNode Node;
5521   typedef QN_ScanIndexParameters Params;
5522 
5523   do
5524   {
5525     jam();
5526 
5527     ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
5528     data.m_fragments.init();
5529     data.m_frags_outstanding = 0;
5530     data.m_frags_complete = 0;
5531     data.m_frags_not_started = 0;
5532     data.m_parallelismStat.init();
5533     data.m_firstExecution = true;
5534     data.m_batch_chunks = 0;
5535 
5536     /**
5537      * We will need to look at the parameters again if the scan is pruned and the prune
5538      * key uses parameter values. Therefore, we keep a reference to the start of the
5539      * parameter buffer.
5540      */
5541     DABuffer origParam = param;
5542     err = parseDA(ctx, requestPtr, treeNodePtr,
5543                   tree, treeBits, param, paramBits);
5544     if (unlikely(err != 0))
5545       break;
5546 
5547     if (treeBits & Node::SI_PRUNE_PATTERN)
5548     {
5549       Uint32 len_cnt = *tree.ptr++;
5550       Uint32 len = len_cnt & 0xFFFF; // length of pattern in words
5551       Uint32 cnt = len_cnt >> 16;    // no of parameters
5552 
5553       LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
5554       ndbrequire((cnt==0) == ((treeBits & Node::SI_PRUNE_PARAMS) ==0));
5555       ndbrequire((cnt==0) == ((paramBits & Params::SIP_PRUNE_PARAMS)==0));
5556 
5557       if (treeBits & Node::SI_PRUNE_LINKED)
5558       {
5559         jam();
5560         DEBUG("LINKED-PRUNE PATTERN w/ " << cnt << " PARAM values");
5561 
5562         data.m_prunePattern.init();
5563         Local_pattern_store pattern(pool, data.m_prunePattern);
5564 
5565         /**
5566          * Expand pattern into a new pattern (with linked values)
5567          */
5568         err = expand(pattern, treeNodePtr, tree, len, origParam, cnt);
5569         if (unlikely(err != 0))
5570         {
5571           jam();
5572           break;
5573         }
5574         treeNodePtr.p->m_bits |= TreeNode::T_PRUNE_PATTERN;
5575         c_Counters.incr_counter(CI_PRUNED_RANGE_SCANS_RECEIVED, 1);
5576       }
5577       else
5578       {
5579         jam();
5580         DEBUG("FIXED-PRUNE w/ " << cnt << " PARAM values");
5581 
5582         /**
5583          * Expand the pattern directly into a section.
5584          *   This means a "fixed" pruning from here on,
5585          *   i.e. a guaranteed single partition.
5586          */
5587         Uint32 prunePtrI = RNIL;
5588         bool hasNull;
5589         err = expand(prunePtrI, tree, len, origParam, cnt, hasNull);
5590         if (unlikely(err != 0))
5591         {
5592           jam();
5593           releaseSection(prunePtrI);
5594           break;
5595         }
5596 
5597         if (unlikely(hasNull))
5598         {
5599           /* API should have eliminated requests w/ const-NULL keys */
5600           jam();
5601           DEBUG("BEWARE: T_CONST_PRUNE-key contains NULL values");
5602           releaseSection(prunePtrI);
5603 //        treeNodePtr.p->m_bits |= TreeNode::T_NULL_PRUNE;
5604 //        break;
5605           ndbrequire(false);
5606         }
5607         ndbrequire(prunePtrI != RNIL);  /* todo: can we allow / take advantage of NULLs in range scan? */
5608         data.m_constPrunePtrI = prunePtrI;
5609 
5610         /**
5611          * We must not compute the partition for the hash-key here,
5612          *   as we have not yet opened a read-view.
5613          */
5614         treeNodePtr.p->m_bits |= TreeNode::T_CONST_PRUNE;
5615         c_Counters.incr_counter(CI_CONST_PRUNED_RANGE_SCANS_RECEIVED, 1);
5616       }
5617     } //SI_PRUNE_PATTERN
5618 
5619     if ((treeNodePtr.p->m_bits & TreeNode::T_CONST_PRUNE) == 0 &&
5620         ((treeBits & Node::SI_PARALLEL) ||
5621          ((paramBits & Params::SIP_PARALLEL))))
5622     {
5623       jam();
5624       treeNodePtr.p->m_bits |= TreeNode::T_SCAN_PARALLEL;
5625     }
5626 
5627     return 0;
5628   } while(0);
5629 
5630   jam();
5631   return err;
5632 }
5633 
5634 void
5635 Dbspj::scanIndex_prepare(Signal * signal,
5636                          Ptr<Request> requestPtr, Ptr<TreeNode> treeNodePtr)
5637 {
5638   jam();
5639 
5640   if (!ERROR_INSERTED(17521)) // Avoid emulated rnd errors
5641   {
5642     // ::checkTableError() should be handled before we reach this far
5643     ndbassert(checkTableError(treeNodePtr) == 0); //Handled in Dbspj::start
5644   }
5645   treeNodePtr.p->m_state = TreeNode::TN_PREPARING;
5646 
5647   DihScanTabReq * req = (DihScanTabReq*)signal->getDataPtrSend();
5648   req->senderRef = reference();
5649   req->senderData = treeNodePtr.i;
5650   req->tableId = treeNodePtr.p->m_tableOrIndexId;
5651   req->schemaTransId = 0;
5652   sendSignal(DBDIH_REF, GSN_DIH_SCAN_TAB_REQ, signal,
5653              DihScanTabReq::SignalLength, JBB);
5654 
5655   requestPtr.p->m_outstanding++;
5656 }
5657 
5658 void
5659 Dbspj::execDIH_SCAN_TAB_REF(Signal* signal)
5660 {
5661   jamEntry();
5662   ndbrequire(false);
5663 }
5664 
5665 void
5666 Dbspj::execDIH_SCAN_TAB_CONF(Signal* signal)
5667 {
5668   jamEntry();
5669   DihScanTabConf * conf = (DihScanTabConf*)signal->getDataPtr();
5670 
5671   Ptr<TreeNode> treeNodePtr;
5672   m_treenode_pool.getPtr(treeNodePtr, conf->senderData);
5673   ndbrequire(treeNodePtr.p->m_info == &g_ScanIndexOpInfo);
5674 
5675   ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
5676 
5677   Uint32 cookie = conf->scanCookie;
5678   Uint32 fragCount = conf->fragmentCount;
5679 
5680   if (conf->reorgFlag)
5681   {
5682     jam();
5683     ScanFragReq * dst = (ScanFragReq*)data.m_scanFragReq;
5684     ScanFragReq::setReorgFlag(dst->requestInfo, ScanFragReq::REORG_NOT_MOVED);
5685   }
5686   if (treeNodePtr.p->m_bits & TreeNode::T_CONST_PRUNE)
5687   {
5688     jam();
5689     fragCount = 1;
5690   }
5691   data.m_fragCount = fragCount;
5692   data.m_scanCookie = cookie;
5693 
5694   const Uint32 prunemask = TreeNode::T_PRUNE_PATTERN | TreeNode::T_CONST_PRUNE;
5695   bool pruned = (treeNodePtr.p->m_bits & prunemask) != 0;
5696 
5697   Ptr<Request> requestPtr;
5698   m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
5699 
5700   // Add a skew in the fragment lists such that we don't scan
5701   // the same subset of frags from all SPJ requests when the
5702   // scan is not 'T_SCAN_PARALLEL'.
5703   Uint16 fragNoOffs = requestPtr.p->m_rootFragId % fragCount;
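  // E.g. with fragCount == 8 and m_rootFragId == 3, this SPJ request
  // visits the fragments in the order 3,4,5,6,7,0,1,2 (see the
  // '(fragNoOffs+i) % fragCount' wrap-around below).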
5704   Uint32 err = 0;
5705 
5706   do
5707   {
5708     Ptr<ScanFragHandle> fragPtr;
5709 
5710     /** Allocate & init all 'fragCount' fragment descriptors */
5711     {
5712       Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
5713 
5714       err = checkTableError(treeNodePtr);
5715       if (unlikely(err != 0))
5716       {
5717         jam();
5718         break;
5719       }
5720       for (Uint32 i = 0; i<fragCount; i++)
5721       {
5722         jam();
5723         Ptr<ScanFragHandle> fragPtr;
5724         Uint16 fragNo = (fragNoOffs+i) % fragCount;
5725 
5726         if (!ERROR_INSERTED_CLEAR(17012) &&
5727             likely(m_scanfraghandle_pool.seize(requestPtr.p->m_arena, fragPtr)))
5728         {
5729           jam();
5730           fragPtr.p->init(fragNo);
5731           fragPtr.p->m_treeNodePtrI = treeNodePtr.i;
5732           list.addLast(fragPtr);
5733         }
5734         else
5735         {
5736           jam();
5737           err = DbspjErr::OutOfQueryMemory;
5738           goto error;
5739         }
5740       }
5741       list.first(fragPtr); // Needed if T_CONST_PRUNE
5742     } // end 'Alloc scope'
5743 
5744     if (treeNodePtr.p->m_bits & TreeNode::T_CONST_PRUNE)
5745     {
5746       jam();
5747 
5748       // TODO we need a different variant of computeHash here,
5749       // since m_constPrunePtrI does not contain full primary key
5750       // but only parts in distribution key
5751 
5752       BuildKeyReq tmp;
5753       Uint32 tableId = treeNodePtr.p->m_primaryTableId;
5754       err = computePartitionHash(signal, tmp, tableId, data.m_constPrunePtrI);
5755       if (unlikely(err != 0))
5756       {
5757         jam();
5758         break;
5759       }
5760 
5761       releaseSection(data.m_constPrunePtrI);
5762       data.m_constPrunePtrI = RNIL;
5763 
5764       err = getNodes(signal, tmp, tableId);
5765       if (unlikely(err != 0))
5766       {
5767         jam();
5768         break;
5769       }
5770 
5771       fragPtr.p->m_fragId = tmp.fragId;
5772       fragPtr.p->m_ref = tmp.receiverRef;
5773       ndbassert(data.m_fragCount == 1);
5774     }
5775     else if (fragCount == 1)
5776     {
5777       jam();
5778       /**
5779        * This is roughly equivalent to T_CONST_PRUNE,
5780        *   so pretend that it is const-pruned.
5781        */
5782       if (treeNodePtr.p->m_bits & TreeNode::T_PRUNE_PATTERN)
5783       {
5784         jam();
5785         LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
5786         Local_pattern_store pattern(pool, data.m_prunePattern);
5787         pattern.release();
5788       }
5789       data.m_constPrunePtrI = RNIL;
5790       Uint32 clear = TreeNode::T_PRUNE_PATTERN | TreeNode::T_SCAN_PARALLEL;
5791       treeNodePtr.p->m_bits &= ~clear;
5792       treeNodePtr.p->m_bits |= TreeNode::T_CONST_PRUNE;
5793 
5794       /**
5795        * We must get fragPtr.p->m_ref...so set pruned=false
5796        */
5797       pruned = false;
5798     }
5799     data.m_frags_complete = data.m_fragCount;
5800 
5801     if (!pruned)
5802     {
5803       /** Start requesting node info from DIH */
5804       jam();
5805       err = scanindex_sendDihGetNodesReq(signal, requestPtr, treeNodePtr);
5806       if (unlikely(err != 0))
5807       {
5808         jam();
5809         break;
5810       }
5811       requestPtr.p->m_outstanding++;
5812     }
5813     else
5814     {
5815       jam();
5816       treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
5817     }
5818   } while (0);
5819 
5820   if (likely(err==0))
5821   {
5822     jam();
5823     checkPrepareComplete(signal, requestPtr, 1);
5824     return;
5825   }
5826 error:
5827   ndbrequire(requestPtr.p->isScan());
5828   ndbrequire(requestPtr.p->m_outstanding >= 1);
5829   requestPtr.p->m_outstanding -= 1;
5830   abort(signal, requestPtr, err);
5831 }
5832 
5833 /**
5834  * Will check the fragment list for fragments which need to
5835  * get node info to construct 'fragPtr.p->m_ref' from DIH.
5836  *
5837  * In order to avoid CPU starvation, or an unmanageably huge FragItem[],
5838  * at most MAX_DIH_FRAG_REQS fragments are requested in a single signal.
5839  * If there are more fragments, we have to call this function repeatedly
5840  * when the CONF for the first fragment set is received.
5841  */
5842 Uint32
5843 Dbspj::scanindex_sendDihGetNodesReq(Signal* signal,
5844                                     Ptr<Request> requestPtr,
5845                                     Ptr<TreeNode> treeNodePtr)
5846 {
5847   jam();
5848   ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
5849   Ptr<ScanFragHandle> fragPtr;
5850   Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
5851 
5852   DihScanGetNodesReq * req = (DihScanGetNodesReq*)signal->getDataPtrSend();
5853   Uint32 fragCnt = 0;
5854   for (list.first(fragPtr);
5855        !fragPtr.isNull() && fragCnt < DihScanGetNodesReq::MAX_DIH_FRAG_REQS;
5856        list.next(fragPtr))
5857   {
5858     jam();
5859     if (fragPtr.p->m_ref == 0) // Need GSN_DIH_SCAN_GET_NODES_REQ
5860     {
5861       jam();
5862       req->fragItem[fragCnt].senderData = fragPtr.i;
5863       req->fragItem[fragCnt].fragId = fragPtr.p->m_fragId;
5864       fragCnt++;
5865     }
5866   }
5867 
5868   if (fragCnt > 0)
5869   {
5870     jam();
5871     Uint32 tableId = treeNodePtr.p->m_tableOrIndexId;
5872     req->senderRef = reference();
5873     req->tableId = tableId;
5874     req->scanCookie = data.m_scanCookie;
5875     req->fragCnt = fragCnt;
5876 
5877     /** Always send as a long signal, even if a short would
5878      *  have been sufficient in the (rare) case of 'fragCnt==1'
5879      */
5880     Ptr<SectionSegment> fragReq;
5881     Uint32 len = fragCnt*DihScanGetNodesReq::FragItem::Length;
5882     if (ERROR_INSERTED_CLEAR(17130) ||
5883         unlikely(!import(fragReq, (Uint32*)req->fragItem, len)))
5884     {
5885       jam();
5886       return DbspjErr::OutOfSectionMemory;
5887     }
5888 
5889     SectionHandle handle(this, fragReq.i);
5890     sendSignal(DBDIH_REF, GSN_DIH_SCAN_GET_NODES_REQ, signal,
5891                DihScanGetNodesReq::FixedSignalLength,
5892                JBB, &handle);
5893 
5894     data.m_frags_outstanding += fragCnt;
5895   }
5896   return 0;
5897 } //Dbspj::scanindex_sendDihGetNodesReq
5898 
5899 void
5900 Dbspj::execDIH_SCAN_GET_NODES_REF(Signal* signal)
5901 {
5902   jamEntry();
5903   const DihScanGetNodesRef* ref = (DihScanGetNodesRef*)signal->getDataPtr();
5904 //const Uint32 tableId = ref->tableId;
5905   const Uint32 fragCnt = ref->fragCnt;
5906   const Uint32 errCode = ref->errCode;
5907   ndbassert(errCode != 0);
5908 
5909   if (signal->getNoOfSections() > 0)
5910   {
5911     // Long signal: FragItems listed in first section
5912     jam();
5913     SectionHandle handle(this, signal);
5914     ndbassert(handle.m_cnt==1);
5915     SegmentedSectionPtr fragRefSection;
5916     ndbrequire(handle.getSection(fragRefSection,0));
5917     ndbassert(fragRefSection.p->m_sz == (fragCnt*DihScanGetNodesRef::FragItem::Length));
5918     ndbassert(fragCnt <= DihScanGetNodesReq::MAX_DIH_FRAG_REQS);
5919     copy((Uint32*)ref->fragItem, fragRefSection);
5920     releaseSections(handle);
5921   }
5922   else                  // Short signal, single frag in ref->fragItem[0]
5923   {
5924     ndbassert(fragCnt == 1);
5925     ndbassert(signal->getLength()
5926               == DihScanGetNodesRef::FixedSignalLength + DihScanGetNodesRef::FragItem::Length);
5927   }
5928 
5929   UintR treeNodePtrI = RNIL;
5930   for (Uint32 i=0; i < fragCnt; i++)
5931   {
5932     jam();
5933     const Uint32 senderData = ref->fragItem[i].senderData;
5934 
5935     Ptr<ScanFragHandle> fragPtr;
5936     m_scanfraghandle_pool.getPtr(fragPtr, senderData);
5937 
5938     // All fragItem[] should be for same TreeNode
5939     ndbassert (treeNodePtrI == RNIL || treeNodePtrI == fragPtr.p->m_treeNodePtrI);
5940     treeNodePtrI = fragPtr.p->m_treeNodePtrI;
5941   } //for
5942 
5943   ndbassert(treeNodePtrI != RNIL);  // fragCnt > 0 above
5944   Ptr<TreeNode> treeNodePtr;
5945   m_treenode_pool.getPtr(treeNodePtr, treeNodePtrI);
5946 
5947   ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
5948   ndbassert(data.m_frags_outstanding == fragCnt);
5949   data.m_frags_outstanding -= fragCnt;
5950 
5951   Ptr<Request> requestPtr;
5952   m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
5953   abort(signal, requestPtr, errCode);
5954 
5955   if (data.m_frags_outstanding == 0)
5956   {
5957     jam();
5958     treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
5959     checkPrepareComplete(signal, requestPtr, 1);
5960   }
5961 }//Dbspj::execDIH_SCAN_GET_NODES_REF
5962 
5963 void
5964 Dbspj::execDIH_SCAN_GET_NODES_CONF(Signal* signal)
5965 {
5966   jamEntry();
5967   const DihScanGetNodesConf * conf = (DihScanGetNodesConf*)signal->getDataPtr();
5968   const Uint32 fragCnt = conf->fragCnt;
5969 
5970   if (signal->getNoOfSections() > 0)
5971   {
5972     // Unpack long signal
5973     jam();
5974     SectionHandle handle(this, signal);
5975     SegmentedSectionPtr fragConfSection;
5976     ndbrequire(handle.getSection(fragConfSection,0));
5977     ndbassert(fragConfSection.p->m_sz == (fragCnt*DihScanGetNodesConf::FragItem::Length));
5978     copy((Uint32*)conf->fragItem, fragConfSection);
5979     releaseSections(handle);
5980   }
5981   else   // Short signal, with single FragItem
5982   {
5983     jam();
5984     ndbassert(fragCnt == 1);
5985     ndbassert(signal->getLength()
5986               == DihScanGetNodesConf::FixedSignalLength + DihScanGetNodesConf::FragItem::Length);
5987   }
5988 
5989   UintR treeNodePtrI = RNIL;
5990   for (Uint32 i=0; i < fragCnt; i++)
5991   {
5992     jam();
5993     const Uint32 senderData = conf->fragItem[i].senderData;
5994     const Uint32 node = conf->fragItem[i].nodes[0];
5995     const Uint32 instanceKey = conf->fragItem[i].instanceKey;
5996 
5997     Ptr<ScanFragHandle> fragPtr;
5998     m_scanfraghandle_pool.getPtr(fragPtr, senderData);
5999 
6000     // All fragItem[] should be for same TreeNode
6001     ndbassert (treeNodePtrI == RNIL || treeNodePtrI == fragPtr.p->m_treeNodePtrI);
6002     treeNodePtrI = fragPtr.p->m_treeNodePtrI;
6003 
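    // Build the block reference of the DBLQH instance owning this fragment
    // from the instance key and the primary node id (nodes[0]) returned by DIH.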
6004     fragPtr.p->m_ref = numberToRef(DBLQH, instanceKey, node);
6005   } //for
6006 
6007   ndbassert(treeNodePtrI != RNIL);  // fragCnt > 0 above
6008   Ptr<TreeNode> treeNodePtr;
6009   m_treenode_pool.getPtr(treeNodePtr, treeNodePtrI);
6010 
6011   ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
6012   ndbassert(data.m_frags_outstanding == fragCnt);
6013   data.m_frags_outstanding -= fragCnt;
6014 
6015   Ptr<Request> requestPtr;
6016   m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
6017 
6018   /** Check if we need to send more GSN_DIH_SCAN_GET_NODES_REQ */
6019   Uint32 err = scanindex_sendDihGetNodesReq(signal, requestPtr, treeNodePtr);
6020   if (unlikely(err != 0))
6021   {
6022     jam();
6023     abort(signal, requestPtr, err);
6024   }
6025 
6026   if (data.m_frags_outstanding == 0)
6027   {
6028     jam();
6029     treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
6030     checkPrepareComplete(signal, requestPtr, 1);
6031   }
6032 }//Dbspj::execDIH_SCAN_GET_NODES_CONF
6033 
6034 Uint32
6035 Dbspj::scanIndex_findFrag(Local_ScanFragHandle_list & list,
6036                           Ptr<ScanFragHandle> & fragPtr, Uint32 fragId)
6037 {
6038   for (list.first(fragPtr); !fragPtr.isNull(); list.next(fragPtr))
6039   {
6040     jam();
6041     if (fragPtr.p->m_fragId == fragId)
6042     {
6043       jam();
6044       return 0;
6045     }
6046   }
6047 
6048   return DbspjErr::IndexFragNotFound;
6049 }
6050 
6051 void
6052 Dbspj::scanIndex_parent_row(Signal* signal,
6053                             Ptr<Request> requestPtr,
6054                             Ptr<TreeNode> treeNodePtr,
6055                             const RowPtr & rowRef)
6056 {
6057   jam();
6058   DEBUG("::scanIndex_parent_row"
6059      << ", node: " << treeNodePtr.p->m_node_no);
6060 
6061   Uint32 err;
6062   ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
6063 
6064   /**
6065    * Construct the range definition,
6066    *   and if a prune pattern is enabled,
6067    *   attach it to the correct scanindexFrag.
6068    */
6069   do
6070   {
6071     Ptr<ScanFragHandle> fragPtr;
6072     Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
6073     LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
6074 
6075     err = checkTableError(treeNodePtr);
6076     if (unlikely(err != 0))
6077     {
6078       jam();
6079       break;
6080     }
6081 
6082     if (treeNodePtr.p->m_bits & TreeNode::T_PRUNE_PATTERN)
6083     {
6084       jam();
6085 
6086       /**
6087        * TODO: Expand into linear memory instead
6088        *       of expanding into sections, and then copy
6089        *       section into linear
6090        */
6091       Local_pattern_store pattern(pool, data.m_prunePattern);
6092       Uint32 pruneKeyPtrI = RNIL;
6093       bool hasNull;
6094       err = expand(pruneKeyPtrI, pattern, rowRef, hasNull);
6095       if (unlikely(err != 0))
6096       {
6097         jam();
6098         releaseSection(pruneKeyPtrI);
6099         break;
6100       }
6101 
6102       if (unlikely(hasNull))
6103       {
6104         jam();
6105         DEBUG("T_PRUNE_PATTERN-key contain NULL values");
6106 
6107         // Ignore this request as 'NULL == <column>' will never give a match
6108         releaseSection(pruneKeyPtrI);
6109         return;  // Bailout, SCANREQ would have returned 0 rows anyway
6110       }
6111 
6112       BuildKeyReq tmp;
6113       Uint32 tableId = treeNodePtr.p->m_primaryTableId;
6114       err = computePartitionHash(signal, tmp, tableId, pruneKeyPtrI);
6115       releaseSection(pruneKeyPtrI);
6116       if (unlikely(err != 0))
6117       {
6118         jam();
6119         break;
6120       }
6121 
6122       err = getNodes(signal, tmp, tableId);
6123       if (unlikely(err != 0))
6124       {
6125         jam();
6126         break;
6127       }
6128 
6129       err = scanIndex_findFrag(list, fragPtr, tmp.fragId);
6130       if (unlikely(err != 0))
6131       {
6132         DEBUG_CRASH();
6133         break;
6134       }
6135 
6136       /**
6137        * NOTE: We can get different receiverRef's here
6138        *       for different keys. E.g. during node-recovery where
6139        *       the primary-fragment is switched.
6140        *
6141        *       Use the latest that we receive.
6142        *
6143        * TODO: Also double check table-reorg
6144        */
6145       fragPtr.p->m_ref = tmp.receiverRef;
6146     }
6147     else
6148     {
6149       jam();
6150       /**
6151        * If const-pruned or not pruned, store on the first fragment,
6152        * and send to one or all fragments, respectively.
6153        */
6154       list.first(fragPtr);
6155     }
6156 
6157     bool hasNull = false;
6158     if (treeNodePtr.p->m_bits & TreeNode::T_KEYINFO_CONSTRUCTED)
6159     {
6160       jam();
6161       Local_pattern_store pattern(pool, treeNodePtr.p->m_keyPattern);
6162 
6163       /**
6164        * Test execution terminated due to 'OutOfSectionMemory':
6165        * - 17060: Fail on scanIndex_parent_row at first call
6166        * - 17061: Fail on scanIndex_parent_row if 'isLeaf'
6167        * - 17062: Fail on scanIndex_parent_row if treeNode not root
6168        * - 17063: Fail on scanIndex_parent_row at a random node of the query tree
6169        */
6170       if (ERROR_INSERTED(17060) ||
6171          (ERROR_INSERTED(17061) && (treeNodePtr.p->isLeaf())) ||
6172          (ERROR_INSERTED(17062) && (treeNodePtr.p->m_parentPtrI != RNIL)) ||
6173          (ERROR_INSERTED(17063) && (rand() % 7) == 0))
6174       {
6175         jam();
6176         CLEAR_ERROR_INSERT_VALUE;
6177         ndbout_c("Injecting OutOfSectionMemory error at line %d file %s",
6178                  __LINE__,  __FILE__);
6179         err = DbspjErr::OutOfSectionMemory;
6180         break;
6181       }
6182 
6183       err = expand(fragPtr.p->m_rangePtrI, pattern, rowRef, hasNull);
6184       if (unlikely(err != 0))
6185       {
6186         jam();
6187         break;
6188       }
6189     }
6190     else
6191     {
6192       jam();
6193       // Fixed key...fix later...
6194       ndbrequire(false);
6195     }
6196 //  ndbrequire(!hasNull);  // FIXME, can't ignore request as we already added it to keyPattern
6197     scanIndex_fixupBound(fragPtr, fragPtr.p->m_rangePtrI, rowRef.m_src_correlation);
6198 
6199     if (treeNodePtr.p->m_bits & TreeNode::T_ONE_SHOT)
6200     {
6201       jam();
6202       /**
6203        * Being a T_ONE_SHOT means that we will only be called
6204        *   with parent_row once, i.e. the batch is complete.
6205        */
6206       scanIndex_parent_batch_complete(signal, requestPtr, treeNodePtr);
6207     }
6208 
6209     return;
6210   } while (0);
6211 
6212   ndbrequire(err);
6213   jam();
6214   abort(signal, requestPtr, err);
6215 }
6216 
6217 
6218 void
6219 Dbspj::scanIndex_fixupBound(Ptr<ScanFragHandle> fragPtr,
6220                             Uint32 ptrI, Uint32 corrVal)
6221 {
6222   /**
6223    * Index bounds need special tender care:
6224    *
6225    * 1) Set bound no, bound size, and renumber attributes
6226    */
6227   SectionReader r0(ptrI, getSectionSegmentPool());
6228   ndbrequire(r0.step(fragPtr.p->m_range_builder.m_range_size));
6229   Uint32 boundsz = r0.getSize() - fragPtr.p->m_range_builder.m_range_size;
6230   Uint32 boundno = fragPtr.p->m_range_builder.m_range_cnt + 1;
6231 
6232   Uint32 tmp;
6233   ndbrequire(r0.peekWord(&tmp));
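  // Pack the bound descriptor word: bound size in the upper 16 bits,
  // bound number (the low 12 bits of the correlation value) in bits 4..15,
  // leaving the original BoundType in the low 4 bits.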
6234   tmp |= (boundsz << 16) | ((corrVal & 0xFFF) << 4);
6235   ndbrequire(r0.updateWord(tmp));
6236   ndbrequire(r0.step(1));    // Skip first BoundType
6237 
6238   // TODO: Renumbering below assumes there are only EQ-bounds !!
6239   Uint32 id = 0;
6240   Uint32 len32;
6241   do
6242   {
6243     ndbrequire(r0.peekWord(&tmp));
6244     AttributeHeader ah(tmp);
6245     Uint32 len = ah.getByteSize();
6246     AttributeHeader::init(&tmp, id++, len);
6247     ndbrequire(r0.updateWord(tmp));
6248     len32 = (len + 3) >> 2;
6249   } while (r0.step(2 + len32));  // Skip AttributeHeader(1) + Attribute(len32) + next BoundType(1)
6250 
6251   fragPtr.p->m_range_builder.m_range_cnt = boundno;
6252   fragPtr.p->m_range_builder.m_range_size = r0.getSize();
6253 }
6254 
6255 void
6256 Dbspj::scanIndex_parent_batch_complete(Signal* signal,
6257                                        Ptr<Request> requestPtr,
6258                                        Ptr<TreeNode> treeNodePtr)
6259 {
6260   jam();
6261 
6262   ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
6263   data.m_rows_received = 0;
6264   data.m_rows_expecting = 0;
6265   ndbassert(data.m_frags_outstanding == 0);
6266   ndbassert(data.m_frags_complete == data.m_fragCount);
6267   data.m_frags_complete = 0;
6268 
6269   Ptr<ScanFragHandle> fragPtr;
6270   {
6271     Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
6272     list.first(fragPtr);
6273 
6274     if ((treeNodePtr.p->m_bits & TreeNode::T_PRUNE_PATTERN) == 0)
6275     {
6276       if (fragPtr.p->m_rangePtrI == RNIL)
6277       {
6278         // No keys found
6279         jam();
6280         data.m_frags_complete = data.m_fragCount;
6281       }
6282     }
6283     else
6284     {
6285       while(!fragPtr.isNull())
6286       {
6287         if (fragPtr.p->m_rangePtrI == RNIL)
6288         {
6289           jam();
6290           /**
6291            * This is a pruned scan, so we only scan those fragments that
6292            * some distribution key hashed to; no key hashed to this one.
6293            */
6294           fragPtr.p->m_state = ScanFragHandle::SFH_COMPLETE;
6295           data.m_frags_complete++;
6296         }
6297         list.next(fragPtr);
6298       }
6299     }
6300   }
6301   data.m_frags_not_started = data.m_fragCount - data.m_frags_complete;
6302 
6303   if (data.m_frags_complete == data.m_fragCount)
6304   {
6305     jam();
6306     /**
6307      * No keys were produced...
6308      */
6309     return;
6310   }
6311 
6312   /**
6313    * When parent's batch is complete, we send our batch
6314    */
6315   const ScanFragReq * org = (const ScanFragReq*)data.m_scanFragReq;
6316   ndbrequire(org->batch_size_rows > 0);
6317 
6318   data.m_firstBatch = true;
6319   if (treeNodePtr.p->m_bits & TreeNode::T_SCAN_PARALLEL)
6320   {
6321     jam();
6322     data.m_parallelism = MIN(data.m_fragCount - data.m_frags_complete,
6323                              org->batch_size_rows);
6324   }
6325   else if (data.m_firstExecution)
6326   {
6327     /**
6328      * Having a high parallelism would allow us to fetch data from many
6329      * fragments in parallel and thus reduce the number of round trips.
6330      * On the other hand, we should set parallelism so low that we can fetch
6331      * all data from a fragment in one batch if possible.
6332      * Since this is the first execution, we do not know how many rows or bytes
6333      * this operation is likely to return. Therefore we set parallelism to 1,
6334      * since this gives the lowest penalty if our guess is wrong.
6335      */
6336     jam();
6337     data.m_parallelism = 1;
6338   }
6339   else
6340   {
6341     jam();
6342     /**
6343      * Use statistics from earlier runs of this operation to estimate the
6344      * initial parallelism. We use the mean minus two times the standard
6345      * deviation to have a low risk of setting parallelism too high (as erring
6346      * in the other direction is more costly).
6347      */
6348     Int32 parallelism =
6349       static_cast<Int32>(MIN(data.m_parallelismStat.getMean()
6350                              // Add 0.5 to get proper rounding.
6351                              - 2 * data.m_parallelismStat.getStdDev() + 0.5,
6352                              org->batch_size_rows));
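    // Worked example: mean 10.0 and stddev 3.0 give 10.0 - 6.0 + 0.5 = 4.5,
    // truncated to 4 by the Int32 cast; the MIN() caps the estimate at
    // batch_size_rows, and the check below raises it to at least 1.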
6353 
6354     if (parallelism < 1)
6355     {
6356       jam();
6357       parallelism = 1;
6358     }
6359     else if ((data.m_fragCount - data.m_frags_complete) % parallelism != 0)
6360     {
6361       jam();
6362       /**
6363        * Set parallelism such that we can expect similar parallelism
6364        * in each batch. For example, with 8 remaining fragments and an
6365        * estimate of 7, roundTrips = 1 + 8/7 = 2 and parallelism =
6366        * 8/2 = 4, i.e. we fetch 2 times 4 fragments rather than 7+1.
6367        */
6368       const Int32 roundTrips =
6369         1 + (data.m_fragCount - data.m_frags_complete) / parallelism;
6370       parallelism = (data.m_fragCount - data.m_frags_complete) / roundTrips;
6371     }
6372 
6373     ndbassert(parallelism >= 1);
6374     ndbassert((Uint32)parallelism + data.m_frags_complete <= data.m_fragCount);
6375     data.m_parallelism = static_cast<Uint32>(parallelism);
6376 
6377 #ifdef DEBUG_SCAN_FRAGREQ
6378     DEBUG("::scanIndex_parent_batch_complete() starting index scan with parallelism="
6379           << data.m_parallelism);
6380 #endif
6381   }
6382   ndbrequire(data.m_parallelism > 0);
6383 
6384   const Uint32 bs_rows = org->batch_size_rows / data.m_parallelism;
6385   const Uint32 bs_bytes = org->batch_size_bytes / data.m_parallelism;
6386   ndbassert(bs_rows > 0);
6387   ndbassert(bs_bytes > 0);
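  // The total batch budget is split evenly across the fragments scanned in
  // parallel, so the combined result of one round of SCAN_FRAGREQs stays
  // within the request's batch size limits.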
6388 
6389   data.m_largestBatchRows = 0;
6390   data.m_largestBatchBytes = 0;
6391   data.m_totalRows = 0;
6392   data.m_totalBytes = 0;
6393 
6394   {
6395     Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
6396     Ptr<ScanFragHandle> fragPtr;
6397     list.first(fragPtr);
6398 
6399     while(!fragPtr.isNull())
6400     {
6401       ndbassert(fragPtr.p->m_state == ScanFragHandle::SFH_NOT_STARTED ||
6402                 fragPtr.p->m_state == ScanFragHandle::SFH_COMPLETE);
6403       fragPtr.p->m_state = ScanFragHandle::SFH_NOT_STARTED;
6404       list.next(fragPtr);
6405     }
6406   }
6407 
6408   Uint32 batchRange = 0;
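  // 'batchRange' is the row-number offset handed to each started fragment
  // scan (as req->variableData[0] in scanIndex_send()), giving each
  // fragment a distinct correlation range within the batch.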
6409   Uint32 frags_started =
6410     scanIndex_send(signal,
6411                    requestPtr,
6412                    treeNodePtr,
6413                    data.m_parallelism,
6414                    bs_bytes,
6415                    bs_rows,
6416                    batchRange);
6417 
6418   /**
6419    * scanIndex_send() might fail to send anything (e.g. on errors):
6420    * Check that we really did send something before
6421    * updating outstanding & active.
6422    */
6423   if (likely(frags_started > 0))
6424   {
6425     jam();
6426     data.m_firstExecution = false;
6427 
6428     ndbrequire(static_cast<Uint32>(data.m_frags_outstanding +
6429                                    data.m_frags_complete) <=
6430                data.m_fragCount);
6431 
6432     data.m_batch_chunks = 1;
6433     requestPtr.p->m_cnt_active++;
6434     requestPtr.p->m_outstanding++;
6435     requestPtr.p->m_completed_nodes.clear(treeNodePtr.p->m_node_no);
6436     treeNodePtr.p->m_state = TreeNode::TN_ACTIVE;
6437   }
6438 }
6439 
6440 void
6441 Dbspj::scanIndex_parent_batch_repeat(Signal* signal,
6442                                       Ptr<Request> requestPtr,
6443                                       Ptr<TreeNode> treeNodePtr)
6444 {
6445   jam();
6446   ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
6447 
6448   DEBUG("scanIndex_parent_batch_repeat(), m_node_no: " << treeNodePtr.p->m_node_no
6449         << ", m_batch_chunks: " << data.m_batch_chunks);
6450 
6451   ndbassert(treeNodePtr.p->m_bits & TreeNode::T_SCAN_REPEATABLE);
6452 
6453   /**
6454    * Register index-scans to be restarted if we didn't get all
6455  * previously fetched parent-related child rows in a single batch.
6456    */
6457   if (data.m_batch_chunks > 1)
6458   {
6459     jam();
6460     DEBUG("Register TreeNode for restart, m_node_no: " << treeNodePtr.p->m_node_no);
6461     ndbrequire(treeNodePtr.p->m_state != TreeNode::TN_ACTIVE);
6462     registerActiveCursor(requestPtr, treeNodePtr);
6463     data.m_batch_chunks = 0;
6464   }
6465 }
6466 
6467 /**
6468  * Ask for the first batch for a number of fragments.
6469  *
6470  * Returns the number of fragments from which we requested
6471  * the 'first batch' (<= noOfFrags).
6472  */
6473 Uint32
6474 Dbspj::scanIndex_send(Signal* signal,
6475                       Ptr<Request> requestPtr,
6476                       Ptr<TreeNode> treeNodePtr,
6477                       Uint32 noOfFrags,
6478                       Uint32 bs_bytes,
6479                       Uint32 bs_rows,
6480                       Uint32& batchRange)
6481 {
6482   jam();
6483   ndbassert(bs_bytes > 0);
6484   ndbassert(bs_rows > 0);
6485   ndbassert(bs_rows <= bs_bytes);
6486   /**
6487    * If (m_bits & prunemask):
6488    * - Range keys have been sliced out to each ScanFragHandle.
6489    * - Else, all range keys are kept on the first (and only used) ScanFragHandle.
6490    */
6491   const bool prune = treeNodePtr.p->m_bits &
6492     (TreeNode::T_PRUNE_PATTERN | TreeNode::T_CONST_PRUNE);
6493 
6494   /**
6495    * If the scan is repeatable, we must make sure not to release range keys,
6496    * so that we can use them again in the next repetition.
6497    */
6498   const bool repeatable =
6499     (treeNodePtr.p->m_bits & TreeNode::T_SCAN_REPEATABLE) != 0;
6500 
  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
  ndbassert(noOfFrags > 0);
  ndbassert(data.m_frags_not_started >= noOfFrags);
  ScanFragReq* const req =
    reinterpret_cast<ScanFragReq*>(signal->getDataPtrSend());
  const ScanFragReq * const org
    = reinterpret_cast<ScanFragReq*>(data.m_scanFragReq);
  memcpy(req, org, sizeof(data.m_scanFragReq));
  // req->variableData[0] // set below
  req->variableData[1] = requestPtr.p->m_rootResultData;
  req->batch_size_bytes = bs_bytes;
  req->batch_size_rows = bs_rows;

  Uint32 requestsSent = 0;
  Uint32 err = checkTableError(treeNodePtr);
  if (likely(err == 0))
  {
    Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
    Ptr<ScanFragHandle> fragPtr;
    list.first(fragPtr);
    Uint32 keyInfoPtrI = fragPtr.p->m_rangePtrI;
    ndbrequire(prune || keyInfoPtrI != RNIL);
    /**
     * Iterate over the list of fragments until we have sent as many
     * SCAN_FRAGREQs as we should.
     */
    while (requestsSent < noOfFrags)
    {
      jam();
      ndbassert(!fragPtr.isNull());

      if (fragPtr.p->m_state != ScanFragHandle::SFH_NOT_STARTED)
      {
        // Skip forward to the frags that we should send.
        jam();
        list.next(fragPtr);
        continue;
      }

      const Uint32 ref = fragPtr.p->m_ref;

      if (noOfFrags==1 && !prune &&
          data.m_frags_not_started == data.m_fragCount &&
          refToNode(ref) != getOwnNodeId() &&
          list.hasNext(fragPtr))
      {
        /**
         * If we are doing a scan with adaptive parallelism and start with
         * parallelism=1 then it makes sense to fetch a batch from a fragment on
         * the local data node. The reason for this is that if that fragment
         * contains few rows, we may be able to read from several fragments in
         * parallel. Then we minimize the total number of round trips (to remote
         * data nodes) if we fetch the first fragment batch locally.
         */
        jam();
        list.next(fragPtr);
        continue;
      }

      Uint32 attrInfoPtrI = treeNodePtr.p->m_send.m_attrInfoPtrI;

      /**
       * Set data specific for this fragment
       */
      req->senderData = fragPtr.i;
      req->fragmentNoKeyLen = fragPtr.p->m_fragId;
      req->variableData[0] = batchRange;

      // Test for online downgrade.
      if (unlikely(ref != 0 &&
                   !ndb_join_pushdown(getNodeInfo(refToNode(ref)).m_version)))
      {
        jam();
        err = 4003; // Function not implemented.
        break;
      }

      if (prune)
      {
        jam();
        keyInfoPtrI = fragPtr.p->m_rangePtrI;
        if (keyInfoPtrI == RNIL)
        {
          /**
           * Since we use pruning, we can see that no parent rows would hash
           * to this fragment.
           */
          jam();
          fragPtr.p->m_state = ScanFragHandle::SFH_COMPLETE;
          list.next(fragPtr);
          continue;
        }

        if (!repeatable)
        {
          /**
           * If we'll use sendSignal() and we need to send the attrInfo several
           * times, we need to copy them. (For repeatable or unpruned scans
           * we use sendSignalNoRelease(), so then we do not need to copy.)
           */
          jam();
          Uint32 tmp = RNIL;

          /**
           * Test execution terminated due to 'OutOfSectionMemory' which
           * may happen for different treeNodes in the request:
           * - 17090: Fail on any scanIndex_send()
           * - 17091: Fail after sending SCAN_FRAGREQ to some fragments
           * - 17092: Fail on scanIndex_send() if 'isLeaf'
           * - 17093: Fail on scanIndex_send() if treeNode not root
           */

          if (ERROR_INSERTED(17090) ||
             (ERROR_INSERTED(17091) && requestsSent > 1) ||
             (ERROR_INSERTED(17092) && treeNodePtr.p->isLeaf()) ||
             (ERROR_INSERTED(17093) && treeNodePtr.p->m_parentPtrI != RNIL))
          {
            jam();
            CLEAR_ERROR_INSERT_VALUE;
            ndbout_c("Injecting OutOfSectionMemory error at line %d file %s",
                     __LINE__,  __FILE__);
            err = DbspjErr::OutOfSectionMemory;
            break;
          }

          if (!dupSection(tmp, attrInfoPtrI))
          {
            jam();
            ndbassert(tmp == RNIL);  // Guard for memleak
            err = DbspjErr::OutOfSectionMemory;
            break;
          }

          attrInfoPtrI = tmp;
        }
      }

      SectionHandle handle(this);
      getSection(handle.m_ptr[0], attrInfoPtrI);
      getSection(handle.m_ptr[1], keyInfoPtrI);
      handle.m_cnt = 2;

#if defined DEBUG_SCAN_FRAGREQ
      ndbout_c("SCAN_FRAGREQ to %x", ref);
      printSCAN_FRAGREQ(stdout, signal->getDataPtrSend(),
                        NDB_ARRAY_SIZE(treeNodePtr.p->m_scanfrag_data.m_scanFragReq),
                        DBLQH);
      printf("ATTRINFO: ");
      print(handle.m_ptr[0], stdout);
      printf("KEYINFO: ");
      print(handle.m_ptr[1], stdout);
#endif

      if (refToNode(ref) == getOwnNodeId())
      {
        c_Counters.incr_counter(CI_LOCAL_RANGE_SCANS_SENT, 1);
      }
      else
      {
        c_Counters.incr_counter(CI_REMOTE_RANGE_SCANS_SENT, 1);
      }

      /**
       * For a non-repeatable pruned scan, key info is unique for each
       * fragment and therefore cannot be reused, so we release key info
       * right away.
       */

      if (ERROR_INSERTED(17110) ||
         (ERROR_INSERTED(17111) && treeNodePtr.p->isLeaf()) ||
         (ERROR_INSERTED(17112) && treeNodePtr.p->m_parentPtrI != RNIL))
      {
        jam();
        CLEAR_ERROR_INSERT_VALUE;
        ndbout_c("Injecting invalid schema version error at line %d file %s",
                 __LINE__,  __FILE__);
        // Provoke 'Invalid schema version' in order to receive SCAN_FRAGREF
        req->schemaVersion++;
      }

      /**
       * To reduce the copy burden we want to keep hold of the
       * AttrInfo and KeyInfo sections after sending them to
       * LQH.  To do this we perform the fragmented send inline,
       * so that all fragments are sent *now*.  This avoids any
       * problems with the fragmented send CONTINUE 'thread' using
       * the section while we hold or even release it.  The
       * signal receiver can still take realtime breaks when
       * receiving.
       *
       * Indicate to sendFirstFragment that we want to keep the
       * fragments, so it must not free them, unless this is the
       * last request in which case they can be freed. If the
       * last request is a local send then a copy is avoided.
       */
      const bool release = prune && !repeatable;
      {
        FragmentSendInfo fragSendInfo;
        sendFirstFragment(fragSendInfo,
                          ref,
                          GSN_SCAN_FRAGREQ,
                          signal,
                          NDB_ARRAY_SIZE(data.m_scanFragReq),
                          JBB,
                          &handle,
                          !release);  // Keep sent sections unless
                                      // last send

        while (fragSendInfo.m_status != FragmentSendInfo::SendComplete)
        {
          jam();
          // Send remaining fragments
          sendNextSegmentedFragment(signal, fragSendInfo);
        }
      }

      if (release)
      {
        jam();
        fragPtr.p->m_rangePtrI = RNIL;
        fragPtr.p->reset_ranges();
      }
      handle.clear();

      fragPtr.p->m_state = ScanFragHandle::SFH_SCANNING; // running
      data.m_frags_outstanding++;
      data.m_frags_not_started--;
      batchRange += bs_rows;
      requestsSent++;
      list.next(fragPtr);
    } // while (requestsSent < noOfFrags)
  }
  if (err)
  {
    jam();
    abort(signal, requestPtr, err);
  }

  return requestsSent;
}

void
Dbspj::scanIndex_execTRANSID_AI(Signal* signal,
                                Ptr<Request> requestPtr,
                                Ptr<TreeNode> treeNodePtr,
                                const RowPtr & rowRef)
{
  jam();

  common_execTRANSID_AI(signal, requestPtr, treeNodePtr, rowRef);

  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
  data.m_rows_received++;
  ndbassert(treeNodePtr.p->m_resumePtrI == RNIL);

  if (data.m_frags_outstanding == 0 &&
      data.m_rows_received == data.m_rows_expecting)
  {
    jam();
    handleTreeNodeComplete(signal, requestPtr, treeNodePtr);

    checkBatchComplete(signal, requestPtr, 1);
    return;
  }
}

void
Dbspj::scanIndex_execSCAN_FRAGCONF(Signal* signal,
                                   Ptr<Request> requestPtr,
                                   Ptr<TreeNode> treeNodePtr,
                                   Ptr<ScanFragHandle> fragPtr)
{
  jam();

  const ScanFragConf * conf = (const ScanFragConf*)(signal->getDataPtr());

  Uint32 rows = conf->completedOps;
  Uint32 done = conf->fragmentCompleted;
  Uint32 bytes = conf->total_len * sizeof(Uint32);

  Uint32 state = fragPtr.p->m_state;
  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;

  if (state == ScanFragHandle::SFH_WAIT_CLOSE && done == 0)
  {
    jam();
    /**
     * We have sent an explicit close request; ignore this CONF,
     * a close confirmation will arrive later.
     */
    return;
  }

  requestPtr.p->m_rows += rows;
  data.m_totalRows += rows;
  data.m_totalBytes += bytes;
  data.m_largestBatchRows = MAX(data.m_largestBatchRows, rows);
  data.m_largestBatchBytes = MAX(data.m_largestBatchBytes, bytes);

  if (!treeNodePtr.p->isLeaf())
  {
    jam();
    data.m_rows_expecting += rows;
  }
  ndbrequire(data.m_frags_outstanding);
  ndbrequire(state == ScanFragHandle::SFH_SCANNING ||
             state == ScanFragHandle::SFH_WAIT_CLOSE);

  data.m_frags_outstanding--;
  fragPtr.p->m_state = ScanFragHandle::SFH_WAIT_NEXTREQ;

  if (done)
  {
    jam();
    fragPtr.p->m_state = ScanFragHandle::SFH_COMPLETE;
    ndbrequire(data.m_frags_complete < data.m_fragCount);
    data.m_frags_complete++;

    if (data.m_frags_complete == data.m_fragCount ||
        ((requestPtr.p->m_state & Request::RS_ABORTING) != 0 &&
         data.m_fragCount == (data.m_frags_complete + data.m_frags_not_started)))
    {
      jam();
      ndbrequire(requestPtr.p->m_cnt_active);
      requestPtr.p->m_cnt_active--;
      treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
    }
  }


  if (data.m_frags_outstanding == 0)
  {
    const bool isFirstBatch = data.m_firstBatch;
    data.m_firstBatch = false;

    const ScanFragReq * const org
      = reinterpret_cast<const ScanFragReq*>(data.m_scanFragReq);

    if (data.m_frags_complete == data.m_fragCount)
    {
      jam();
      /**
       * Calculate what would have been the optimal parallelism for the
       * scan instance that we have just completed, and update
       * 'parallelismStat' with this value. We then use these statistics to
       * set the initial parallelism for the next instance of this operation.
       */
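      /**
       * (Illustrative example, not from the original source: with
       * batch_size_rows = 1000, 8 fragments, and 2000 rows returned in
       * total, the row-based term below is 1000 * 8 / 2000 = 4.0, i.e.
       * about 4 fragments could have been scanned in parallel while still
       * filling each batch. The byte-based term is computed the same way,
       * and the smallest of the terms bounds the recorded parallelism.)
       */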
      double parallelism = data.m_fragCount;
      if (data.m_totalRows > 0)
      {
        parallelism = MIN(parallelism,
                          double(org->batch_size_rows) * data.m_fragCount
                          / data.m_totalRows);
      }
      if (data.m_totalBytes > 0)
      {
        parallelism = MIN(parallelism,
                          double(org->batch_size_bytes) * data.m_fragCount
                          / data.m_totalBytes);
      }
      data.m_parallelismStat.update(parallelism);
    }

    /**
     * Don't 'handleTreeNodeComplete' if we're aborting...
     */
    if (state == ScanFragHandle::SFH_WAIT_CLOSE)
    {
      jam();
      ndbrequire((requestPtr.p->m_state & Request::RS_ABORTING) != 0);
      checkBatchComplete(signal, requestPtr, 1);
      return;
    }

    if (isFirstBatch && data.m_frags_not_started > 0)
    {
      /**
       * Check if we can expect to be able to fetch the entire result set by
       * asking for more fragments within the same batch. This may improve
       * performance for bushy scans, as subsequent bushy branches must be
       * re-executed for each batch of this scan.
       */

      /**
       * Find the maximal correlation value that we may have seen so far.
       * A correlation value must be unique within a batch and smaller than
       * org->batch_size_rows.
       */
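      /**
       * (Illustrative example, not from the original source: with
       * batch_size_rows = 1000 and m_parallelism = 4, each fragment owns
       * a correlation-id range of 250 values. If 100 rows were received
       * in total, the formula below yields 1000/4 * 3 + 100 = 850 as an
       * upper bound on the correlation values seen so far.)
       */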
      const Uint32 maxCorrVal = (data.m_totalRows) == 0 ? 0 :
        org->batch_size_rows / data.m_parallelism * (data.m_parallelism - 1)
        + data.m_totalRows;

      // Number of rows & bytes that we can still fetch in this batch.
      const Int32 remainingRows
        = static_cast<Int32>(org->batch_size_rows - maxCorrVal);
      const Int32 remainingBytes
        = static_cast<Int32>(org->batch_size_bytes - data.m_totalBytes);

      if (remainingRows >= data.m_frags_not_started &&
          remainingBytes >= data.m_frags_not_started &&
          /**
           * Check that (remaining row capacity)/(remaining fragments) is
           * greater than or equal to (rows read so far)/(finished fragments).
           */
          remainingRows * static_cast<Int32>(data.m_parallelism) >=
            static_cast<Int32>(data.m_totalRows * data.m_frags_not_started) &&
          remainingBytes * static_cast<Int32>(data.m_parallelism) >=
            static_cast<Int32>(data.m_totalBytes * data.m_frags_not_started))
      {
        jam();
        Uint32 batchRange = maxCorrVal;
        Uint32 bs_rows  = remainingRows / data.m_frags_not_started;
        Uint32 bs_bytes = remainingBytes / data.m_frags_not_started;

        DEBUG("::scanIndex_execSCAN_FRAGCONF() first batch was not full."
              " Asking for new batches from " << data.m_frags_not_started <<
              " fragments with " <<
              bs_rows  <<" rows and " <<
              bs_bytes << " bytes.");

        if (unlikely(bs_rows > bs_bytes))
          bs_rows = bs_bytes;

        Uint32 frags_started =
          scanIndex_send(signal,
                         requestPtr,
                         treeNodePtr,
                         data.m_frags_not_started,
                         bs_bytes,
                         bs_rows,
                         batchRange);

        if (likely(frags_started > 0))
          return;

        // Else: scanIndex_send() didn't send anything for some reason.
        // Need to continue into 'completion detection' below.
        jam();
      }
    } // if (isFirstBatch && data.m_frags_not_started > 0)

    if (data.m_rows_received != data.m_rows_expecting)
    {
      jam();
      return;
    }

    handleTreeNodeComplete(signal, requestPtr, treeNodePtr);
    checkBatchComplete(signal, requestPtr, 1);
  } // if (data.m_frags_outstanding == 0)
}

void
Dbspj::scanIndex_execSCAN_FRAGREF(Signal* signal,
                                  Ptr<Request> requestPtr,
                                  Ptr<TreeNode> treeNodePtr,
                                  Ptr<ScanFragHandle> fragPtr)
{
  jam();

  const ScanFragRef * rep = CAST_CONSTPTR(ScanFragRef, signal->getDataPtr());
  const Uint32 errCode = rep->errorCode;

  Uint32 state = fragPtr.p->m_state;
  ndbrequire(state == ScanFragHandle::SFH_SCANNING ||
             state == ScanFragHandle::SFH_WAIT_CLOSE);

  fragPtr.p->m_state = ScanFragHandle::SFH_COMPLETE;

  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
  ndbrequire(data.m_frags_complete < data.m_fragCount);
  data.m_frags_complete++;
  ndbrequire(data.m_frags_outstanding > 0);
  data.m_frags_outstanding--;

  if (data.m_fragCount == (data.m_frags_complete + data.m_frags_not_started))
  {
    jam();
    ndbrequire(requestPtr.p->m_cnt_active);
    requestPtr.p->m_cnt_active--;
    treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
  }

  if (data.m_frags_outstanding == 0)
  {
    jam();
    ndbrequire(requestPtr.p->m_outstanding);
    requestPtr.p->m_outstanding--;
  }

  abort(signal, requestPtr, errCode);
}

void
Dbspj::scanIndex_execSCAN_NEXTREQ(Signal* signal,
                                  Ptr<Request> requestPtr,
                                  Ptr<TreeNode> treeNodePtr)
{
  jam();
  Uint32 err = checkTableError(treeNodePtr);
  if (unlikely(err))
  {
    jam();
    abort(signal, requestPtr, err);
    return;
  }

  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
  const ScanFragReq * org = (const ScanFragReq*)data.m_scanFragReq;

  data.m_rows_received = 0;
  data.m_rows_expecting = 0;
  ndbassert(data.m_frags_outstanding == 0);

  ndbrequire(data.m_frags_complete < data.m_fragCount);
  if ((treeNodePtr.p->m_bits & TreeNode::T_SCAN_PARALLEL) == 0)
  {
    jam();
    /**
     * Since fetching few but large batches is more efficient, we
     * set parallelism to the lowest value where we can still expect each
     * batch to be full.
     */
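    /**
     * (Illustrative example, not from the original source: with
     * batch_size_rows = 1000 and a largest observed fragment batch of
     * 70 rows, the bounds below allow up to 1000/70 = 14 fragments to be
     * scanned in parallel, capped by the number of not yet completed
     * fragments and by the corresponding byte-based bound.)
     */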
    if (data.m_largestBatchRows < org->batch_size_rows/data.m_parallelism &&
        data.m_largestBatchBytes < org->batch_size_bytes/data.m_parallelism)
    {
      jam();
      data.m_parallelism = MIN(data.m_fragCount - data.m_frags_complete,
                               org->batch_size_rows);
      if (data.m_largestBatchRows > 0)
      {
        jam();
        data.m_parallelism =
          MIN(org->batch_size_rows / data.m_largestBatchRows,
              data.m_parallelism);
      }
      if (data.m_largestBatchBytes > 0)
      {
        jam();
        data.m_parallelism =
          MIN(data.m_parallelism,
              org->batch_size_bytes/data.m_largestBatchBytes);
      }
      if (data.m_frags_complete == 0 &&
          data.m_frags_not_started % data.m_parallelism != 0)
      {
        jam();
        /**
         * Set parallelism such that we can expect to have similar
         * parallelism in each batch. For example if there are 8 remaining
         * fragments, then we should fetch 2 times 4 fragments rather than
         * 7+1.
         */
        const Uint32 roundTrips =
          1 + data.m_frags_not_started / data.m_parallelism;
        data.m_parallelism = data.m_frags_not_started / roundTrips;
      }
    }
    else
    {
      jam();
      // We get full batches, so we should lower parallelism.
      data.m_parallelism = MIN(data.m_fragCount - data.m_frags_complete,
                               MAX(1, data.m_parallelism/2));
    }
    ndbassert(data.m_parallelism > 0);
#ifdef DEBUG_SCAN_FRAGREQ
    DEBUG("::scanIndex_execSCAN_NEXTREQ() Asking for new batches from " <<
          data.m_parallelism <<
          " fragments with " << org->batch_size_rows/data.m_parallelism <<
          " rows and " << org->batch_size_bytes/data.m_parallelism <<
          " bytes.");
#endif
  }
  else
  {
    jam();
    data.m_parallelism = MIN(data.m_fragCount - data.m_frags_complete,
                             org->batch_size_rows);
  }

  const Uint32 bs_rows = org->batch_size_rows/data.m_parallelism;
  ndbassert(bs_rows > 0);
  ScanFragNextReq* req =
    reinterpret_cast<ScanFragNextReq*>(signal->getDataPtrSend());
  req->requestInfo = 0;
  ScanFragNextReq::setCorrFactorFlag(req->requestInfo);
  req->transId1 = requestPtr.p->m_transId[0];
  req->transId2 = requestPtr.p->m_transId[1];
  req->batch_size_rows = bs_rows;
  req->batch_size_bytes = org->batch_size_bytes/data.m_parallelism;

  Uint32 batchRange = 0;
  Ptr<ScanFragHandle> fragPtr;
  Uint32 sentFragCount = 0;
  {
    /**
     * First, ask for more data from fragments that are already started.
     */
    Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
    list.first(fragPtr);
    while (sentFragCount < data.m_parallelism && !fragPtr.isNull())
    {
      jam();
      ndbassert(fragPtr.p->m_state == ScanFragHandle::SFH_WAIT_NEXTREQ ||
                fragPtr.p->m_state == ScanFragHandle::SFH_COMPLETE ||
                fragPtr.p->m_state == ScanFragHandle::SFH_NOT_STARTED);
      if (fragPtr.p->m_state == ScanFragHandle::SFH_WAIT_NEXTREQ)
      {
        jam();

        data.m_frags_outstanding++;
        req->variableData[0] = batchRange;
        // Set senderData before the debug printouts below, so that they
        // show the value actually sent for this fragment.
        req->senderData = fragPtr.i;
        fragPtr.p->m_state = ScanFragHandle::SFH_SCANNING;
        batchRange += bs_rows;

        DEBUG("scanIndex_execSCAN_NEXTREQ to: " << hex
              << treeNodePtr.p->m_send.m_ref
              << ", m_node_no=" << treeNodePtr.p->m_node_no
              << ", senderData: " << req->senderData);

#ifdef DEBUG_SCAN_FRAGREQ
        printSCANFRAGNEXTREQ(stdout, &signal->theData[0],
                             ScanFragNextReq::SignalLength + 1, DBLQH);
#endif

        sendSignal(fragPtr.p->m_ref, GSN_SCAN_NEXTREQ, signal,
                   ScanFragNextReq::SignalLength + 1,
                   JBB);
        sentFragCount++;
      }
      list.next(fragPtr);
    }
  }

  Uint32 frags_started = 0;
  if (sentFragCount < data.m_parallelism)
  {
    /**
     * Then start new fragments until we reach data.m_parallelism.
     */
    jam();
    ndbassert(data.m_frags_not_started != 0);
    frags_started =
      scanIndex_send(signal,
                     requestPtr,
                     treeNodePtr,
                     data.m_parallelism - sentFragCount,
                     org->batch_size_bytes/data.m_parallelism,
                     bs_rows,
                     batchRange);
  }
  /**
   * sendSignal() or scanIndex_send() might have failed to send:
   * Check that we really did send something before
   * updating outstanding & active.
   */
  if (likely(sentFragCount+frags_started > 0))
  {
    jam();
    ndbrequire(data.m_batch_chunks > 0);
    data.m_batch_chunks++;

    requestPtr.p->m_outstanding++;
    requestPtr.p->m_completed_nodes.clear(treeNodePtr.p->m_node_no);
    ndbassert(treeNodePtr.p->m_state == TreeNode::TN_ACTIVE);
  }
}

void
Dbspj::scanIndex_complete(Signal* signal,
                          Ptr<Request> requestPtr,
                          Ptr<TreeNode> treeNodePtr)
{
  jam();
  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
  if (!data.m_fragments.isEmpty())
  {
    jam();
    DihScanTabCompleteRep* rep=(DihScanTabCompleteRep*)signal->getDataPtrSend();
    rep->tableId = treeNodePtr.p->m_tableOrIndexId;
    rep->scanCookie = data.m_scanCookie;
    sendSignal(DBDIH_REF, GSN_DIH_SCAN_TAB_COMPLETE_REP,
               signal, DihScanTabCompleteRep::SignalLength, JBB);
  }
}

void
Dbspj::scanIndex_abort(Signal* signal,
                       Ptr<Request> requestPtr,
                       Ptr<TreeNode> treeNodePtr)
{
  jam();

  switch(treeNodePtr.p->m_state){
  case TreeNode::TN_BUILDING:
  case TreeNode::TN_PREPARING:
  case TreeNode::TN_INACTIVE:
  case TreeNode::TN_COMPLETING:
  case TreeNode::TN_END:
    ndbout_c("H'%.8x H'%.8x scanIndex_abort state: %u",
             requestPtr.p->m_transId[0],
             requestPtr.p->m_transId[1],
             treeNodePtr.p->m_state);
    return;

  case TreeNode::TN_ACTIVE:
    jam();
    break;
  }

  ScanFragNextReq* req = CAST_PTR(ScanFragNextReq, signal->getDataPtrSend());
  req->requestInfo = 0;
  ScanFragNextReq::setCloseFlag(req->requestInfo, 1);
  req->transId1 = requestPtr.p->m_transId[0];
  req->transId2 = requestPtr.p->m_transId[1];
  req->batch_size_rows = 0;
  req->batch_size_bytes = 0;

  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
  Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
  Ptr<ScanFragHandle> fragPtr;

  Uint32 cnt_waiting = 0;
  Uint32 cnt_scanning = 0;
  for (list.first(fragPtr); !fragPtr.isNull(); list.next(fragPtr))
  {
    switch(fragPtr.p->m_state){
    case ScanFragHandle::SFH_NOT_STARTED:
    case ScanFragHandle::SFH_COMPLETE:
    case ScanFragHandle::SFH_WAIT_CLOSE:
      jam();
      break;
    case ScanFragHandle::SFH_WAIT_NEXTREQ:
      jam();
      cnt_waiting++;              // was idle...
      data.m_frags_outstanding++; // is closing
      goto do_abort;
    case ScanFragHandle::SFH_SCANNING:
      jam();
      cnt_scanning++;
      goto do_abort;
    do_abort:
      req->senderData = fragPtr.i;
      sendSignal(fragPtr.p->m_ref, GSN_SCAN_NEXTREQ, signal,
                 ScanFragNextReq::SignalLength, JBB);

      fragPtr.p->m_state = ScanFragHandle::SFH_WAIT_CLOSE;
      break;
    }
  }

  if (cnt_scanning == 0)
  {
    if (cnt_waiting > 0)
    {
      /**
       * If all were waiting... this should increase m_outstanding
       */
      jam();
      requestPtr.p->m_outstanding++;
    }
    else
    {
      /**
       * All fragments are either complete or not yet started, so there is
       * nothing to abort.
       */
      jam();
      ndbassert(data.m_frags_not_started > 0);
      ndbrequire(requestPtr.p->m_cnt_active);
      requestPtr.p->m_cnt_active--;
      treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
    }
  }
}

Uint32
Dbspj::scanIndex_execNODE_FAILREP(Signal* signal,
                                  Ptr<Request> requestPtr,
                                  Ptr<TreeNode> treeNodePtr,
                                  NdbNodeBitmask nodes)
{
  jam();

  switch(treeNodePtr.p->m_state){
  case TreeNode::TN_PREPARING:
  case TreeNode::TN_INACTIVE:
    return 1;

  case TreeNode::TN_BUILDING:
  case TreeNode::TN_COMPLETING:
  case TreeNode::TN_END:
    return 0;

  case TreeNode::TN_ACTIVE:
    jam();
    break;
  }


  Uint32 sum = 0;
  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
  Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
  Ptr<ScanFragHandle> fragPtr;

  Uint32 save0 = data.m_frags_outstanding;
  Uint32 save1 = data.m_frags_complete;

  for (list.first(fragPtr); !fragPtr.isNull(); list.next(fragPtr))
  {
    if (nodes.get(refToNode(fragPtr.p->m_ref)) == false)
    {
      jam();
      /**
       * No action needed
       */
      continue;
    }

    switch(fragPtr.p->m_state){
    case ScanFragHandle::SFH_NOT_STARTED:
      jam();
      ndbrequire(data.m_frags_complete < data.m_fragCount);
      data.m_frags_complete++;
      ndbrequire(data.m_frags_not_started > 0);
      data.m_frags_not_started--;
      // fall through
    case ScanFragHandle::SFH_COMPLETE:
      jam();
      sum++; // indicate that we should abort
      /**
       * We could keep a list of all fragments,
       *   or execute DIGETNODES again...
       *   but for now, we don't.
       */
      break;
    case ScanFragHandle::SFH_WAIT_CLOSE:
    case ScanFragHandle::SFH_SCANNING:
      jam();
      ndbrequire(data.m_frags_outstanding > 0);
      data.m_frags_outstanding--;
      // fall through
    case ScanFragHandle::SFH_WAIT_NEXTREQ:
      jam();
      sum++;
      ndbrequire(data.m_frags_complete < data.m_fragCount);
      data.m_frags_complete++;
      break;
    }
    fragPtr.p->m_ref = 0;
    fragPtr.p->m_state = ScanFragHandle::SFH_COMPLETE;
  }

  if (save0 != 0 && data.m_frags_outstanding == 0)
  {
    jam();
    ndbrequire(requestPtr.p->m_outstanding);
    requestPtr.p->m_outstanding--;
  }

  if (save1 != 0 &&
      data.m_fragCount == (data.m_frags_complete + data.m_frags_not_started))
  {
    jam();
    ndbrequire(requestPtr.p->m_cnt_active);
    requestPtr.p->m_cnt_active--;
    treeNodePtr.p->m_state = TreeNode::TN_INACTIVE;
  }

  return sum;
}

void
Dbspj::scanIndex_release_rangekeys(Ptr<Request> requestPtr,
                                   Ptr<TreeNode> treeNodePtr)
{
  jam();
  DEBUG("scanIndex_release_rangekeys(), tree node " << treeNodePtr.i
          << " m_node_no: " << treeNodePtr.p->m_node_no);

  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
  Local_ScanFragHandle_list list(m_scanfraghandle_pool, data.m_fragments);
  Ptr<ScanFragHandle> fragPtr;

  if (treeNodePtr.p->m_bits & TreeNode::T_PRUNE_PATTERN)
  {
    jam();
    for (list.first(fragPtr); !fragPtr.isNull(); list.next(fragPtr))
    {
      if (fragPtr.p->m_rangePtrI != RNIL)
      {
        releaseSection(fragPtr.p->m_rangePtrI);
        fragPtr.p->m_rangePtrI = RNIL;
      }
      fragPtr.p->reset_ranges();
    }
  }
  else
  {
    jam();
    if (!list.first(fragPtr))
      return;
    if (fragPtr.p->m_rangePtrI != RNIL)
    {
      releaseSection(fragPtr.p->m_rangePtrI);
      fragPtr.p->m_rangePtrI = RNIL;
    }
    fragPtr.p->reset_ranges();
  }
}

/**
 * The parent batch has completed, and will not refetch (X-joined) results
 * from its children. Release & reset range keys which are unsent, or which
 * we have kept for possible resubmits.
 */
void
Dbspj::scanIndex_parent_batch_cleanup(Ptr<Request> requestPtr,
                                      Ptr<TreeNode> treeNodePtr)
{
  DEBUG("scanIndex_parent_batch_cleanup");
  scanIndex_release_rangekeys(requestPtr,treeNodePtr);
}

void
Dbspj::scanIndex_cleanup(Ptr<Request> requestPtr,
                         Ptr<TreeNode> treeNodePtr)
{
  ScanIndexData& data = treeNodePtr.p->m_scanindex_data;
  DEBUG("scanIndex_cleanup");

  /**
   * Range keys have been collected wherever there are uncompleted
   * parent batches... release them to avoid a memory leak.
   */
  scanIndex_release_rangekeys(requestPtr,treeNodePtr);

  // Clear fragments list head.
  // TODO: is this needed? All elements should already be removed and released.
  data.m_fragments.init();

  if (treeNodePtr.p->m_bits & TreeNode::T_PRUNE_PATTERN)
  {
    jam();
    LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
    Local_pattern_store pattern(pool, data.m_prunePattern);
    pattern.release();
  }
  else if (treeNodePtr.p->m_bits & TreeNode::T_CONST_PRUNE)
  {
    jam();
    if (data.m_constPrunePtrI != RNIL)
    {
      jam();
      releaseSection(data.m_constPrunePtrI);
      data.m_constPrunePtrI = RNIL;
    }
  }

  cleanup_common(requestPtr, treeNodePtr);
}

/**
 * END - MODULE SCAN INDEX
 */

/**
 * Static OpInfo handling
 */
const Dbspj::OpInfo*
Dbspj::getOpInfo(Uint32 op)
{
  DEBUG("getOpInfo(" << op << ")");
  switch(op){
  case QueryNode::QN_LOOKUP:
    return &Dbspj::g_LookupOpInfo;
  case QueryNode::QN_SCAN_FRAG:
    return &Dbspj::g_ScanFragOpInfo;
  case QueryNode::QN_SCAN_INDEX:
    return &Dbspj::g_ScanIndexOpInfo;
  default:
    return 0;
  }
}

/**
 * MODULE COMMON PARSE/UNPACK
 */

/**
 *  @returns dstLen + 1 on error
 */
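/**
 * unpackList() decodes a list of 16-bit values packed two per 32-bit word,
 * as can be seen from the implementation below: the first word holds the
 * element count in its low 16 bits and the first element in its high 16
 * bits; the remaining elements follow, packed low half first.
 * (Illustrative example, not from the original source: the list {5, 7, 9}
 * is encoded as word0 = (5 << 16) | 3, word1 = (9 << 16) | 7.)
 */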
static
Uint32
unpackList(Uint32 dstLen, Uint32 * dst, Dbspj::DABuffer & buffer)
{
  const Uint32 * ptr = buffer.ptr;
  if (likely(ptr != buffer.end))
  {
    Uint32 tmp = * ptr++;
    Uint32 cnt = tmp & 0xFFFF;

    * dst ++ = (tmp >> 16); // Store first
    DEBUG("cnt: " << cnt << " first: " << (tmp >> 16));

    if (cnt > 1)
    {
      Uint32 len = cnt / 2;
      if (unlikely(cnt >= dstLen || (ptr + len > buffer.end)))
        goto error;

      cnt --; // subtract item stored in header

      for (Uint32 i = 0; i < cnt/2; i++)
      {
        * dst++ = (* ptr) & 0xFFFF;
        * dst++ = (* ptr) >> 16;
        ptr++;
      }

      if (cnt & 1)
      {
        * dst ++ = * ptr & 0xFFFF;
        ptr++;
      }

      cnt ++; // re-add item stored in header
    }
    buffer.ptr = ptr;
    return cnt;
  }
  return 0;

error:
  return dstLen + 1;
}

/**
 * This function takes an array of attrinfo and builds a "header"
 * which can be used to do random access inside the row.
 */
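/**
 * (Illustrative example, not from the original source: for a row whose
 * attrinfo stream holds two attributes with data sizes of 2 and 1 words,
 * the produced header is m_offset = {0, 3} and m_len = 2; each offset
 * points at the AttributeHeader word of the corresponding attribute.)
 */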
Uint32
Dbspj::buildRowHeader(RowPtr::Header * header, SegmentedSectionPtr ptr)
{
  Uint32 tmp, len;
  Uint32 * dst = header->m_offset;
  const Uint32 * const save = dst;
  SectionReader r0(ptr, getSectionSegmentPool());
  Uint32 offset = 0;
  do
  {
    * dst++ = offset;
    r0.getWord(&tmp);
    len = AttributeHeader::getDataSize(tmp);
    offset += 1 + len;
  } while (r0.step(len));

  return header->m_len = static_cast<Uint32>(dst - save);
}

/**
 * This function takes an array of attrinfo and builds a "header"
 * which can be used to do random access inside the row.
 */
Uint32
Dbspj::buildRowHeader(RowPtr::Header * header, const Uint32 *& src, Uint32 len)
{
  Uint32 * dst = header->m_offset;
  const Uint32 * save = dst;
  Uint32 offset = 0;
  for (Uint32 i = 0; i<len; i++)
  {
    * dst ++ = offset;
    Uint32 tmp = * src++;
    Uint32 tmp_len = AttributeHeader::getDataSize(tmp);
    offset += 1 + tmp_len;
    src += tmp_len;
  }

  return header->m_len = static_cast<Uint32>(dst - save);
}

Uint32
Dbspj::appendToPattern(Local_pattern_store & pattern,
                       DABuffer & tree, Uint32 len)
{
  jam();
  if (unlikely(tree.ptr + len > tree.end))
    return DbspjErr::InvalidTreeNodeSpecification;

  if (ERROR_INSERTED_CLEAR(17008))
  {
    ndbout_c("Injecting OutOfQueryMemory error 17008 at line %d file %s",
             __LINE__,  __FILE__);
    jam();
    return DbspjErr::OutOfQueryMemory;
  }
  if (unlikely(pattern.append(tree.ptr, len)==0))
    return DbspjErr::OutOfQueryMemory;

  tree.ptr += len;
  return 0;
}

Uint32
Dbspj::appendParamToPattern(Local_pattern_store& dst,
                            const RowPtr::Linear & row, Uint32 col)
{
  jam();
  Uint32 offset = row.m_header->m_offset[col];
  const Uint32 * ptr = row.m_data + offset;
  Uint32 len = AttributeHeader::getDataSize(* ptr ++);
  /* Param COL's converted to DATA when appended to pattern */
  Uint32 info = QueryPattern::data(len);

  if (ERROR_INSERTED_CLEAR(17009))
  {
    ndbout_c("Injecting OutOfQueryMemory error 17009 at line %d file %s",
             __LINE__,  __FILE__);
    jam();
    return DbspjErr::OutOfQueryMemory;
  }

  return dst.append(&info,1) && dst.append(ptr,len) ? 0 : DbspjErr::OutOfQueryMemory;
}

#ifdef ERROR_INSERT
static int fi_cnt = 0;
bool
Dbspj::appendToSection(Uint32& firstSegmentIVal,
                         const Uint32* src, Uint32 len)
{
  if (ERROR_INSERTED(17510) && fi_cnt++ % 13 == 0)
  {
    jam();
    ndbout_c("Injecting appendToSection error 17510 at line %d file %s",
             __LINE__,  __FILE__);
    return false;
  }
  else
  {
    return SimulatedBlock::appendToSection(firstSegmentIVal, src, len);
  }
}
#endif

Uint32
Dbspj::appendParamHeadToPattern(Local_pattern_store& dst,
                                const RowPtr::Linear & row, Uint32 col)
{
  jam();
  Uint32 offset = row.m_header->m_offset[col];
  const Uint32 * ptr = row.m_data + offset;
  Uint32 len = AttributeHeader::getDataSize(*ptr);
  /* Param COL's converted to DATA when appended to pattern */
  Uint32 info = QueryPattern::data(len+1);

  if (ERROR_INSERTED_CLEAR(17010))
  {
    ndbout_c("Injecting OutOfQueryMemory error 17010 at line %d file %s",
             __LINE__,  __FILE__);
    jam();
    return DbspjErr::OutOfQueryMemory;
  }

  return dst.append(&info,1) && dst.append(ptr,len+1) ? 0 : DbspjErr::OutOfQueryMemory;
}

Uint32
Dbspj::appendTreeToSection(Uint32 & ptrI, SectionReader & tree, Uint32 len)
{
  /**
   * TODO handle errors
   */
  jam();
  Uint32 SZ = 16;
  Uint32 tmp[16];
  while (len > SZ)
  {
    jam();
    tree.getWords(tmp, SZ);
    if (!appendToSection(ptrI, tmp, SZ))
      return DbspjErr::OutOfSectionMemory;
    len -= SZ;
  }

  tree.getWords(tmp, len);
  if (!appendToSection(ptrI, tmp, len))
    return DbspjErr::OutOfSectionMemory;

  return 0;
}

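/**
 * Note on the CORR_FACTOR32 value read by the functions below: it is a
 * single 32-bit word whose upper 16 bits hold the position of the parent
 * row (see 'corrVal >> 16' in ::appendFromParent() further down), which
 * is how child rows are correlated back to their parent rows.
 */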
void
Dbspj::getCorrelationData(const RowPtr::Section & row,
                          Uint32 col,
                          Uint32& correlationNumber)
{
  /**
   * TODO handle errors
   */
  SegmentedSectionPtr ptr(row.m_dataPtr);
  SectionReader reader(ptr, getSectionSegmentPool());
  Uint32 offset = row.m_header->m_offset[col];
  ndbrequire(reader.step(offset));
  Uint32 tmp;
  ndbrequire(reader.getWord(&tmp));
  Uint32 len = AttributeHeader::getDataSize(tmp);
  ndbrequire(len == 1);
  ndbrequire(AttributeHeader::getAttributeId(tmp) == AttributeHeader::CORR_FACTOR32);
  ndbrequire(reader.getWord(&correlationNumber));
}

void
Dbspj::getCorrelationData(const RowPtr::Linear & row,
                          Uint32 col,
                          Uint32& correlationNumber)
{
  /**
   * TODO handle errors
   */
  Uint32 offset = row.m_header->m_offset[col];
  Uint32 tmp = row.m_data[offset];
  Uint32 len = AttributeHeader::getDataSize(tmp);
  ndbrequire(len == 1);
  ndbrequire(AttributeHeader::getAttributeId(tmp) == AttributeHeader::CORR_FACTOR32);
  correlationNumber = row.m_data[offset+1];
}

Uint32
Dbspj::appendColToSection(Uint32 & dst, const RowPtr::Section & row,
                          Uint32 col, bool& hasNull)
{
  jam();
  /**
   * TODO handle errors
   */
  SegmentedSectionPtr ptr(row.m_dataPtr);
  SectionReader reader(ptr, getSectionSegmentPool());
  Uint32 offset = row.m_header->m_offset[col];
  ndbrequire(reader.step(offset));
  Uint32 tmp;
  ndbrequire(reader.getWord(&tmp));
  Uint32 len = AttributeHeader::getDataSize(tmp);
  if (unlikely(len==0))
  {
    jam();
    hasNull = true;  // NULL-value in key
    return 0;
  }
  return appendTreeToSection(dst, reader, len);
}

Uint32
Dbspj::appendColToSection(Uint32 & dst, const RowPtr::Linear & row,
                          Uint32 col, bool& hasNull)
{
  jam();
  Uint32 offset = row.m_header->m_offset[col];
  const Uint32 * ptr = row.m_data + offset;
  Uint32 len = AttributeHeader::getDataSize(* ptr ++);
  if (unlikely(len==0))
  {
    jam();
    hasNull = true;  // NULL-value in key
    return 0;
  }
  return appendToSection(dst, ptr, len) ? 0 : DbspjErr::OutOfSectionMemory;
}

Uint32
Dbspj::appendAttrinfoToSection(Uint32 & dst, const RowPtr::Linear & row,
                               Uint32 col, bool& hasNull)
{
  jam();
  Uint32 offset = row.m_header->m_offset[col];
  const Uint32 * ptr = row.m_data + offset;
  Uint32 len = AttributeHeader::getDataSize(* ptr);
  if (unlikely(len==0))
  {
    jam();
    hasNull = true;  // NULL-value in key
  }
  return appendToSection(dst, ptr, 1 + len) ? 0 : DbspjErr::OutOfSectionMemory;
}

Uint32
Dbspj::appendAttrinfoToSection(Uint32 & dst, const RowPtr::Section & row,
                               Uint32 col, bool& hasNull)
{
  jam();
  /**
   * TODO handle errors
   */
  SegmentedSectionPtr ptr(row.m_dataPtr);
  SectionReader reader(ptr, getSectionSegmentPool());
  Uint32 offset = row.m_header->m_offset[col];
  ndbrequire(reader.step(offset));
  Uint32 tmp;
  ndbrequire(reader.peekWord(&tmp));
  Uint32 len = AttributeHeader::getDataSize(tmp);
  if (unlikely(len==0))
  {
    jam();
    hasNull = true;  // NULL-value in key
  }
  return appendTreeToSection(dst, reader, 1 + len);
}

/**
 * 'PkCol' is the composite NDB$PK column in a unique index, consisting of
 * a fragment id and the composite PK value (all PK columns concatenated).
 */
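/**
 * (Column layout, as read by the two implementations below:
 * [AttributeHeader] [fragId] [PK words...]; both variants skip the fragId
 * word and append only the 'len - 1' words of concatenated PK data.)
 */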
Uint32
Dbspj::appendPkColToSection(Uint32 & dst, const RowPtr::Section & row, Uint32 col)
{
  jam();
  /**
   * TODO handle errors
   */
  SegmentedSectionPtr ptr(row.m_dataPtr);
  SectionReader reader(ptr, getSectionSegmentPool());
  Uint32 offset = row.m_header->m_offset[col];
  ndbrequire(reader.step(offset));
  Uint32 tmp;
  ndbrequire(reader.getWord(&tmp));
  Uint32 len = AttributeHeader::getDataSize(tmp);
  ndbrequire(len>1);  // NULL-value in PkKey is an error
  ndbrequire(reader.step(1)); // Skip fragid
  return appendTreeToSection(dst, reader, len-1);
}

/**
 * 'PkCol' is the composite NDB$PK column in a unique index, consisting of
 * a fragment id and the composite PK value (all PK columns concatenated).
 */
Uint32
Dbspj::appendPkColToSection(Uint32 & dst, const RowPtr::Linear & row, Uint32 col)
{
  jam();
  Uint32 offset = row.m_header->m_offset[col];
  Uint32 tmp = row.m_data[offset];
  Uint32 len = AttributeHeader::getDataSize(tmp);
  ndbrequire(len>1);  // NULL-value in PkKey is an error
  return appendToSection(dst, row.m_data+offset+2, len - 1) ? 0 : DbspjErr::OutOfSectionMemory;
}

Uint32
Dbspj::appendFromParent(Uint32 & dst, Local_pattern_store& pattern,
                        Local_pattern_store::ConstDataBufferIterator& it,
                        Uint32 levels, const RowPtr & rowptr,
                        bool& hasNull)
{
  jam();
  Ptr<TreeNode> treeNodePtr;
  m_treenode_pool.getPtr(treeNodePtr, rowptr.m_src_node_ptrI);
  Uint32 corrVal = rowptr.m_src_correlation;
  RowPtr targetRow;
  DEBUG("appendFromParent-of"
     << " node: " << treeNodePtr.p->m_node_no);
  while (levels--)
  {
    jam();
    if (unlikely(treeNodePtr.p->m_parentPtrI == RNIL))
    {
      DEBUG_CRASH();
      return DbspjErr::InvalidPattern;
    }
    m_treenode_pool.getPtr(treeNodePtr, treeNodePtr.p->m_parentPtrI);
    DEBUG("appendFromParent"
       << ", node: " << treeNodePtr.p->m_node_no);
    if (unlikely(treeNodePtr.p->m_rows.m_type != RowCollection::COLLECTION_MAP))
    {
      DEBUG_CRASH();
      return DbspjErr::InvalidPattern;
    }

    RowRef ref;
    treeNodePtr.p->m_rows.m_map.copyto(ref);
    const Uint32* const mapptr = get_row_ptr(ref);

    Uint32 pos = corrVal >> 16; // parent corr-val
    if (unlikely(! (pos < treeNodePtr.p->m_rows.m_map.m_size)))
    {
      DEBUG_CRASH();
      return DbspjErr::InvalidPattern;
    }

    // load ref to parent row
    treeNodePtr.p->m_rows.m_map.load(mapptr, pos, ref);

    const Uint32* const rowptr = get_row_ptr(ref);
    setupRowPtr(treeNodePtr.p->m_rows, targetRow, ref, rowptr);

    if (levels)
    {
      jam();
      getCorrelationData(targetRow.m_row_data.m_linear,
                         targetRow.m_row_data.m_linear.m_header->m_len - 1,
                         corrVal);
    }
  }

  if (unlikely(it.isNull()))
  {
    DEBUG_CRASH();
    return DbspjErr::InvalidPattern;
  }

  Uint32 info = *it.data;
  Uint32 type = QueryPattern::getType(info);
  Uint32 val = QueryPattern::getLength(info);
  pattern.next(it);
  switch(type){
  case QueryPattern::P_COL:
    jam();
    return appendColToSection(dst, targetRow.m_row_data.m_linear, val, hasNull);
  case QueryPattern::P_UNQ_PK:
    jam();
    return appendPkColToSection(dst, targetRow.m_row_data.m_linear, val);
  case QueryPattern::P_ATTRINFO:
    jam();
    return appendAttrinfoToSection(dst, targetRow.m_row_data.m_linear, val, hasNull);
  case QueryPattern::P_DATA:
    jam();
    // retrieving DATA from a parent is an error
    DEBUG_CRASH();
    return DbspjErr::InvalidPattern;
  case QueryPattern::P_PARENT:
    jam();
    // no point in nesting P_PARENT... an error
    DEBUG_CRASH();
    return DbspjErr::InvalidPattern;
  case QueryPattern::P_PARAM:
  case QueryPattern::P_PARAM_HEADER:
    jam();
    // should have been expanded during build
    DEBUG_CRASH();
    return DbspjErr::InvalidPattern;
  default:
    jam();
    DEBUG_CRASH();
    return DbspjErr::InvalidPattern;
  }
}

Uint32
Dbspj::appendDataToSection(Uint32 & ptrI,
                           Local_pattern_store& pattern,
                           Local_pattern_store::ConstDataBufferIterator& it,
                           Uint32 len, bool& hasNull)
{
  jam();
  if (unlikely(len==0))
  {
    jam();
    hasNull = true;
    return 0;
  }

#if 0
  /**
   * TODO handle errors
   */
  Uint32 tmp[NDB_SECTION_SEGMENT_SZ];
  while (len > NDB_SECTION_SEGMENT_SZ)
  {
    pattern.copyout(tmp, NDB_SECTION_SEGMENT_SZ, it);
    appendToSection(ptrI, tmp, NDB_SECTION_SEGMENT_SZ);
    len -= NDB_SECTION_SEGMENT_SZ;
  }

  pattern.copyout(tmp, len, it);
  appendToSection(ptrI, tmp, len);
  return 0;
#else
  Uint32 remaining = len;
  Uint32 dstIdx = 0;
  Uint32 tmp[NDB_SECTION_SEGMENT_SZ];

  while (remaining > 0 && !it.isNull())
  {
    tmp[dstIdx] = *it.data;
    remaining--;
    dstIdx++;
    pattern.next(it);
    if (dstIdx == NDB_SECTION_SEGMENT_SZ || remaining == 0)
    {
      if (!appendToSection(ptrI, tmp, dstIdx))
      {
        jam();
        return DbspjErr::OutOfSectionMemory;
      }
      dstIdx = 0;
    }
  }
  if (remaining > 0)
  {
    DEBUG_CRASH();
    return DbspjErr::InvalidPattern;
  }
  else
  {
    return 0;
  }
#endif
}

/**
 * This function takes a pattern and a row and expands it into a section.
 */
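/**
 * (Summary of the pattern tokens handled below, as can be read from the
 * switch cases: P_COL / P_UNQ_PK / P_ATTRINFO copy a column value from
 * the current row, P_DATA copies literal words stored in the pattern
 * itself, and P_PARENT redirects the next token to a row further up the
 * tree. P_PARAM / P_PARAM_HEADER must already have been converted to
 * P_DATA during the build phase.)
 */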
8013 Uint32
expandS(Uint32 & _dst,Local_pattern_store & pattern,const RowPtr & row,bool & hasNull)8014 Dbspj::expandS(Uint32 & _dst, Local_pattern_store& pattern,
8015                const RowPtr & row, bool& hasNull)
8016 {
8017   Uint32 err;
8018   Uint32 dst = _dst;
8019   hasNull = false;
8020   Local_pattern_store::ConstDataBufferIterator it;
8021   pattern.first(it);
8022   while (!it.isNull())
8023   {
8024     Uint32 info = *it.data;
8025     Uint32 type = QueryPattern::getType(info);
8026     Uint32 val = QueryPattern::getLength(info);
8027     pattern.next(it);
8028     switch(type){
8029     case QueryPattern::P_COL:
8030       jam();
8031       err = appendColToSection(dst, row.m_row_data.m_section, val, hasNull);
8032       break;
8033     case QueryPattern::P_UNQ_PK:
8034       jam();
8035       err = appendPkColToSection(dst, row.m_row_data.m_section, val);
8036       break;
8037     case QueryPattern::P_ATTRINFO:
8038       jam();
8039       err = appendAttrinfoToSection(dst, row.m_row_data.m_section, val, hasNull);
8040       break;
8041     case QueryPattern::P_DATA:
8042       jam();
8043       err = appendDataToSection(dst, pattern, it, val, hasNull);
8044       break;
8045     case QueryPattern::P_PARENT:
8046       jam();
8047       // P_PARENT is a prefix to another pattern token
8048       // that permits code to access rows from earlier than immediate parent.
8049       // val is no of levels to move up the tree
8050       err = appendFromParent(dst, pattern, it, val, row, hasNull);
8051       break;
8052       // PARAM's was converted to DATA by ::expand(pattern...)
8053     case QueryPattern::P_PARAM:
8054     case QueryPattern::P_PARAM_HEADER:
8055     default:
8056       jam();
8057       err = DbspjErr::InvalidPattern;
8058       DEBUG_CRASH();
8059     }
8060     if (unlikely(err != 0))
8061     {
8062       jam();
8063       _dst = dst;
8064       return err;
8065     }
8066   }
8067 
8068   _dst = dst;
8069   return 0;
8070 }
8071 
8072 /**
8073  * This function takes a pattern and a row and expands it into a section
8074  */
8075 Uint32
expandL(Uint32 & _dst,Local_pattern_store & pattern,const RowPtr & row,bool & hasNull)8076 Dbspj::expandL(Uint32 & _dst, Local_pattern_store& pattern,
8077                const RowPtr & row, bool& hasNull)
8078 {
8079   Uint32 err;
8080   Uint32 dst = _dst;
8081   hasNull = false;
8082   Local_pattern_store::ConstDataBufferIterator it;
8083   pattern.first(it);
8084   while (!it.isNull())
8085   {
8086     Uint32 info = *it.data;
8087     Uint32 type = QueryPattern::getType(info);
8088     Uint32 val = QueryPattern::getLength(info);
8089     pattern.next(it);
8090     switch(type){
8091     case QueryPattern::P_COL:
8092       jam();
8093       err = appendColToSection(dst, row.m_row_data.m_linear, val, hasNull);
8094       break;
8095     case QueryPattern::P_UNQ_PK:
8096       jam();
8097       err = appendPkColToSection(dst, row.m_row_data.m_linear, val);
8098       break;
8099     case QueryPattern::P_ATTRINFO:
8100       jam();
8101       err = appendAttrinfoToSection(dst, row.m_row_data.m_linear, val, hasNull);
8102       break;
8103     case QueryPattern::P_DATA:
8104       jam();
8105       err = appendDataToSection(dst, pattern, it, val, hasNull);
8106       break;
8107     case QueryPattern::P_PARENT:
8108       jam();
8109       // P_PARENT is a prefix to another pattern token
8110       // that permits code to access rows from earlier than immediate parent
8111       // val is no of levels to move up the tree
8112       err = appendFromParent(dst, pattern, it, val, row, hasNull);
8113       break;
8114       // PARAM's was converted to DATA by ::expand(pattern...)
8115     case QueryPattern::P_PARAM:
8116     case QueryPattern::P_PARAM_HEADER:
8117     default:
8118       jam();
8119       err = DbspjErr::InvalidPattern;
8120       DEBUG_CRASH();
8121     }
8122     if (unlikely(err != 0))
8123     {
8124       jam();
8125       _dst = dst;
8126       return err;
8127     }
8128   }
8129 
8130   _dst = dst;
8131   return 0;
8132 }

/* ::expand() used during initial 'build' phase on 'tree' + 'param' from API */
Uint32
Dbspj::expand(Uint32 & ptrI, DABuffer& pattern, Uint32 len,
              DABuffer& param, Uint32 paramCnt, bool& hasNull)
{
  jam();
  /**
   * TODO handle error
   */
  Uint32 err = 0;
  Uint32 tmp[1+MAX_ATTRIBUTES_IN_TABLE];
  struct RowPtr::Linear row;
  row.m_data = param.ptr;
  row.m_header = CAST_PTR(RowPtr::Header, &tmp[0]);
  buildRowHeader(CAST_PTR(RowPtr::Header, &tmp[0]), param.ptr, paramCnt);

  Uint32 dst = ptrI;
  const Uint32 * ptr = pattern.ptr;
  const Uint32 * end = ptr + len;
  hasNull = false;

  for (; ptr < end; )
  {
    Uint32 info = * ptr++;
    Uint32 type = QueryPattern::getType(info);
    Uint32 val = QueryPattern::getLength(info);
    switch(type){
    case QueryPattern::P_PARAM:
      jam();
      ndbassert(val < paramCnt);
      err = appendColToSection(dst, row, val, hasNull);
      break;
    case QueryPattern::P_PARAM_HEADER:
      jam();
      ndbassert(val < paramCnt);
      err = appendAttrinfoToSection(dst, row, val, hasNull);
      break;
    case QueryPattern::P_DATA:
      if (unlikely(val==0))
      {
        jam();
        hasNull = true;
      }
      else if (likely(appendToSection(dst, ptr, val)))
      {
        jam();
        ptr += val;
      }
      else
      {
        jam();
        err = DbspjErr::OutOfSectionMemory;
      }
      break;
    case QueryPattern::P_COL:    // (linked) COL's not expected here
    case QueryPattern::P_PARENT: // Prefix to P_COL
    case QueryPattern::P_ATTRINFO:
    case QueryPattern::P_UNQ_PK:
    default:
      jam();
      jamLine(type);
      err = DbspjErr::InvalidPattern;
    }
    if (unlikely(err != 0))
    {
      jam();
      ptrI = dst;
      return err;
    }
  }

  /**
   * Iterate forward
   */
  pattern.ptr = end;
  ptrI = dst;
  return 0;
}
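
/**
 * Worked example (a sketch, not block code): how the loop above consumes a
 * token stream. A P_DATA token with val==0 contributes no data words and
 * only flags a NULL value, while a P_DATA token with val==n is followed by
 * n words of inline data. Mock type codes are used and everything here is
 * hypothetical, guarded by the SPJ_DOC_EXAMPLES macro.
 */
#ifdef SPJ_DOC_EXAMPLES
static Uint32 example_expand_walk(const Uint32* ptr, const Uint32* end,
                                  bool& hasNull)
{
  const Uint32 EX_P_DATA = 1;    // mock code for QueryPattern::P_DATA
  Uint32 copied = 0;
  hasNull = false;
  while (ptr < end)
  {
    Uint32 info = *ptr++;
    Uint32 type = info >> 16;    // assumed packing, see sketch above
    Uint32 val  = info & 0xFFFF;
    if (type == EX_P_DATA)
    {
      if (val == 0)
        hasNull = true;          // NULL value: header only, no data words
      else
      {
        copied += val;           // would be appendToSection(dst, ptr, val)
        ptr += val;
      }
    }
  }
  return copied;
}
#endif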

/* ::expand() also used during the initial 'build' phase; this variant
   expands 'tree' + 'param' into a Local_pattern_store, which ::expandL()
   later re-expands for each parent row. */
Uint32
Dbspj::expand(Local_pattern_store& dst, Ptr<TreeNode> treeNodePtr,
              DABuffer& pattern, Uint32 len,
              DABuffer& param, Uint32 paramCnt)
{
  jam();
  /**
   * TODO handle error
   */
  Uint32 err;
  Uint32 tmp[1+MAX_ATTRIBUTES_IN_TABLE];
  struct RowPtr::Linear row;
  row.m_header = CAST_PTR(RowPtr::Header, &tmp[0]);
  row.m_data = param.ptr;
  buildRowHeader(CAST_PTR(RowPtr::Header, &tmp[0]), param.ptr, paramCnt);

  const Uint32 * end = pattern.ptr + len;
  for (; pattern.ptr < end; )
  {
    Uint32 info = *pattern.ptr;
    Uint32 type = QueryPattern::getType(info);
    Uint32 val = QueryPattern::getLength(info);
    switch(type){
    case QueryPattern::P_COL:
    case QueryPattern::P_UNQ_PK:
    case QueryPattern::P_ATTRINFO:
      jam();
      err = appendToPattern(dst, pattern, 1);
      break;
    case QueryPattern::P_DATA:
      jam();
      err = appendToPattern(dst, pattern, val+1);
      break;
    case QueryPattern::P_PARAM:
      jam();
      // NOTE: Converted to P_DATA by appendParamToPattern
      ndbassert(val < paramCnt);
      err = appendParamToPattern(dst, row, val);
      pattern.ptr++;
      break;
    case QueryPattern::P_PARAM_HEADER:
      jam();
      // NOTE: Converted to P_DATA by appendParamHeadToPattern
      ndbassert(val < paramCnt);
      err = appendParamHeadToPattern(dst, row, val);
      pattern.ptr++;
      break;
    case QueryPattern::P_PARENT: // Prefix to P_COL
    {
      jam();
      err = appendToPattern(dst, pattern, 1);
      if (unlikely(err))
      {
        jam();
        break;
      }
      // Locate the requested grandparent and request it to
      // T_ROW_BUFFER its result rows
      Ptr<TreeNode> parentPtr;
      m_treenode_pool.getPtr(parentPtr, treeNodePtr.p->m_parentPtrI);
      while (val--)
      {
        jam();
        ndbassert(parentPtr.p->m_parentPtrI != RNIL);
        m_treenode_pool.getPtr(parentPtr, parentPtr.p->m_parentPtrI);
        parentPtr.p->m_bits |= TreeNode::T_ROW_BUFFER;
        parentPtr.p->m_bits |= TreeNode::T_ROW_BUFFER_MAP;
      }
      Ptr<Request> requestPtr;
      m_request_pool.getPtr(requestPtr, treeNodePtr.p->m_requestPtrI);
      requestPtr.p->m_bits |= Request::RT_ROW_BUFFERS;
      break;
    }
    default:
      jam();
      err = DbspjErr::InvalidPattern;
    }

    if (unlikely(err != 0))
    {
      jam();
      return err;
    }
  }
  return 0;
}
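
/**
 * Note on the P_PARENT case above: the stored pattern has the shape
 * [P_PARENT(val)][P_COL(attrId)], i.e. "column attrId from the ancestor
 * 'val' levels above the immediate parent". Each node visited while walking
 * up that chain is flagged T_ROW_BUFFER(_MAP), so its result rows are still
 * available when this node later constructs keys via ::expandL().
 */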

Uint32
Dbspj::parseDA(Build_context& ctx,
               Ptr<Request> requestPtr,
               Ptr<TreeNode> treeNodePtr,
               DABuffer& tree, Uint32 treeBits,
               DABuffer& param, Uint32 paramBits)
{
  Uint32 err;
  Uint32 attrInfoPtrI = RNIL;
  Uint32 attrParamPtrI = RNIL;

  do
  {
    /**
     * Test execution terminated due to 'OutOfSectionMemory', which
     * may happen in multiple places below (e.g. appendToSection, expand):
     * - 17050: Fail on parseDA at first call
     * - 17051: Fail on parseDA if 'isLeaf'
     * - 17052: Fail on parseDA if treeNode not root
     * - 17053: Fail on parseDA at a random node of the query tree
     */
    if (ERROR_INSERTED(17050) ||
       (ERROR_INSERTED(17051) && (treeNodePtr.p->isLeaf())) ||
       (ERROR_INSERTED(17052) && (treeNodePtr.p->m_parentPtrI != RNIL)) ||
       (ERROR_INSERTED(17053) && (rand() % 7) == 0))
    {
      jam();
      CLEAR_ERROR_INSERT_VALUE;
      ndbout_c("Injecting OutOfSectionMemory error at line %d file %s",
                __LINE__,  __FILE__);
      err = DbspjErr::OutOfSectionMemory;
      break;
    }

    if (treeBits & DABits::NI_REPEAT_SCAN_RESULT)
    {
      jam();
      DEBUG("use REPEAT_SCAN_RESULT when returning results");
      requestPtr.p->m_bits |= Request::RT_REPEAT_SCAN_RESULT;
    } // DABits::NI_REPEAT_SCAN_RESULT

    if (treeBits & DABits::NI_HAS_PARENT)
    {
      jam();
      DEBUG("NI_HAS_PARENT");
      /**
       * OPTIONAL PART 1:
       *
       * Parent nodes are stored first in the optional part.
       *   This is a list of 16-bit numbers referring to
       *   *earlier* nodes in the tree.
       *   The first 16-bit word holds the length of the list.
       */
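      /**
       * A sketch of the assumed ::unpackList() packing (illustrative, not
       * authoritative): the list {5, 7, 9} would arrive as
       *   word0 = (5 << 16) | 3  // first item shares a word with the length
       *   word1 = (9 << 16) | 7  // remaining items, two per word, low first
       */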
      err = DbspjErr::InvalidTreeNodeSpecification;
      Uint32 dst[63];
      Uint32 cnt = unpackList(NDB_ARRAY_SIZE(dst), dst, tree);
      if (unlikely(cnt > NDB_ARRAY_SIZE(dst)))
      {
        jam();
        break;
      }

      if (unlikely(cnt!=1))
      {
        /**
         * Only a single parent is supported for now, i.e. only trees
         */
        jam();
        break;
      }

      err = 0;
      for (Uint32 i = 0; i<cnt; i++)
      {
        DEBUG("adding " << dst[i] << " as parent");
        Ptr<TreeNode> parentPtr = ctx.m_node_list[dst[i]];
        LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
        Local_dependency_map map(pool, parentPtr.p->m_dependent_nodes);
        if (unlikely(!map.append(&treeNodePtr.i, 1)))
        {
          jam();
          err = DbspjErr::OutOfQueryMemory;
          break;
        }
        parentPtr.p->m_bits &= ~(Uint32)TreeNode::T_LEAF;
        treeNodePtr.p->m_parentPtrI = parentPtr.i;

        // Build bitmask of all ancestors to treeNode
        treeNodePtr.p->m_ancestors = parentPtr.p->m_ancestors;
        treeNodePtr.p->m_ancestors.set(parentPtr.p->m_node_no);
      }

      if (unlikely(err != 0))
        break;
    } // DABits::NI_HAS_PARENT

    err = DbspjErr::InvalidTreeParametersSpecificationKeyParamBitsMissmatch;
    if (unlikely( ((treeBits  & DABits::NI_KEY_PARAMS)==0) !=
                  ((paramBits & DABits::PI_KEY_PARAMS)==0)))
    {
      jam();
      break;
    }

    if (treeBits & (DABits::NI_KEY_PARAMS
                    | DABits::NI_KEY_LINKED
                    | DABits::NI_KEY_CONSTS))
    {
      jam();
      DEBUG("NI_KEY_PARAMS | NI_KEY_LINKED | NI_KEY_CONSTS");

      /**
       * OPTIONAL PART 2:
       *
       * If keys are parameterized or linked
       *   DATA0[LO/HI] - Length of key pattern/#parameters to key
       */
      Uint32 len_cnt = * tree.ptr ++;
      Uint32 len = len_cnt & 0xFFFF; // length of pattern in words
      Uint32 cnt = len_cnt >> 16;    // no of parameters

      LocalArenaPoolImpl pool(requestPtr.p->m_arena, m_dependency_map_pool);
      Local_pattern_store pattern(pool, treeNodePtr.p->m_keyPattern);

      err = DbspjErr::InvalidTreeParametersSpecificationIncorrectKeyParamCount;
      if (unlikely( ((cnt==0) != ((treeBits & DABits::NI_KEY_PARAMS) == 0)) ||
                    ((cnt==0) != ((paramBits & DABits::PI_KEY_PARAMS) == 0))))
      {
        jam();
        break;
      }

      if (treeBits & DABits::NI_KEY_LINKED)
      {
        jam();
        DEBUG("LINKED-KEY PATTERN w/ " << cnt << " PARAM values");
        /**
         * Expand pattern into a new pattern (with linked values)
         */
        err = expand(pattern, treeNodePtr, tree, len, param, cnt);
        if (unlikely(err != 0))
        {
          jam();
          break;
        }
        /**
         * This node constructs a new key for each send
         */
        treeNodePtr.p->m_bits |= TreeNode::T_KEYINFO_CONSTRUCTED;
      }
      else
      {
        jam();
        DEBUG("FIXED-KEY w/ " << cnt << " PARAM values");
        /**
         * Expand pattern directly into keyinfo
         *   This means a "fixed" key from here on
         */
        bool hasNull;
        Uint32 keyInfoPtrI = RNIL;
        err = expand(keyInfoPtrI, tree, len, param, cnt, hasNull);
        if (unlikely(err != 0))
        {
          jam();
          releaseSection(keyInfoPtrI);
          break;
        }
        if (unlikely(hasNull))
        {
          /* API should have eliminated requests w/ const-NULL keys */
          jam();
          DEBUG("BEWARE: FIXED-key contains NULL values");
          releaseSection(keyInfoPtrI);
//        treeNodePtr.p->m_bits |= TreeNode::T_NULL_PRUNE;
//        break;
          ndbrequire(false);
        }
        treeNodePtr.p->m_send.m_keyInfoPtrI = keyInfoPtrI;
      }
      ndbassert(err == 0); // All errors should have been handled
    } // DABits::NI_KEY_...

    const Uint32 mask =
      DABits::NI_LINKED_ATTR | DABits::NI_ATTR_INTERPRET |
      DABits::NI_ATTR_LINKED | DABits::NI_ATTR_PARAMS;

    if (((treeBits & mask) | (paramBits & DABits::PI_ATTR_LIST)) != 0)
    {
      jam();
      /**
       * OPTIONAL PART 3: attrinfo handling
       * - NI_LINKED_ATTR - these are attributes to be passed to children
       * - PI_ATTR_LIST   - this is "user-columns" (passed as parameters)
       *
       * - NI_ATTR_INTERPRET - tree contains interpreted program
       * - NI_ATTR_LINKED - means that the attr-info contains linked-values
       * - NI_ATTR_PARAMS - means that the attr-info is parameterized
       *   PI_ATTR_PARAMS - means that the parameters contain attr parameters
       *
       * IF NI_ATTR_INTERPRET
       *   DATA0[LO/HI] = Length of program / total #arguments to program
       *   DATA1..N     = Program
       *
       * IF NI_ATTR_PARAMS
       *   DATA0[LO/HI] = Length / #param
       *   DATA1..N     = PARAM-0...PARAM-M
       *
       * IF PI_ATTR_INTERPRET
       *   DATA0[LO/HI] = Length of program / Length of subroutine-part
       *   DATA1..N     = Program (scan filter)
       *
       * IF NI_ATTR_LINKED
       *   DATA0[LO/HI] = Length / #
       */
      Uint32 sections[5] = { 0, 0, 0, 0, 0 };
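      /**
       * The five zero-words double as the length header used for
       * interpreted attrinfo (cf. Dbtup::interpreterStartLab()); as used by
       * the code below:
       *   sections[1] = size of the interpreted program
       *   sections[3] = size of the "final read" part
       *   sections[4] = size of the subroutine part
       * They are patched in place (via 'sectionptrs') as parts are appended.
       */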
      Uint32 * sectionptrs = 0;

      bool interpreted =
        (treeBits & DABits::NI_ATTR_INTERPRET) ||
        (paramBits & DABits::PI_ATTR_INTERPRET) ||
        (treeNodePtr.p->m_bits & TreeNode::T_ATTR_INTERPRETED);

      if (interpreted)
      {
        /**
         * Add section headers for interpreted execution
         *   and create a pointer so that they can be updated later
         */
        jam();
        err = DbspjErr::OutOfSectionMemory;
        if (unlikely(!appendToSection(attrInfoPtrI, sections, 5)))
        {
          jam();
          break;
        }

        SegmentedSectionPtr ptr;
        getSection(ptr, attrInfoPtrI);
        sectionptrs = ptr.p->theData;

        if (treeBits & DABits::NI_ATTR_INTERPRET)
        {
          jam();

          /**
           * Having two interpreter programs is an error.
           */
          err = DbspjErr::BothTreeAndParametersContainInterpretedProgram;
          if (unlikely(paramBits & DABits::PI_ATTR_INTERPRET))
          {
            jam();
            break;
          }

          treeNodePtr.p->m_bits |= TreeNode::T_ATTR_INTERPRETED;
          Uint32 len2 = * tree.ptr++;
          Uint32 len_prg = len2 & 0xFFFF; // Length of interpret program
          Uint32 len_pattern = len2 >> 16;// Length of attr param pattern
          err = DbspjErr::OutOfSectionMemory;
          if (unlikely(!appendToSection(attrInfoPtrI, tree.ptr, len_prg)))
          {
            jam();
            break;
          }

          tree.ptr += len_prg;
          sectionptrs[1] = len_prg; // size of interpret program

          Uint32 tmp = * tree.ptr ++; // attr-pattern header
          Uint32 cnt = tmp & 0xFFFF;

          if (treeBits & DABits::NI_ATTR_LINKED)
          {
            jam();
            /**
             * Expand pattern into a new pattern (with linked values)
             */
            LocalArenaPoolImpl pool(requestPtr.p->m_arena,
                                    m_dependency_map_pool);
            Local_pattern_store pattern(pool,treeNodePtr.p->m_attrParamPattern);
            err = expand(pattern, treeNodePtr, tree, len_pattern, param, cnt);
            if (unlikely(err))
            {
              jam();
              break;
            }
            /**
             * This node constructs a new attr-info for each send
             */
            treeNodePtr.p->m_bits |= TreeNode::T_ATTRINFO_CONSTRUCTED;
          }
          else
          {
            jam();
            /**
             * Expand pattern directly into attr-info param
             *   This means a "fixed" attr-info param from here on
             */
            bool hasNull;
            err = expand(attrParamPtrI, tree, len_pattern, param, cnt, hasNull);
            if (unlikely(err))
            {
              jam();
              break;
            }
//          ndbrequire(!hasNull);
          }
        }
        else // if (treeBits & DABits::NI_ATTR_INTERPRET)
        {
          jam();
          /**
           * Only relevant for interpreted stuff
           */
          ndbrequire((treeBits & DABits::NI_ATTR_PARAMS) == 0);
          ndbrequire((paramBits & DABits::PI_ATTR_PARAMS) == 0);
          ndbrequire((treeBits & DABits::NI_ATTR_LINKED) == 0);

          treeNodePtr.p->m_bits |= TreeNode::T_ATTR_INTERPRETED;

          if (! (paramBits & DABits::PI_ATTR_INTERPRET))
          {
            jam();

            /**
             * Tree node has interpreted execution,
             *   but no interpreted program was specified.
             *   Auto-add ExitOK (i.e. return each row)
             */
            Uint32 tmp = Interpreter::ExitOK();
            err = DbspjErr::OutOfSectionMemory;
            if (unlikely(!appendToSection(attrInfoPtrI, &tmp, 1)))
            {
              jam();
              break;
            }
            sectionptrs[1] = 1;
          }
        } // if (treeBits & DABits::NI_ATTR_INTERPRET)
      } // if (interpreted)

      if (paramBits & DABits::PI_ATTR_INTERPRET)
      {
        jam();

        /**
         * Add the interpreted code that represents the scan filter.
         */
        const Uint32 len2 = * param.ptr++;
        Uint32 program_len = len2 & 0xFFFF;
        Uint32 subroutine_len = len2 >> 16;
        err = DbspjErr::OutOfSectionMemory;
        if (unlikely(!appendToSection(attrInfoPtrI, param.ptr, program_len)))
        {
          jam();
          break;
        }
        /**
         * The interpreted code is added in the "Interpreted execute region"
         * of the attrinfo (see Dbtup::interpreterStartLab() for details).
         * It will thus execute before reading the attributes that constitute
         * the projections.
         */
        sectionptrs[1] = program_len;
        param.ptr += program_len;

        if (subroutine_len)
        {
          if (unlikely(!appendToSection(attrParamPtrI,
                                        param.ptr, subroutine_len)))
          {
            jam();
            break;
          }
          sectionptrs[4] = subroutine_len;
          param.ptr += subroutine_len;
        }
        treeNodePtr.p->m_bits |= TreeNode::T_ATTR_INTERPRETED;
      }

      Uint32 sum_read = 0;
      Uint32 dst[MAX_ATTRIBUTES_IN_TABLE + 2];

      if (paramBits & DABits::PI_ATTR_LIST)
      {
        jam();
        Uint32 len = * param.ptr++;
        DEBUG("PI_ATTR_LIST");

        treeNodePtr.p->m_bits |= TreeNode::T_USER_PROJECTION;
        err = DbspjErr::OutOfSectionMemory;
        if (!appendToSection(attrInfoPtrI, param.ptr, len))
        {
          jam();
          break;
        }

        param.ptr += len;

        /**
         * Insert a flush of this partial result set
         */
        Uint32 flush[4];
        flush[0] = AttributeHeader::FLUSH_AI << 16;
        flush[1] = ctx.m_resultRef;
        flush[2] = ctx.m_resultData;
        flush[3] = ctx.m_senderRef; // RouteRef
        if (!appendToSection(attrInfoPtrI, flush, 4))
        {
          jam();
          break;
        }
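        /**
         * A note on FLUSH_AI (an interpretation, cf. AttributeHeader.hpp):
         * it asks LQH/TUP to ship the attrinfo produced so far (the user
         * projection appended above) directly to the API client given by
         * m_resultRef/m_resultData, routed via m_senderRef, while the
         * linked attributes appended below go back to SPJ itself.
         */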

        sum_read += len + 4;
      }

      if (treeBits & DABits::NI_LINKED_ATTR)
      {
        jam();
        DEBUG("NI_LINKED_ATTR");
        err = DbspjErr::InvalidTreeNodeSpecification;
        Uint32 cnt = unpackList(MAX_ATTRIBUTES_IN_TABLE, dst, tree);
        if (unlikely(cnt > MAX_ATTRIBUTES_IN_TABLE))
        {
          jam();
          break;
        }

        /**
         * An AttributeHeader word stores the attrId in the upper 16 bits
         */
        for (Uint32 i = 0; i<cnt; i++)
          dst[i] <<= 16;

        /**
         * Read the correlation factor
         */
        dst[cnt++] = AttributeHeader::CORR_FACTOR32 << 16;
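        /**
         * CORR_FACTOR32 is a pseudo-attribute: its value identifies which
         * parent row a returned child row belongs to, which is what lets
         * SPJ join partial results from different tree nodes back together.
         */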

        err = DbspjErr::OutOfSectionMemory;
        if (!appendToSection(attrInfoPtrI, dst, cnt))
        {
          jam();
          break;
        }

        sum_read += cnt;
      }

      if (interpreted)
      {
        jam();
        /**
         * Let reads be performed *after* the interpreted program,
         *   i.e. in the "final read" section
         */
        sectionptrs[3] = sum_read;

        if (attrParamPtrI != RNIL)
        {
          jam();
          ndbrequire(!(treeNodePtr.p->m_bits&TreeNode::T_ATTRINFO_CONSTRUCTED));

          SegmentedSectionPtr ptr;
          getSection(ptr, attrParamPtrI);
          {
            SectionReader r0(ptr, getSectionSegmentPool());
            err = appendTreeToSection(attrInfoPtrI, r0, ptr.sz);
            if (unlikely(err != 0))
            {
              jam();
              break;
            }
            sectionptrs[4] = ptr.sz;
          }
          releaseSection(attrParamPtrI);
          attrParamPtrI = RNIL;
        }
      }

      treeNodePtr.p->m_send.m_attrInfoPtrI = attrInfoPtrI;
      attrInfoPtrI = RNIL;
    } // if (((treeBits & mask) | (paramBits & DABits::PI_ATTR_LIST)) != 0)

    // An empty attrinfo would cause a node crash.
    if (treeNodePtr.p->m_send.m_attrInfoPtrI == RNIL)
    {
      jam();

      // Add a dummy interpreted program.
      Uint32 tmp = Interpreter::ExitOK();
      err = DbspjErr::OutOfSectionMemory;
      if (unlikely(!appendToSection(treeNodePtr.p->m_send.m_attrInfoPtrI, &tmp, 1)))
      {
        jam();
        break;
      }
    }

    return 0;
  } while (0);

  if (attrInfoPtrI != RNIL)
  {
    jam();
    releaseSection(attrInfoPtrI);
  }

  if (attrParamPtrI != RNIL)
  {
    jam();
    releaseSection(attrParamPtrI);
  }

  return err;
}
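
/**
 * A note on the control flow above: parseDA() uses the common
 * "do { ... } while (0)" idiom. Each failure path sets 'err' and issues
 * 'break', which drops out of the pseudo-loop straight into the shared
 * cleanup releasing the attrInfo/attrParam sections. A minimal sketch of
 * the idiom, with hypothetical helper stubs, guarded by the hypothetical
 * SPJ_DOC_EXAMPLES macro:
 */
#ifdef SPJ_DOC_EXAMPLES
static void* example_acquire()        { return 0; }  // stub: always fails
static void  example_release(void* p) { (void)p; }   // stub: no-op

static Uint32 example_unwind()
{
  Uint32 err = 0;
  void* resource = 0;
  do
  {
    resource = example_acquire();
    if (resource == 0)
    {
      err = 1;     // would be a DbspjErr code in real block code
      break;       // jumps to the shared cleanup below
    }
    // ... more steps, each of which may 'break' with err set ...
    return 0;      // the success path returns before the cleanup
  } while (0);

  example_release(resource);  // shared error cleanup
  return err;
}
#endif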

/**
 * END - MODULE COMMON PARSE/UNPACK
 */

/**
 * Process a scan request for an ndbinfo table. (These tables are used for
 * monitoring purposes and do not contain application data.)
 */
void Dbspj::execDBINFO_SCANREQ(Signal *signal)
{
  DbinfoScanReq req= * CAST_PTR(DbinfoScanReq, &signal->theData[0]);
  const Ndbinfo::ScanCursor* cursor =
    CAST_CONSTPTR(Ndbinfo::ScanCursor, DbinfoScan::getCursorPtr(&req));
  Ndbinfo::Ratelimit rl;

  jamEntry();

  switch(req.tableId){

    // The SPJ block only implements the ndbinfo.counters table.
  case Ndbinfo::COUNTERS_TABLEID:
  {
    Ndbinfo::counter_entry counters[] = {
      { Ndbinfo::SPJ_READS_RECEIVED_COUNTER,
        c_Counters.get_counter(CI_READS_RECEIVED) },
      { Ndbinfo::SPJ_LOCAL_READS_SENT_COUNTER,
        c_Counters.get_counter(CI_LOCAL_READS_SENT) },
      { Ndbinfo::SPJ_REMOTE_READS_SENT_COUNTER,
        c_Counters.get_counter(CI_REMOTE_READS_SENT) },
      { Ndbinfo::SPJ_READS_NOT_FOUND_COUNTER,
        c_Counters.get_counter(CI_READS_NOT_FOUND) },
      { Ndbinfo::SPJ_TABLE_SCANS_RECEIVED_COUNTER,
        c_Counters.get_counter(CI_TABLE_SCANS_RECEIVED) },
      { Ndbinfo::SPJ_LOCAL_TABLE_SCANS_SENT_COUNTER,
        c_Counters.get_counter(CI_LOCAL_TABLE_SCANS_SENT) },
      { Ndbinfo::SPJ_RANGE_SCANS_RECEIVED_COUNTER,
        c_Counters.get_counter(CI_RANGE_SCANS_RECEIVED) },
      { Ndbinfo::SPJ_LOCAL_RANGE_SCANS_SENT_COUNTER,
        c_Counters.get_counter(CI_LOCAL_RANGE_SCANS_SENT) },
      { Ndbinfo::SPJ_REMOTE_RANGE_SCANS_SENT_COUNTER,
        c_Counters.get_counter(CI_REMOTE_RANGE_SCANS_SENT) },
      { Ndbinfo::SPJ_SCAN_BATCHES_RETURNED_COUNTER,
        c_Counters.get_counter(CI_SCAN_BATCHES_RETURNED) },
      { Ndbinfo::SPJ_SCAN_ROWS_RETURNED_COUNTER,
        c_Counters.get_counter(CI_SCAN_ROWS_RETURNED) },
      { Ndbinfo::SPJ_PRUNED_RANGE_SCANS_RECEIVED_COUNTER,
        c_Counters.get_counter(CI_PRUNED_RANGE_SCANS_RECEIVED) },
      { Ndbinfo::SPJ_CONST_PRUNED_RANGE_SCANS_RECEIVED_COUNTER,
        c_Counters.get_counter(CI_CONST_PRUNED_RANGE_SCANS_RECEIVED) }
    };
    const size_t num_counters = sizeof(counters) / sizeof(counters[0]);

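    /**
     * cursor->data[0] holds the counter index where a previous,
     * rate-limited invocation left off; ndbinfo_send_scan_break() below
     * stores the next index there so the scan resumes at the right row.
     */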
    Uint32 i = cursor->data[0];
    const BlockNumber bn = blockToMain(number());
    while (i < num_counters)
    {
      jam();
      Ndbinfo::Row row(signal, req);
      row.write_uint32(getOwnNodeId());
      row.write_uint32(bn);           // block number
      row.write_uint32(instance());   // block instance
      row.write_uint32(counters[i].id);

      row.write_uint64(counters[i].val);
      ndbinfo_send_row(signal, req, row, rl);
      i++;
      if (rl.need_break(req))
      {
        jam();
        ndbinfo_send_scan_break(signal, req, rl, i);
        return;
      }
    }
    break;
  }

  default:
    break;
  }

  ndbinfo_send_scan_conf(signal, req, rl);
} // Dbspj::execDBINFO_SCANREQ(Signal *signal)


/**
 * Incremental calculation of standard deviation (Welford's algorithm):
 *
 * Suppose that the data set is x(1), x(2),..., x(n); then for each new
 * sample we can find an updated mean (M) and sum of squared deviations (S)
 * as:
 *
 * M(1) = x(1), M(k) = M(k-1) + (x(k) - M(k-1)) / k
 * S(1) = 0,    S(k) = S(k-1) + (x(k) - M(k-1)) * (x(k) - M(k))
 *
 * Source: http://mathcentral.uregina.ca/QQ/database/QQ.09.02/carlos1.html
 */
void Dbspj::IncrementalStatistics::update(double sample)
{
  // Prevent wrap-around
  if (m_noOfSamples < 0xffffffff)
  {
    m_noOfSamples++;
    const double delta = sample - m_mean;
    m_mean += delta / m_noOfSamples;
    m_sumSquare += delta * (sample - m_mean);
  }
}
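
/**
 * Standalone sketch (hypothetical, guarded by the SPJ_DOC_EXAMPLES macro):
 * after n samples of the recurrence above, the sample variance can be read
 * out as S/(n-1) and the standard deviation as its square root. The demo
 * below re-implements the update with plain doubles and checks it against
 * a data set whose statistics are known.
 */
#ifdef SPJ_DOC_EXAMPLES
#include <math.h>

static double example_stddev(const double* x, unsigned n)
{
  double mean = 0.0, sumSquare = 0.0;
  for (unsigned k = 0; k < n; k++)
  {
    const double delta = x[k] - mean;
    mean += delta / (k + 1);             // M(k) = M(k-1) + delta/k
    sumSquare += delta * (x[k] - mean);  // S(k) = S(k-1) + delta*(x(k)-M(k))
  }
  return (n > 1) ? sqrt(sumSquare / (n - 1)) : 0.0;
}

static void example_stddev_check()
{
  const double x[] = { 2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0 };
  // Mean is 5.0; sum of squared deviations is 32; sample variance 32/7.
  const double sd = example_stddev(x, 8);
  (void)sd;  // ~2.138 for this data set
}
#endif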