/*
   Copyright (c) 2012, 2021, Oracle and/or its affiliates.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is also distributed with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation.  The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have included with MySQL.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/

#ifndef DBSPJ_H
#define DBSPJ_H

#include <SimulatedBlock.hpp>
#include <signaldata/LqhKey.hpp>
#include <signaldata/ScanFrag.hpp>
#include <AttributeHeader.hpp>
#include <IntrusiveList.hpp>
#include <ArenaPool.hpp>
#include <DataBuffer2.hpp>
#include <Bitmask.hpp>
#include <signaldata/DbspjErr.hpp>
#include "../dbtup/tuppage.hpp"

#define JAM_FILE_ID 481


class SectionReader;
struct QueryNode;
struct QueryNodeParameters;

//#define SPJ_TRACE_TIME

class Dbspj: public SimulatedBlock {
public:
  Dbspj(Block_context& ctx, Uint32 instanceNumber = 0);
  virtual ~Dbspj();

private:
  BLOCK_DEFINES(Dbspj);

  /**
   * Signals from DICT
   */
  void execTC_SCHVERREQ(Signal* signal);
  void execTAB_COMMITREQ(Signal* signal);
  void execPREP_DROP_TAB_REQ(Signal* signal);
  void execDROP_TAB_REQ(Signal* signal);
  void execALTER_TAB_REQ(Signal* signal);

  /**
   * Signals from TC
   */
  void execLQHKEYREQ(Signal* signal);
  void execSCAN_FRAGREQ(Signal* signal);
  void execSCAN_NEXTREQ(Signal* signal);

  void execDIH_SCAN_TAB_REF(Signal*);
  void execDIH_SCAN_TAB_CONF(Signal*);
  void execDIH_SCAN_GET_NODES_REF(Signal*);
  void execDIH_SCAN_GET_NODES_CONF(Signal*);

  void execSIGNAL_DROPPED_REP(Signal*);

  /**
   * Signals from LQH
   */
  void execLQHKEYREF(Signal* signal);
  void execLQHKEYCONF(Signal* signal);
  void execSCAN_FRAGREF(Signal* signal);
  void execSCAN_FRAGCONF(Signal* signal);
  void execSCAN_HBREP(Signal* signal);
  void execTRANSID_AI(Signal* signal);

  /**
   * General signals
   */
  void execDUMP_STATE_ORD(Signal* signal) {}
  void execREAD_NODESCONF(Signal*);
  void execREAD_CONFIG_REQ(Signal* signal);
  void execSTTOR(Signal* signal);
  void execDBINFO_SCANREQ(Signal* signal);
  void execCONTINUEB(Signal*);
  void execNODE_FAILREP(Signal*);
  void execINCL_NODEREQ(Signal*);
  void execAPI_FAILREQ(Signal*);

  void sendSTTORRY(Signal* signal);

protected:
  //virtual bool getParam(const char* name, Uint32* count);

public:
  struct Request;
  struct TreeNode;
  struct ScanFragHandle;
  typedef DataBuffer2<14, LocalArenaPoolImpl> Correlation_list;
  typedef LocalDataBuffer2<14, LocalArenaPoolImpl> Local_correlation_list;
  typedef DataBuffer2<14, LocalArenaPoolImpl> Dependency_map;
  typedef LocalDataBuffer2<14, LocalArenaPoolImpl> Local_dependency_map;
  typedef DataBuffer2<14, LocalArenaPoolImpl> PatternStore;
  typedef LocalDataBuffer2<14, LocalArenaPoolImpl> Local_pattern_store;
  typedef Bitmask<(NDB_SPJ_MAX_TREE_NODES+31)/32> TreeNodeBitMask;

  /* *********** TABLE RECORD ********************************************* */

  /********************************************************/
  /* THIS RECORD CONTAINS THE CURRENT SCHEMA VERSION OF   */
  /* ALL TABLES IN THE SYSTEM.                            */
  /********************************************************/
  struct TableRecord {
    TableRecord()
    : m_currentSchemaVersion(0), m_flags(0)
    {};

    TableRecord(Uint32 schemaVersion)
    : m_currentSchemaVersion(schemaVersion), m_flags(TR_PREPARED)
    {};

    Uint32 m_currentSchemaVersion;
    Uint16 m_flags;

    enum {
      TR_ENABLED      = 1 << 0,
      TR_DROPPING     = 1 << 1,
      TR_PREPARED     = 1 << 2
    };
    Uint8 get_enabled()     const { return (m_flags & TR_ENABLED)      != 0; }
    Uint8 get_dropping()    const { return (m_flags & TR_DROPPING)     != 0; }
    Uint8 get_prepared()    const { return (m_flags & TR_PREPARED)     != 0; }
    void set_enabled(Uint8 f)     { f ? m_flags |= (Uint16)TR_ENABLED  : m_flags &= ~(Uint16)TR_ENABLED; }
    void set_dropping(Uint8 f)    { f ? m_flags |= (Uint16)TR_DROPPING : m_flags &= ~(Uint16)TR_DROPPING; }
    void set_prepared(Uint8 f)    { f ? m_flags |= (Uint16)TR_PREPARED : m_flags &= ~(Uint16)TR_PREPARED; }

    Uint32 checkTableError(Uint32 schemaVersion) const;
  };
  typedef Ptr<TableRecord> TableRecordPtr;

  enum Buffer_type {
    BUFFER_VOID  = 0,
    BUFFER_STACK = 1,
    BUFFER_VAR   = 2
  };

  struct RowRef
  {
    Uint32 m_page_id;
    Uint16 m_page_pos;
    union
    {
      Uint16 unused;
      enum Buffer_type m_alloc_type:16;
    };

    void copyto_link(Uint32 * dst) const {
      dst[0] = m_page_id; dst[1] = m_page_pos;
    }
    void assign_from_link(const Uint32 * src) {
      m_page_id = src[0];
      m_page_pos = src[1];
    }

    void copyto_map(Uint16 * dst) const {
      dst[0] = Uint16(m_page_id);
      dst[1] = Uint16(m_page_id >> 16);
      dst[2] = m_page_pos;
    }

    void assign_from_map(const Uint16 * src) {
      m_page_id = src[0];
      m_page_id += Uint32(src[1]) << 16;
      m_page_pos = src[2];
    }
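
    /**
     * Note: as seen above, the map form packs a RowRef into 3 Uint16 words:
     * words 0/1 hold the low/high halves of m_page_id, and word 2 holds
     * m_page_pos, where 0xFFFF marks a null entry (see map_is_null() below).
     */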
    static bool map_is_null(const Uint16 * src) {
      return src[2] == 0xFFFF;
    }

    void setNull() { m_page_id = RNIL; }
    bool isNull() const { return m_page_id == RNIL; }
  };

  static const RowRef NullRowRef;
  /**
   * This struct represents a row being passed to a child
   */
  struct RowPtr
  {
    Uint32 m_type;
    Uint32 m_src_node_ptrI;
    Uint32 m_src_correlation;

    struct Header
    {
      Uint32 m_len;
      Uint32 m_offset[1];
    };

    struct Section
    {
      const Header * m_header;
      SegmentedSectionPtrPOD m_dataPtr;
    };

    struct Linear
    {
      RowRef m_row_ref;
      const Header * m_header;
      const Uint32 * m_data;
    };
    union
    {
      struct Section m_section;
      struct Linear m_linear;
    } m_row_data;

    enum RowType
    {
      RT_SECTION = 1,
      RT_LINEAR = 2,
      RT_END = 0
    };
  };

  struct RowBuffer;  // forward decl.

  /**
   * Define an overlayed 'base class' for SLFifoRowList and RowMap.
   * As we want these to be POD structs, we do not use inheritance,
   * but have to take care that the first part of these structs is
   * correctly overlayed.
   */
  struct RowCollectionBase
  {
    RowBuffer* m_rowBuffer;
  };
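
  /**
   * The overlay invariant could be checked at compile time with, e.g.,
   * a hypothetical check (not part of this header):
   *
   *   static_assert(offsetof(SLFifoRowList, m_rowBuffer) ==
   *                 offsetof(RowCollectionBase, m_rowBuffer),
   *                 "SLFifoRowList must overlay RowCollectionBase");
   */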

  struct SLFifoRowList //: public RowCollectionBase
  {
    /**
     * BEWARE: Overlayed 'struct RowCollectionBase'
     */
    RowBuffer* m_rowBuffer;

    /**
     * Data used for a singly linked list of rows
     */
    Uint32 m_first_row_page_id;
    Uint32 m_last_row_page_id;
    Uint16 m_first_row_page_pos;
    Uint16 m_last_row_page_pos;

    void construct(RowBuffer& rowBuffer) {
      m_rowBuffer = &rowBuffer;
      init();
    }
    void init() { m_first_row_page_id = RNIL; }
    bool isNull() const { return m_first_row_page_id == RNIL; }
  };

  struct RowMap //: public RowCollectionBase
  {
    /**
     * BEWARE: Overlayed 'struct RowCollectionBase'
     */
    RowBuffer* m_rowBuffer;

    /**
     * Data used for a map with rows (key is correlation id).
     * Currently a single array is used to store row references
     * (size == batch size).
     */
    RowRef m_map_ref;
    Uint16 m_size;                // size of array
    Uint16 m_elements;            // #elements in array

    void construct(RowBuffer& rowBuffer,
                   Uint32 capacity)
    {
      m_rowBuffer = &rowBuffer;
      m_size = capacity;
      init();
    }
    void init() {
      m_map_ref.setNull();
      m_elements = 0;
    }

    bool isNull() const { return m_map_ref.isNull(); }

    void assign(RowRef ref) {
      m_map_ref = ref;
    }

    void copyto(RowRef& ref) const {
      ref = m_map_ref;
    }

    /**
     * Functions for manipulating the *content* of the map
     */
    void clear(Uint32 * ptr) {
      memset(ptr, 0xFF, MAP_SIZE_PER_REF_16 * m_size * sizeof(Uint16));
    }
    void store(Uint32 * _ptr, Uint32 pos, RowRef ref) {
      Uint16 * ptr = (Uint16*)_ptr;
      ptr += MAP_SIZE_PER_REF_16 * pos;
      ref.copyto_map(ptr);
      m_elements++;
    }
    static void load(const Uint32 * _ptr, Uint32 pos, RowRef & ref) {
      const Uint16 * ptr = (const Uint16*)_ptr;
      ptr += MAP_SIZE_PER_REF_16 * pos;
      ref.assign_from_map(ptr);
    }
    static bool isNull(const Uint32 * _ptr, Uint32 pos) {
      const Uint16 * ptr = (const Uint16*)_ptr;
      ptr += MAP_SIZE_PER_REF_16 * pos;
      return RowRef::map_is_null(ptr);
    }

    STATIC_CONST( MAP_SIZE_PER_REF_16 = 3 );
  };
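
  /**
   * Usage sketch (assumed, for illustration): given the map array 'ptr'
   * addressed by m_map_ref (see get_row_ptr() declared further below), a
   * row reference is stored and later retrieved by its correlation id 'pos':
   *
   *   map.store(ptr, pos, rowRef);   // pack into 3 Uint16 slots
   *   RowRef ref;
   *   RowMap::load(ptr, pos, ref);   // unpack the same slots
   */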

  /**
   * Define an overlayed 'base class' for SLFifoRowListIterator
   * and RowMapIterator.
   * As we want these to be POD structs, we do not use inheritance,
   * but have to take care that the first part of these structs is
   * correctly overlayed.
   */
  struct RowIteratorBase
  {
    RowRef m_ref;
    Uint32 * m_row_ptr;

    bool isNull() const { return m_ref.isNull(); }
    void setNull() { m_ref.setNull(); }
  };

  struct SLFifoRowListIterator //: public RowIteratorBase
  {
    /**
     * BEWARE: Overlayed 'struct RowIteratorBase'
     */
    RowRef m_ref;
    Uint32 * m_row_ptr;

    bool isNull() const { return m_ref.isNull(); }
    void setNull() { m_ref.setNull(); }
    // END: RowIteratorBase
  };

  struct RowMapIterator //: public RowIteratorBase
  {
    /**
     * BEWARE: Overlayed 'struct RowIteratorBase'
     */
    RowRef m_ref;
    Uint32 * m_row_ptr;

    bool isNull() const { return m_ref.isNull(); }
    void setNull() { m_ref.setNull(); }
    // END: RowIteratorBase

    Uint32 * m_map_ptr;
    Uint16 m_size;
    Uint16 m_element_no;
  };

  /**
   * Abstraction of SLFifoRowList & RowMap
   */
  struct RowCollection
  {
    enum collection_type
    {
      COLLECTION_VOID,
      COLLECTION_MAP,
      COLLECTION_LIST
    };
    union
    {
      RowCollectionBase m_base;  // Common part for map & list
      SLFifoRowList m_list;
      RowMap m_map;
    };

    RowCollection() : m_type(COLLECTION_VOID) {}

    void construct(collection_type type,
                   RowBuffer& rowBuffer,
                   Uint32 capacity)
    {
      m_type = type;
      if (m_type == COLLECTION_MAP)
        m_map.construct(rowBuffer, capacity);
      else if (m_type == COLLECTION_LIST)
        m_list.construct(rowBuffer);
    }

    void init() {
      if (m_type == COLLECTION_MAP)
        m_map.init();
      else if (m_type == COLLECTION_LIST)
        m_list.init();
    }

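    /**
     * A list-stored row is prefixed by a two-word link (written by
     * RowRef::copyto_link()); a map-stored row has no such prefix,
     * which is what rowOffset() below reports.
     */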
    Uint32 rowOffset() const {
      return (m_type == COLLECTION_MAP) ? 0 : 2;
    }

    collection_type m_type;
  };

  struct RowIterator
  {
    union
    {
      RowIteratorBase m_base;  // Common part for map & list
      SLFifoRowListIterator m_list;
      RowMapIterator m_map;
    };
    RowCollection::collection_type m_type;

    RowIterator() { init(); }
    void init() { m_base.setNull(); }
    bool isNull() const { return m_base.isNull(); }
  };


  /**
   * A struct used when building a TreeNode
   */
  struct Build_context
  {
    Uint32 m_cnt;
    Uint32 m_scanPrio;
    Uint32 m_savepointId;
    Uint32 m_batch_size_rows;
    Uint32 m_resultRef;  // API
    Uint32 m_resultData; // API
    Uint32 m_senderRef;  // TC (used for routing)
    Uint32 m_scan_cnt;
    Signal* m_start_signal; // Argument to first node in tree

    TreeNodeBitMask m_scans; // TreeNodes doing scans

    // Used for resolving dependencies
    Ptr<TreeNode> m_node_list[NDB_SPJ_MAX_TREE_NODES];
  };

  struct RowPage
  {
    /**
     * NOTE: This contains various padding to be binary aligned with Tup_page
     *       (for storing into DLFifoList<RowPage>)
     */
    RowPage() {}
    struct File_formats::Page_header m_page_header;
    Uint32 unused0;
    Uint32 unused1;
    Uint32 nextList;
    Uint32 prevList;
    Uint32 m_data[GLOBAL_PAGE_SIZE_WORDS - 7];
    STATIC_CONST( SIZE = GLOBAL_PAGE_SIZE_WORDS - 7 );
  };

  typedef Tup_varsize_page Var_page;

  struct RowBuffer
  {
    enum Buffer_type m_type;

    RowBuffer() : m_type(BUFFER_VOID) {}
    DLFifoList<RowPage>::Head m_page_list;

    void init(enum Buffer_type type)
    {
      new (&m_page_list) DLFifoList<RowPage>::Head();
      m_type = type;
      reset();
    }
    void reset()
    {
      if (m_type == BUFFER_STACK)
        m_stack.m_pos = 0xFFFF;
      else if (m_type == BUFFER_VAR)
        m_var.m_free = 0;
    }

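    /**
     * Allocation bookkeeping depends on m_type (see stackAlloc()/varAlloc()
     * declared further below): BUFFER_STACK keeps a fill position on the
     * head page, while BUFFER_VAR tracks the free space on the last page
     * in the list.
     */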
    struct Stack
    {
      Uint32 m_pos; // position on head-page
    };

    struct Var
    {
      Uint32 m_free; // Free on last page in list
    };

    union {
      struct Stack m_stack;
      struct Var m_var;
    };
  };

  /**
   * A struct for building the DA-part
   *   that is shared between QN_LookupNode & QN_ScanFragNode
   */
  struct DABuffer
  {
    const Uint32 * ptr;
    const Uint32 * end;
  };

  /**
   * A struct with "virtual" functions for different operations
   */
  struct OpInfo
  {
    /**
     * This function creates an operation suitable
     *   for execution
     */
    Uint32 (Dbspj::*m_build)(Build_context& ctx, Ptr<Request>,
                             const QueryNode*, const QueryNodeParameters*);

    /**
     * This function is called after build, but before start.
     *   It is allowed to block (i.e. send signals)
     *   and should, if so, increase Request::m_outstanding
     */
    void (Dbspj::*m_prepare)(Signal*, Ptr<Request>, Ptr<TreeNode>);

    /**
     * This function is used for starting a request
     */
    void (Dbspj::*m_start)(Signal*, Ptr<Request>, Ptr<TreeNode>);

    /**
     * This function is used when getting a TRANSID_AI
     */
    void (Dbspj::*m_execTRANSID_AI)(Signal*, Ptr<Request>, Ptr<TreeNode>,
                                    const RowPtr&);

    /**
     * This function is used when getting a LQHKEYREF
     */
    void (Dbspj::*m_execLQHKEYREF)(Signal*, Ptr<Request>, Ptr<TreeNode>);

    /**
     * This function is used when getting a LQHKEYCONF
     */
    void (Dbspj::*m_execLQHKEYCONF)(Signal*, Ptr<Request>, Ptr<TreeNode>);

    /**
     * This function is used when getting a SCAN_FRAGREF
     */
    void (Dbspj::*m_execSCAN_FRAGREF)(Signal*, Ptr<Request>, Ptr<TreeNode>, Ptr<ScanFragHandle>);

    /**
     * This function is used when getting a SCAN_FRAGCONF
     */
    void (Dbspj::*m_execSCAN_FRAGCONF)(Signal*, Ptr<Request>, Ptr<TreeNode>, Ptr<ScanFragHandle>);

    /**
     * This function is called on the *child* by the *parent* when passing rows
     */
    void (Dbspj::*m_parent_row)(Signal*, Ptr<Request>, Ptr<TreeNode>,
                                const RowPtr&);

    /**
     * This function is called on the *child* by the *parent* when the *parent*
     *   has completed a batch
     */
    void (Dbspj::*m_parent_batch_complete)(Signal*, Ptr<Request>, Ptr<TreeNode>);

    /**
     * This function is called on the *child* by the *parent* when this
     *   child should prepare to resend results related to the parent's
     *   current batch
     */
    void (Dbspj::*m_parent_batch_repeat)(Signal*, Ptr<Request>, Ptr<TreeNode>);

    /**
     * This function is called on the *child* by the *parent* when the
     *   child should release buffers related to the parent's current batch
     */
    void (Dbspj::*m_parent_batch_cleanup)(Ptr<Request>, Ptr<TreeNode>);

    /**
     * This function is called when getting a SCAN_NEXTREQ
     */
    void (Dbspj::*m_execSCAN_NEXTREQ)(Signal*, Ptr<Request>, Ptr<TreeNode>);

    /**
     * This function is called when all nodes in the tree are finished.
     *   It is allowed to "block" (by increasing requestPtr.p->m_outstanding)
     */
    void (Dbspj::*m_complete)(Signal*, Ptr<Request>, Ptr<TreeNode>);

    /**
     * This function is called when a tree is aborted.
     *   It is allowed to "block" (by increasing requestPtr.p->m_outstanding)
     */
    void (Dbspj::*m_abort)(Signal*, Ptr<Request>, Ptr<TreeNode>);

    /**
     * This function is called on node-failure
     */
    Uint32 (Dbspj::*m_execNODE_FAILREP)(Signal*, Ptr<Request>, Ptr<TreeNode>,
                                        NdbNodeBitmask);
    /**
     * This function is called when request/node(s) is/are removed.
     *  Should only do local cleanup(s)
     */
    void (Dbspj::*m_cleanup)(Ptr<Request>, Ptr<TreeNode>);
  };  // struct OpInfo
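
  /**
   * Dispatch sketch (an assumption, for illustration only): a TreeNode
   * carries a pointer to its OpInfo (TreeNode::m_info), and the block
   * invokes these "virtual" functions as member-function pointers, e.g.:
   *
   *   (this->*(treeNodePtr.p->m_info->m_start))(signal, requestPtr,
   *                                             treeNodePtr);
   */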

  struct LookupData
  {
    Uint32 m_api_resultRef;
    Uint32 m_api_resultData;
    /**
     * This is the number of outstanding messages. When this number is zero
     * and m_parent_batch_complete is true, we know that we have received
     * all rows for this operation in this batch.
     */
    Uint32 m_outstanding;
    Uint32 m_lqhKeyReq[LqhKeyReq::FixedSignalLength + 4];
  };

  struct ScanFragData
  {
    Uint32 m_rows_received;  // #execTRANSID_AI
    Uint32 m_rows_expecting; // ScanFragConf
    Uint32 m_scanFragReq[ScanFragReq::SignalLength + 2];
    Uint32 m_scanFragHandlePtrI;
  };

  struct ScanFragHandle
  {
    enum SFH_State
    {
      SFH_NOT_STARTED  = 0,
      SFH_SCANNING     = 1, // in LQH
      SFH_WAIT_NEXTREQ = 2,
      SFH_COMPLETE     = 3,
      SFH_WAIT_CLOSE   = 4
    };

    void init(Uint32 fid) {
      m_ref = 0;
      m_fragId = fid;
      m_state = SFH_NOT_STARTED;
      m_rangePtrI = RNIL;
      reset_ranges();
    }

    Uint32 m_magic;
    Uint32 m_treeNodePtrI;
    Uint16 m_fragId;
    Uint16 m_state;
    Uint32 m_ref;

    void reset_ranges() {
      // m_rangePtrI is explicitly managed...in code
      m_range_builder.m_range_cnt = m_range_builder.m_range_size = 0;
    }
    struct RangeBuilder
    {
      Uint32 m_range_size;
      Uint16 m_range_cnt; // to set bounds info correctly
    } m_range_builder;
    Uint32 m_rangePtrI;
    union {
      Uint32 nextList;
      Uint32 nextPool;
    };
  };

  typedef RecordPool<ScanFragHandle, ArenaPool> ScanFragHandle_pool;
  typedef SLFifoListImpl<ScanFragHandle_pool, ScanFragHandle> ScanFragHandle_list;
  typedef LocalSLFifoListImpl<ScanFragHandle_pool, ScanFragHandle> Local_ScanFragHandle_list;

  /**
   * This class computes mean and standard deviation incrementally for a series
   * of samples.
   */
  class IncrementalStatistics
  {
  public:
    /**
     * We cannot have a (non-trivial) constructor, since this class is used in
     * unions.
     */
    void init()
    {
      m_mean = m_sumSquare = 0.0;
      m_noOfSamples = 0;
    }

    // Add another sample.
    void update(double sample);

    double getMean() const { return m_mean; }

    double getStdDev() const {
      return m_noOfSamples < 2 ? 0.0 : sqrt(m_sumSquare/(m_noOfSamples - 1));
    }

  private:
    // Mean of all samples
    double m_mean;
    // Sum of squares of differences from the current mean.
    double m_sumSquare;
    Uint32 m_noOfSamples;
  }; // IncrementalStatistics
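
  /**
   * update() is implemented out of line. A minimal sketch of an incremental
   * (Welford-style) update consistent with getMean()/getStdDev() above
   * could be:
   *
   *   const double delta = sample - m_mean;
   *   m_noOfSamples++;
   *   m_mean += delta / m_noOfSamples;
   *   m_sumSquare += delta * (sample - m_mean);
   */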

  struct ScanIndexData
  {
    Uint16 m_frags_complete;
    Uint16 m_frags_outstanding;
    /**
     * The number of fragments for which we have not yet sent SCAN_FRAGREQ but
     * will eventually do so.
     */
    Uint16 m_frags_not_started;
    Uint32 m_rows_received;  // #execTRANSID_AI
    Uint32 m_rows_expecting; // Sum(ScanFragConf)
    Uint32 m_batch_chunks;   // #SCAN_FRAGREQ + #SCAN_NEXTREQ to retrieve batch
    Uint32 m_scanCookie;
    Uint32 m_fragCount;
    // The number of fragments that we scan in parallel.
    Uint32 m_parallelism;
    // True if we are still receiving the first batch for this operation.
    bool   m_firstBatch;
    /**
     * True if this is the first instantiation of this operation. A child
     * operation will be instantiated once for each batch of its parent.
     */
    bool m_firstExecution;
    /**
     * Mean and standard deviation for the optimal parallelism for earlier
     * executions of this operation.
     */
    IncrementalStatistics m_parallelismStat;
    // Total number of rows for the current execution of this operation.
    Uint32 m_totalRows;
    // Total number of bytes for the current execution of this operation.
    Uint32 m_totalBytes;

    ScanFragHandle_list::HeadPOD m_fragments; // ScanFrag states
    union
    {
      PatternStore::HeadPOD m_prunePattern;
      Uint32 m_constPrunePtrI;
    };
    /**
     * Max number of rows seen in a batch. Used for calculating the number of
     * rows per fragment in the next batch when using adaptive batch size.
     */
    Uint32 m_largestBatchRows;
    /**
     * Max number of bytes seen in a batch. Used for calculating the number of
     * rows per fragment in the next batch when using adaptive batch size.
     */
    Uint32 m_largestBatchBytes;
    Uint32 m_scanFragReq[ScanFragReq::SignalLength + 2];
  };

  struct DeferredParentOps
  {
    /**
     * m_correlations contains a list of Correlation Values (Uint32)
     * which identify parent rows that have been deferred.
     * m_pos is an index into this array, identifying the next parent row
     * for which to resume operation.
     */
    Correlation_list::Head m_correlations;
    Uint16 m_pos; // Next row operation to resume

    DeferredParentOps() : m_correlations(), m_pos(0) {}

    void init() {
      m_correlations.init();
      m_pos = 0;
    }
    bool isEmpty() const {
      return (m_pos == m_correlations.getSize());
    }
  };

  struct TreeNode_cursor_ptr
  {
    Uint32 nextList;
  };

  /**
   * A node in a Query
   *   (This is an instantiated version of QueryNode in
   *    include/kernel/signal/QueryTree.hpp)
   */
  struct TreeNode : TreeNode_cursor_ptr
  {
    STATIC_CONST ( MAGIC = ~RT_SPJ_TREENODE );

    TreeNode()
    : m_magic(MAGIC), m_state(TN_END),
      m_parentPtrI(RNIL), m_requestPtrI(RNIL),
      m_ancestors(),
      m_resumeEvents(0), m_resumePtrI(RNIL)
    {
    }

    TreeNode(Uint32 request)
    : m_magic(MAGIC),
      m_info(0), m_bits(T_LEAF), m_state(TN_BUILDING),
      m_parentPtrI(RNIL), m_requestPtrI(request),
      m_ancestors(),
      m_resumeEvents(0), m_resumePtrI(RNIL),
      nextList(RNIL), prevList(RNIL)
    {
//    m_send.m_ref = 0;
      m_send.m_correlation = 0;
      m_send.m_keyInfoPtrI = RNIL;
      m_send.m_attrInfoPtrI = RNIL;
    }

    const Uint32 m_magic;
    const struct OpInfo* m_info;

    enum TreeNodeState
    {
      /**
       * Initial
       */
      TN_BUILDING = 1,

      /**
       * Tree node is preparing
       */
      TN_PREPARING = 2,

      /**
       * Tree node is built and prepared, but not active
       */
      TN_INACTIVE = 3,

      /**
       * Tree node is active (i.e. has outstanding request(s))
       */
      TN_ACTIVE = 4,

      /**
       * Tree node is "finishing" (after TN_INACTIVE)
       */
      TN_COMPLETING = 5,

      /**
       * End-marker, not a valid state
       */
      TN_END = 0
    };

    enum TreeNodeBits
    {
      T_ATTR_INTERPRETED = 0x1,

      /**
       * Will node be executed only once (::parent_row())?
       *   Implies key/attr-info will be disowned (by send-signal)
       */
      T_ONE_SHOT = 0x2,

      /**
       * Is keyinfo "constructed"?
       *   (implies key info will be disowned (by send-signal))
       */
      T_KEYINFO_CONSTRUCTED = 0x4,

      /**
       * Is attrinfo "constructed"?
       *   (implies attr info will be disowned (by send-signal))
       */
      T_ATTRINFO_CONSTRUCTED = 0x8,

      /**
       * Is this node a leaf-node?
       */
      T_LEAF = 0x10,

      /**
       * Does this node have a user projection? (The index access part of
       * an index lookup operation has no user projection, since only the
       * base table tuple is sent to the API.)
       */
      T_USER_PROJECTION = 0x20,

      /**
       * Is this a unique index lookup (on index table)?
       *   (implies some extra error handling code)
       */
      T_UNIQUE_INDEX_LOOKUP = 0x40,

      /*
       * Should this node buffer its rows?
       */
      T_ROW_BUFFER = 0x80,

      /**
       * Should rows have a dictionary (i.e. random access capability)?
       *  This is typically used when having nodes depending on multiple
       *  parents, so that when a row becomes available from the "last"
       *  parent, a key can be constructed using correlation values from
       *  the parents
       */
      T_ROW_BUFFER_MAP = 0x100,

      /**
       * Does any child need to know when all its ancestors are complete?
       */
      T_REPORT_BATCH_COMPLETE  = 0x200,

      /**
       * Do *I need* to know when all ancestors have completed this batch?
       */
      T_NEED_REPORT_BATCH_COMPLETED = 0x400,

      /**
       * Constant prune pattern
       */
      T_CONST_PRUNE = 0x800,

      /**
       * Prune pattern
       */
      T_PRUNE_PATTERN = 0x1000,

      /**
       * Should index scan be parallel?
       */
      T_SCAN_PARALLEL = 0x2000,

      /**
       * Possibly requesting the resultset for this index scan to be repeated
       */
      T_SCAN_REPEATABLE = 0x4000,

      /**
       * Exec of a previous REQ must complete before we can proceed.
       * A ResumeEvent will later resume exec. of this operation
       */
      T_EXEC_SEQUENTIAL = 0x8000,

      // End marker...
      T_END = 0
    };

    /**
     * Describes whether a LQHKEY-REF and/or -CONF should trigger an
     * exec resume of another TreeNode having T_EXEC_SEQUENTIAL.
     * (Used as a bitmask)
     */
    enum TreeNodeResumeEvents
    {
      TN_RESUME_REF   = 0x01,
      TN_RESUME_CONF  = 0x02
    };

    bool isLeaf() const { return (m_bits & T_LEAF) != 0; }

    // table or index this TreeNode operates on, and its schemaVersion
    Uint32 m_tableOrIndexId;
    Uint32 m_schemaVersion;

    // TableId if 'm_tableOrIndexId' is an index, else equal
    Uint32 m_primaryTableId;

    Uint32 m_bits;
    Uint32 m_state;
    Uint32 m_node_no;
    Uint32 m_batch_size;
    Uint32 m_parentPtrI;
    const Uint32 m_requestPtrI;
    TreeNodeBitMask m_ancestors;
    Dependency_map::Head m_dependent_nodes;
    PatternStore::Head m_keyPattern;
    PatternStore::Head m_attrParamPattern;

    /**
     * Rows buffered by this node
     */
    RowCollection m_rows;

    /**
     * T_EXEC_SEQUENTIAL causes execution of child operations to be
     * deferred. These operations are queued in the 'struct DeferredParentOps'.
     * Currently only Lookup operations might be deferred.
     * Could later be extended to also cover index scans.
     */
    DeferredParentOps m_deferred;

    /**
     * Set of TreeNodeResumeEvents, possibly or'ed.
     * Specifies whether a REF or CONF will cause a resume
     * of the TreeNode referred to by 'm_resumePtrI'.
     */
    Uint32 m_resumeEvents;
    Uint32 m_resumePtrI;

    union
    {
      LookupData m_lookup_data;
      ScanFragData m_scanfrag_data;
      ScanIndexData m_scanindex_data;
    };

    struct {
      Uint32 m_ref;              // dst for signal
      /** Each tuple has a 16-bit id that is unique within that operation,
       * batch and SPJ block instance. The upper half word of m_correlation
       * is the id of the parent tuple, and the lower half word is the
       * id of the current tuple. */
      Uint32 m_correlation;
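      /**
       * Sketch (derived from the description above):
       *   parent tuple id == (m_correlation >> 16),
       *   own tuple id    == (m_correlation & 0xFFFF).
       */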
      Uint32 m_keyInfoPtrI;      // keyInfoSection
      Uint32 m_attrInfoPtrI;     // attrInfoSection
    } m_send;

    union {
      Uint32 nextList;
      Uint32 nextPool;
    };
    Uint32 prevList;
  };  // struct TreeNode

  static const Ptr<TreeNode> NullTreeNodePtr;

  typedef RecordPool<TreeNode, ArenaPool> TreeNode_pool;
  typedef DLFifoListImpl<TreeNode_pool, TreeNode> TreeNode_list;
  typedef LocalDLFifoListImpl<TreeNode_pool, TreeNode> Local_TreeNode_list;

  typedef SLListImpl<TreeNode_pool, TreeNode, TreeNode_cursor_ptr>
  TreeNodeCursor_list;
  typedef LocalSLListImpl<TreeNode_pool, TreeNode, TreeNode_cursor_ptr>
  Local_TreeNodeCursor_list;

  /**
   * A request (i.e. a query + parameters)
   */
  struct Request
  {
    enum RequestBits
    {
      RT_SCAN                = 0x1  // unbounded result set, scan interface
      ,RT_ROW_BUFFERS        = 0x2  // Do any of the nodes use row-buffering
      ,RT_MULTI_SCAN         = 0x4  // Are there several scans in the request
//    ,RT_VAR_ALLOC          = 0x8  // DEPRECATED
      ,RT_NEED_PREPARE       = 0x10 // Does any node need the m_prepare hook
      ,RT_NEED_COMPLETE      = 0x20 // Does any node need the m_complete hook
      ,RT_REPEAT_SCAN_RESULT = 0x40 // Repeat bushy scan result when required
    };

    enum RequestState
    {
      RS_BUILDING   = 0x1,
      RS_PREPARING  = 0x2,
      RS_RUNNING    = 0x3,
      RS_COMPLETING = 0x4,

      RS_ABORTING   = 0x1000, // Or:ed together with other states
      RS_WAITING    = 0x2000, // Waiting for SCAN_NEXTREQ

      RS_ABORTED    = 0x2008, // Aborted and waiting for SCAN_NEXTREQ
      RS_END = 0
    };  // enum RequestState

    Request() {}
    Request(const ArenaHead & arena) : m_arena(arena) {}
    Uint32 m_magic;
    Uint32 m_bits;
    Uint32 m_state;
    Uint32 m_errCode;
    Uint32 m_node_cnt;
    Uint32 m_senderRef;
    Uint32 m_senderData;
    Uint32 m_rootResultData;
    Uint32 m_rootFragId;
    Uint32 m_transId[2];
    TreeNode_list::Head m_nodes;
    TreeNodeCursor_list::Head m_cursor_nodes;
    Uint32 m_cnt_active;       // No of "running" nodes
    TreeNodeBitMask
           m_active_nodes;     // Nodes which will return more data in NEXTREQ
    TreeNodeBitMask
           m_completed_nodes;  // Nodes w/o any 'outstanding' signals
    Uint32 m_rows;             // Rows accumulated in current batch
    Uint32 m_outstanding;      // Outstanding signals; when 0, batch is done
    Uint16 m_lookup_node_data[MAX_NDB_NODES];
    ArenaHead m_arena;
    RowBuffer m_rowBuffer;

#ifdef SPJ_TRACE_TIME
    Uint32 m_cnt_batches;
    Uint32 m_sum_rows;
    Uint32 m_sum_running;
    Uint32 m_sum_waiting;
    NDB_TICKS m_save_time;
#endif

    bool isScan() const { return (m_bits & RT_SCAN) != 0; }
    bool isLookup() const { return (m_bits & RT_SCAN) == 0; }

    bool equal(const Request & key) const {
      return
        m_senderData == key.m_senderData &&
        m_transId[0] == key.m_transId[0] &&
        m_transId[1] == key.m_transId[1];
    }

    Uint32 hashValue() const {
      return m_transId[0] ^ m_senderData;
    }

    union {
      Uint32 nextHash;
      Uint32 nextPool;
    };
    Uint32 prevHash;
  };

private:
  /**
   * These are the rows in ndbinfo.counters that concern the SPJ block.
   * @see Ndbinfo::counter_id.
   */
  enum CounterId
  {
    /**
     * This is the number of incoming LQHKEYREQ messages (i.e. queries with a
     * lookup as root).
     */
    CI_READS_RECEIVED = 0,

    /**
     * This is the number of lookup operations (LQHKEYREQ) sent to a local
     * LQH block.
     */
    CI_LOCAL_READS_SENT = 1,

    /**
     * This is the number of lookup operations (LQHKEYREQ) sent to a remote
     * LQH block.
     */
    CI_REMOTE_READS_SENT = 2,

    /**
     * No of lookup operations which did not return a row (LQHKEYREF).
     * (Most likely due to a non-matching key, or a predicate
     * filter which evaluated to 'false').
     */
    CI_READS_NOT_FOUND = 3,

    /**
     * This is the number of incoming queries where the root operation is a
     * fragment scan and this is a "direct scan" that does not go via an index.
     */
    CI_TABLE_SCANS_RECEIVED = 4,

    /**
     * This is the number of "direct" fragment scans (i.e. not via an ordered
     * index) sent to the local LQH block.
     */
    CI_LOCAL_TABLE_SCANS_SENT = 5,

    /**
     * This is the number of incoming queries where the root operation is a
     * fragment scan which scans the fragment via an ordered index.
     */
    CI_RANGE_SCANS_RECEIVED = 6,

    /**
     * This is the number of scans using ordered indexes that have been sent
     * to the local LQH block.
     */
    CI_LOCAL_RANGE_SCANS_SENT = 7,

    /**
     * This is the number of scans using ordered indexes that have been sent
     * to a remote LQH block.
     */
    CI_REMOTE_RANGE_SCANS_SENT = 8,

    /**
     * No of scan batches (on range or full table) returned to ndbapi
     */
    CI_SCAN_BATCHES_RETURNED = 9,

    /**
     * Total no of rows returned from scans.
     */
    CI_SCAN_ROWS_RETURNED = 10,

    /**
     * No of prunable index scans that have been received
     */
    CI_PRUNED_RANGE_SCANS_RECEIVED = 11,

    /**
     * No of "const" prunable index scans that have been received,
     * i.e. the index scan only accesses 1 partition
     */
    CI_CONST_PRUNED_RANGE_SCANS_RECEIVED = 12,

    CI_END = 13 // End marker - not a valid counter id.
  };

  /**
   * This is a set of counters for monitoring the behavior of the SPJ block.
   * They may be read through the ndbinfo.counters SQL table.
   */
  class MonotonicCounters {
  public:

    MonotonicCounters()
    {
      for (int i = 0; i < CI_END; i++)
      {
        m_counters[i] = 0;
      }
    }

    Uint64 get_counter(CounterId id) const
    {
      return m_counters[id];
    }

    void incr_counter(CounterId id, Uint64 delta)
    {
      m_counters[id] += delta;
    }

  private:
    Uint64 m_counters[CI_END];

  } c_Counters;
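
  /**
   * Usage sketch (an assumption, for illustration): e.g. on receiving a
   * lookup request the block would bump the matching ndbinfo.counters row
   * via
   *
   *   c_Counters.incr_counter(CI_READS_RECEIVED, 1);
   */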

  typedef RecordPool<Request, ArenaPool> Request_pool;
  typedef DLListImpl<Request_pool, Request> Request_list;
  typedef LocalDLListImpl<Request_pool, Request> Local_Request_list;
  typedef DLHashTableImpl<Request_pool, Request> Request_hash;
  typedef DLHashTableImpl<Request_pool, Request>::Iterator Request_iterator;

  ArenaAllocator m_arenaAllocator;
  Request_pool m_request_pool;
  Request_hash m_scan_request_hash;
  Request_hash m_lookup_request_hash;
  ArenaPool m_dependency_map_pool;
  TreeNode_pool m_treenode_pool;
  ScanFragHandle_pool m_scanfraghandle_pool;

  TableRecord *m_tableRecord;
  UintR c_tabrecFilesize;

  NdbNodeBitmask c_alive_nodes;

  void do_init(Request*, const LqhKeyReq*, Uint32 senderRef);
  void store_lookup(Ptr<Request>);
  void handle_early_lqhkey_ref(Signal*, const LqhKeyReq *, Uint32 err);
  void sendTCKEYREF(Signal* signal, Uint32 ref, Uint32 routeRef);
  void sendTCKEYCONF(Signal* signal, Uint32 len, Uint32 ref, Uint32 routeRef);

  void do_init(Request*, const ScanFragReq*, Uint32 senderRef);
  void store_scan(Ptr<Request>);
  void handle_early_scanfrag_ref(Signal*, const ScanFragReq *, Uint32 err);

  struct BuildKeyReq
  {
    Uint32 hashInfo[4]; // Used for hashing
    Uint32 fragId;
    Uint32 fragDistKey;
    Uint32 receiverRef; // NodeId + InstanceNo
  };

  /**
   * Build
   */
  const OpInfo* getOpInfo(Uint32 op);
  Uint32 build(Build_context&, Ptr<Request>, SectionReader&, SectionReader&);
  Uint32 initRowBuffers(Ptr<Request>);
  void buildExecPlan(Ptr<Request>, Ptr<TreeNode> node, Ptr<TreeNode> next);
  void checkPrepareComplete(Signal*, Ptr<Request>, Uint32 cnt);
  void start(Signal*, Ptr<Request>);
  void checkBatchComplete(Signal*, Ptr<Request>, Uint32 cnt);
  void batchComplete(Signal*, Ptr<Request>);
  void prepareNextBatch(Signal*, Ptr<Request>);
  void sendConf(Signal*, Ptr<Request>, bool is_complete);
  void complete(Signal*, Ptr<Request>);
  void cleanup(Ptr<Request>);
  void cleanupBatch(Ptr<Request>);
  void abort(Signal*, Ptr<Request>, Uint32 errCode);
  Uint32 nodeFail(Signal*, Ptr<Request>, NdbNodeBitmask mask);

  Uint32 createNode(Build_context&, Ptr<Request>, Ptr<TreeNode> &);
  void handleTreeNodeComplete(Signal*, Ptr<Request>, Ptr<TreeNode>);
  void reportAncestorsComplete(Signal*, Ptr<Request>, Ptr<TreeNode>);
  void releaseScanBuffers(Ptr<Request> requestPtr);
  void releaseRequestBuffers(Ptr<Request> requestPtr);
  void releaseNodeRows(Ptr<Request> requestPtr, Ptr<TreeNode>);
  void registerActiveCursor(Ptr<Request>, Ptr<TreeNode>);
  void nodeFail_checkRequests(Signal*);
  void cleanup_common(Ptr<Request>, Ptr<TreeNode>);

  /**
   * Row buffering
   */
  Uint32 storeRow(RowCollection& collection, RowPtr &row);
  void releaseRow(RowCollection& collection, RowRef ref);
  Uint32* stackAlloc(RowBuffer& dst, RowRef&, Uint32 len);
  Uint32* varAlloc(RowBuffer& dst, RowRef&, Uint32 len);
  Uint32* rowAlloc(RowBuffer& dst, RowRef&, Uint32 len);

  void add_to_list(SLFifoRowList & list, RowRef);
  Uint32 add_to_map(RowMap& map, Uint32, RowRef);

  void setupRowPtr(const RowCollection& collection,
                   RowPtr& dst, RowRef, const Uint32 * src);
  Uint32 * get_row_ptr(RowRef pos);

  /**
   * SLFifoRowListIterator
   */
  bool first(const SLFifoRowList& list, SLFifoRowListIterator&);
  bool next(SLFifoRowListIterator&);

  /**
   * RowMapIterator
   */
  bool first(const RowMap& map, RowMapIterator&);
  bool next(RowMapIterator&);

  /**
   * RowIterator:
   * Abstraction which may iterate either a RowList or Map
   */
  bool first(const RowCollection&, RowIterator&);
  bool next(RowIterator&);

  /**
   * Misc
   */
  Uint32 buildRowHeader(RowPtr::Header *, SegmentedSectionPtr);
  Uint32 buildRowHeader(RowPtr::Header *, const Uint32 *& src, Uint32 len);
  void getCorrelationData(const RowPtr::Section & row, Uint32 col,
                          Uint32& correlationNumber);
  void getCorrelationData(const RowPtr::Linear & row, Uint32 col,
                          Uint32& correlationNumber);
  Uint32 appendToPattern(Local_pattern_store &, DABuffer & tree, Uint32);
  Uint32 appendParamToPattern(Local_pattern_store&, const RowPtr::Linear&,
                              Uint32);
  Uint32 appendParamHeadToPattern(Local_pattern_store&, const RowPtr::Linear&,
                                  Uint32);

  Uint32 appendTreeToSection(Uint32 & ptrI, SectionReader &, Uint32);
  Uint32 appendColToSection(Uint32 & ptrI, const RowPtr::Linear&, Uint32 col, bool& hasNull);
  Uint32 appendColToSection(Uint32 & ptrI, const RowPtr::Section&, Uint32 col, bool& hasNull);
  Uint32 appendPkColToSection(Uint32 & ptrI, const RowPtr::Section&, Uint32 col);
  Uint32 appendPkColToSection(Uint32 & ptrI, const RowPtr::Linear&, Uint32 col);
  Uint32 appendAttrinfoToSection(Uint32 &, const RowPtr::Linear&, Uint32 col, bool& hasNull);
  Uint32 appendAttrinfoToSection(Uint32 &, const RowPtr::Section&, Uint32 col, bool& hasNull);
  Uint32 appendDataToSection(Uint32 & ptrI, Local_pattern_store&,
                             Local_pattern_store::ConstDataBufferIterator&,
                             Uint32 len, bool& hasNull);
  Uint32 appendFromParent(Uint32 & ptrI, Local_pattern_store&,
                          Local_pattern_store::ConstDataBufferIterator&,
                          Uint32 level, const RowPtr&, bool& hasNull);
  Uint32 expand(Uint32 & ptrI, Local_pattern_store& p, const RowPtr& r, bool& hasNull) {
    switch (r.m_type) {
    case RowPtr::RT_SECTION:
      return expandS(ptrI, p, r, hasNull);
    case RowPtr::RT_LINEAR:
      return expandL(ptrI, p, r, hasNull);
    }
    return DbspjErr::InternalError;
  }
  Uint32 expandS(Uint32 & ptrI, Local_pattern_store&, const RowPtr&, bool& hasNull);
  Uint32 expandL(Uint32 & ptrI, Local_pattern_store&, const RowPtr&, bool& hasNull);
  Uint32 expand(Uint32 & ptrI, DABuffer& pattern, Uint32 len,
                DABuffer & param, Uint32 cnt, bool& hasNull);
  Uint32 expand(Local_pattern_store& dst, Ptr<TreeNode> treeNodePtr,
                DABuffer & pattern, Uint32 len,
                DABuffer & param, Uint32 cnt);
  Uint32 parseDA(Build_context&, Ptr<Request>, Ptr<TreeNode>,
                 DABuffer & tree, Uint32 treeBits,
                 DABuffer & param, Uint32 paramBits);

  Uint32 getResultRef(Ptr<Request> requestPtr);

  Uint32 checkTableError(Ptr<TreeNode> treeNodePtr) const;
  Uint32 getNodes(Signal*, BuildKeyReq&, Uint32 tableId);

  void common_execTRANSID_AI(Signal*, Ptr<Request>, Ptr<TreeNode>,
                             const RowPtr&);

  /**
   * Lookup
   */
  static const OpInfo g_LookupOpInfo;
  Uint32 lookup_build(Build_context&, Ptr<Request>,
                      const QueryNode*, const QueryNodeParameters*);
  void lookup_start(Signal*, Ptr<Request>, Ptr<TreeNode>);
  void lookup_resume(Signal*, Ptr<Request>, Ptr<TreeNode>);
  void lookup_send(Signal*, Ptr<Request>, Ptr<TreeNode>);
  void lookup_execTRANSID_AI(Signal*, Ptr<Request>, Ptr<TreeNode>,
                             const RowPtr&);
  void lookup_execLQHKEYREF(Signal*, Ptr<Request>, Ptr<TreeNode>);
  void lookup_execLQHKEYCONF(Signal*, Ptr<Request>, Ptr<TreeNode>);
  void lookup_stop_branch(Signal*, Ptr<Request>, Ptr<TreeNode>, Uint32 err);
  void lookup_parent_row(Signal*, Ptr<Request>, Ptr<TreeNode>, const RowPtr &);
  void lookup_row(Signal*, Ptr<Request>, Ptr<TreeNode>, const RowPtr &);
  void lookup_abort(Signal*, Ptr<Request>, Ptr<TreeNode>);
  Uint32 lookup_execNODE_FAILREP(Signal* signal, Ptr<Request>, Ptr<TreeNode>,
                                 NdbNodeBitmask);

  void lookup_sendLeafCONF(Signal*, Ptr<Request>, Ptr<TreeNode>,
                           Uint32 node);
  void lookup_cleanup(Ptr<Request>, Ptr<TreeNode>);

  Uint32 handle_special_hash(Uint32 tableId, Uint32 dstHash[4],
                             const Uint64* src,
                             Uint32 srcLen,       // Len in #32bit words
                             const struct KeyDescriptor* desc);

  Uint32 computeHash(Signal*, BuildKeyReq&, Uint32 table, Uint32 keyInfoPtrI);
  Uint32 computePartitionHash(Signal*, BuildKeyReq&, Uint32 table, Uint32 keyInfoPtrI);

  /**
   * ScanFrag
   */
  static const OpInfo g_ScanFragOpInfo;
  Uint32 scanFrag_build(Build_context&, Ptr<Request>,
                        const QueryNode*, const QueryNodeParameters*);
  void scanFrag_start(Signal*, Ptr<Request>, Ptr<TreeNode>);
  void scanFrag_send(Signal*, Ptr<Request>, Ptr<TreeNode>);
  void scanFrag_execTRANSID_AI(Signal*, Ptr<Request>, Ptr<TreeNode>,
                               const RowPtr &);
  void scanFrag_execSCAN_FRAGREF(Signal*, Ptr<Request>, Ptr<TreeNode>, Ptr<ScanFragHandle>);
  void scanFrag_execSCAN_FRAGCONF(Signal*, Ptr<Request>, Ptr<TreeNode>, Ptr<ScanFragHandle>);
  void scanFrag_execSCAN_NEXTREQ(Signal*, Ptr<Request>, Ptr<TreeNode>);
  void scanFrag_abort(Signal*, Ptr<Request>, Ptr<TreeNode>);
  void scanFrag_cleanup(Ptr<Request>, Ptr<TreeNode>);

  /**
   * ScanIndex
   */
  static const OpInfo g_ScanIndexOpInfo;
  Uint32 scanIndex_build(Build_context&, Ptr<Request>,
                         const QueryNode*, const QueryNodeParameters*);
  Uint32 parseScanIndex(Build_context&, Ptr<Request>, Ptr<TreeNode>,
                        DABuffer tree, Uint32 treeBits,
                        DABuffer param, Uint32 paramBits);
  void scanIndex_prepare(Signal*, Ptr<Request>, Ptr<TreeNode>);
  void scanIndex_execTRANSID_AI(Signal*, Ptr<Request>, Ptr<TreeNode>,
                                const RowPtr &);
  void scanIndex_execSCAN_FRAGREF(Signal*, Ptr<Request>, Ptr<TreeNode>, Ptr<ScanFragHandle>);
  void scanIndex_execSCAN_FRAGCONF(Signal*, Ptr<Request>, Ptr<TreeNode>, Ptr<ScanFragHandle>);
  void scanIndex_parent_row(Signal*, Ptr<Request>, Ptr<TreeNode>, const RowPtr&);
  void scanIndex_fixupBound(Ptr<ScanFragHandle> fragPtr, Uint32 ptrI, Uint32);
  Uint32 scanIndex_send(Signal* signal,
                        Ptr<Request> requestPtr,
                        Ptr<TreeNode> treeNodePtr,
                        Uint32 noOfFrags,
                        Uint32 bs_bytes,
                        Uint32 bs_rows,
                        Uint32& batchRange);
  void scanIndex_batchComplete(Signal* signal);
  Uint32 scanIndex_findFrag(Local_ScanFragHandle_list &, Ptr<ScanFragHandle>&,
                            Uint32 fragId);
  void scanIndex_parent_batch_complete(Signal*, Ptr<Request>, Ptr<TreeNode>);
  void scanIndex_parent_batch_repeat(Signal*, Ptr<Request>, Ptr<TreeNode>);
  void scanIndex_execSCAN_NEXTREQ(Signal*, Ptr<Request>, Ptr<TreeNode>);
  void scanIndex_complete(Signal*, Ptr<Request>, Ptr<TreeNode>);
  void scanIndex_abort(Signal*, Ptr<Request>, Ptr<TreeNode>);
  Uint32 scanIndex_execNODE_FAILREP(Signal* signal, Ptr<Request>, Ptr<TreeNode>,
                                    NdbNodeBitmask);
  void scanIndex_parent_batch_cleanup(Ptr<Request>, Ptr<TreeNode>);
  void scanIndex_cleanup(Ptr<Request>, Ptr<TreeNode>);

  void scanIndex_release_rangekeys(Ptr<Request>, Ptr<TreeNode>);

  Uint32 scanindex_sendDihGetNodesReq(Signal* signal,
                                      Ptr<Request> requestPtr,
                                      Ptr<TreeNode> treeNodePtr);

  /**
   * Page manager
   */
  bool allocPage(Ptr<RowPage> &);
  void releasePage(Ptr<RowPage>);
  void releasePages(Uint32 first, Ptr<RowPage> last);
  void releaseGlobal(Signal*);
  SLList<RowPage>::Head m_free_page_list;
  ArrayPool<RowPage> m_page_pool;

  /* Random fault injection */

#ifdef ERROR_INSERT
  bool appendToSection(Uint32& firstSegmentIVal,
                       const Uint32* src, Uint32 len);
#endif

  /**
   * Scratch buffers...
   */
  Uint32 m_buffer0[16*1024]; // 64k
  Uint32 m_buffer1[16*1024]; // 64k
};


#undef JAM_FILE_ID

#endif