1 /*
2    Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 /**
25   @file
26 
27   @brief
28   This file defines various classes and methods used for pushing queries
29   to the ndb data node (for execution by the SPJ block).
30 */
31 
32 #include "storage/ndb/plugin/ha_ndbcluster_push.h"
33 
34 #include "my_dbug.h"
35 #include "sql/abstract_query_plan.h"
36 #include "sql/current_thd.h"
37 #include "sql/sql_class.h"
38 #include "sql/sql_lex.h"
39 #include "storage/ndb/include/ndb_version.h"
40 #include "storage/ndb/include/ndbapi/NdbApi.hpp"
41 #include "storage/ndb/include/ndbapi/NdbInterpretedCode.hpp"
42 #include "storage/ndb/plugin/ha_ndbcluster.h"
43 #include "storage/ndb/plugin/ha_ndbcluster_cond.h"
44 #include "storage/ndb/plugin/ndb_thd.h"
45 #include "storage/ndb/src/ndbapi/NdbQueryBuilder.hpp"
46 #include "storage/ndb/src/ndbapi/NdbQueryOperation.hpp"
47 
48 typedef NdbDictionary::Table NDBTAB;
49 
50 /**
51  * antijoin_null_cond is inserted by the optimizer when it create the
52  * special antijoin-NULL-condition. It serves as a token to uniquely
53  * identify such a NULL-condition. Also see similar usage of it
54  * when building the iterators in sql_executor.cc
55  */
56 extern const char *antijoin_null_cond;
57 
58 /*
59   Explain why an operation could not be pushed
60   @param[in] msgfmt printf style format string.
61 */
62 #define EXPLAIN_NO_PUSH(msgfmt, ...)                                   \
63   do {                                                                 \
64     if (unlikely(current_thd->lex->is_explain())) {                    \
65       push_warning_printf(current_thd, Sql_condition::SL_NOTE, ER_YES, \
66                           (msgfmt), __VA_ARGS__);                      \
67     }                                                                  \
68   } while (0)
69 
get_referred_field_name(const Item_field * field_item)70 static inline const char *get_referred_field_name(
71     const Item_field *field_item) {
72   DBUG_ASSERT(field_item->type() == Item::FIELD_ITEM);
73   return field_item->field->field_name;
74 }
75 
get_referred_table_access_name(const Item_field * field_item)76 static const char *get_referred_table_access_name(
77     const Item_field *field_item) {
78   DBUG_ASSERT(field_item->type() == Item::FIELD_ITEM);
79   return field_item->field->table->alias;
80 }
81 
ndbcluster_is_lookup_operation(AQP::enum_access_type accessType)82 static bool ndbcluster_is_lookup_operation(AQP::enum_access_type accessType) {
83   return accessType == AQP::AT_PRIMARY_KEY ||
84          accessType == AQP::AT_MULTI_PRIMARY_KEY ||
85          accessType == AQP::AT_UNIQUE_KEY;
86 }
87 
first_table(uint start) const88 uint ndb_table_access_map::first_table(uint start) const {
89   for (uint table_no = start; table_no < length(); table_no++) {
90     if (contain(table_no)) return table_no;
91   }
92   return length();
93 }
94 
GetTriggerCondOrNull(const Item * item)95 static const Item_func_trig_cond *GetTriggerCondOrNull(const Item *item) {
96   if (item->type() == Item::FUNC_ITEM &&
97       down_cast<const Item_func *>(item)->functype() ==
98           Item_bool_func2::TRIG_COND_FUNC) {
99     return down_cast<const Item_func_trig_cond *>(item);
100   } else {
101     return nullptr;
102   }
103 }
104 
105 /**
106  * Check if the specified 'item' is a antijoin-NULL-condition.
107  * This condition is constructed such that all rows being 'matches'
108  * are filtered away, and only the non-(anti)matches will pass.
109  *
110  * Logic inspired by similar code in sql_executor.cc.
111  */
is_antijoin_null_cond(const Item * item)112 static bool is_antijoin_null_cond(const Item *item) {
113   const Item_func_trig_cond *trig_cond = GetTriggerCondOrNull(item);
114   if (trig_cond != nullptr &&
115       trig_cond->get_trig_type() == Item_func_trig_cond::IS_NOT_NULL_COMPL) {
116     const Item *inner_cond = trig_cond->arguments()[0];
117     const Item_func_trig_cond *inner_trig_cond =
118         GetTriggerCondOrNull(inner_cond);
119     if (inner_trig_cond != nullptr) {
120       const Item *inner_inner_cond = inner_trig_cond->arguments()[0];
121       if (inner_inner_cond->item_name.ptr() == antijoin_null_cond) {
122         return true;
123       }
124     }
125   }
126   return false;
127 }
128 
last_table(uint start) const129 uint ndb_table_access_map::last_table(uint start) const {
130   uint table_no = start;
131   while (true) {
132     if (contain(table_no))
133       return table_no;
134     else if (table_no == 0)
135       return length();
136     table_no--;
137   }
138 }
139 
ndb_pushed_join(const ndb_pushed_builder_ctx & builder,const NdbQueryDef * query_def)140 ndb_pushed_join::ndb_pushed_join(const ndb_pushed_builder_ctx &builder,
141                                  const NdbQueryDef *query_def)
142     : m_query_def(query_def),
143       m_operation_count(0),
144       m_field_count(builder.m_fld_refs) {
145   DBUG_ASSERT(query_def != NULL);
146   DBUG_ASSERT(builder.m_fld_refs <= MAX_REFERRED_FIELDS);
147   ndb_table_access_map searched;
148   for (uint tab_no = 0; searched != builder.m_join_scope; tab_no++) {
149     const AQP::Table_access *const join_tab =
150         builder.m_plan.get_table_access(tab_no);
151     if (builder.m_join_scope.contain(tab_no)) {
152       DBUG_ASSERT(m_operation_count < MAX_PUSHED_OPERATIONS);
153       m_tables[m_operation_count++] = join_tab->get_table();
154       searched.add(tab_no);
155     }
156   }
157   for (uint i = 0; i < builder.m_fld_refs; i++) {
158     m_referred_fields[i] = builder.m_referred_fields[i];
159   }
160 }
161 
~ndb_pushed_join()162 ndb_pushed_join::~ndb_pushed_join() {
163   if (m_query_def) m_query_def->destroy();
164 }
165 
match_definition(int type,const NDB_INDEX_DATA * idx) const166 bool ndb_pushed_join::match_definition(int type,  // NdbQueryOperationDef::Type,
167                                        const NDB_INDEX_DATA *idx) const {
168   const NdbQueryOperationDef *const root_operation =
169       m_query_def->getQueryOperation((uint)0);
170   const NdbQueryOperationDef::Type def_type = root_operation->getType();
171 
172   if (def_type != type) {
173     DBUG_PRINT(
174         "info",
175         ("Cannot execute push join. Root operation prepared as %s "
176          "not executable as %s",
177          NdbQueryOperationDef::getTypeName(def_type),
178          NdbQueryOperationDef::getTypeName((NdbQueryOperationDef::Type)type)));
179     return false;
180   }
181   const NdbDictionary::Index *const expected_index = root_operation->getIndex();
182 
183   // Check that we still use the same index as when the query was prepared.
184   switch (def_type) {
185     case NdbQueryOperationDef::PrimaryKeyAccess:
186       DBUG_ASSERT(idx != NULL);
187       DBUG_ASSERT(idx->unique_index == expected_index);
188       break;
189 
190     case NdbQueryOperationDef::UniqueIndexAccess:
191       DBUG_ASSERT(idx != NULL);
192       // DBUG_ASSERT(idx->unique_index == expected_index);
193       if (idx->unique_index != expected_index) {
194         DBUG_PRINT("info",
195                    ("Actual index %s differs from expected index %s."
196                     "Therefore, join cannot be pushed.",
197                     idx->unique_index->getName(), expected_index->getName()));
198         return false;
199       }
200       break;
201 
202     case NdbQueryOperationDef::TableScan:
203       DBUG_ASSERT(idx == NULL && expected_index == NULL);
204       break;
205 
206     case NdbQueryOperationDef::OrderedIndexScan:
207       DBUG_ASSERT(idx != NULL);
208       // DBUG_ASSERT(idx->index == expected_index);
209       if (idx->index != expected_index) {
210         DBUG_PRINT("info", ("Actual index %s differs from expected index %s. "
211                             "Therefore, join cannot be pushed.",
212                             idx->index->getName(), expected_index->getName()));
213         return false;
214       }
215       break;
216 
217     default:
218       DBUG_ASSERT(false);
219       break;
220   }
221 
222   /**
223    * There may be referrences to Field values from tables outside the scope of
224    * our pushed join which are supplied as paramValues().
225    * If any of these are NULL values, join can't be pushed.
226    *
227    * Note that the 'Late NULL filtering' in the Iterator::Read() methods will
228    * eliminate such NULL-key Read's anyway, so not pushing these joins
229    * should be a non-issue.
230    */
231   for (uint i = 0; i < get_field_referrences_count(); i++) {
232     Field *field = m_referred_fields[i];
233     if (field->is_real_null()) {
234       DBUG_PRINT("info",
235                  ("paramValue is NULL, can not execute as pushed join"));
236       return false;
237     }
238   }
239 
240   return true;
241 }
242 
243 #ifdef WORDS_BIGENDIAN
244 /**
245  * Determine if a specific column type is represented in a format which is
246  * sensitive to the endian format of the underlying platform.
247  */
is_endian_sensible_type(const Field * field)248 static bool is_endian_sensible_type(const Field *field) {
249   const enum_field_types type = field->real_type();
250   switch (type) {
251     // Most numerics are endian sensible, note the int24 though.
252     // Note: Enum dont have its own type, represented as an int.
253     case MYSQL_TYPE_SHORT:
254     case MYSQL_TYPE_LONG:
255     case MYSQL_TYPE_LONGLONG:
256     case MYSQL_TYPE_FLOAT:
257     case MYSQL_TYPE_DOUBLE:
258     // Deprecated temporal types were 8/4 byte integers
259     case MYSQL_TYPE_DATETIME:
260     case MYSQL_TYPE_TIMESTAMP:
261       return true;
262 
263     // The new temporal data types did it right, not endian sensitive
264     case MYSQL_TYPE_NEWDATE:
265     case MYSQL_TYPE_TIME2:
266     case MYSQL_TYPE_DATETIME2:
267     case MYSQL_TYPE_TIMESTAMP2:
268     // The Tiny type is a single byte, so endianness does not matter
269     case MYSQL_TYPE_TINY:
270     // Year is also a 'tiny', single byte
271     case MYSQL_TYPE_YEAR:
272     // Odly enough, The int24 is *not* stored in an endian sensible format
273     case MYSQL_TYPE_INT24:
274     // The (deprecated) Time type was handled as an int24.
275     case MYSQL_TYPE_TIME:
276     // Decimal is basically a char string variant.
277     case MYSQL_TYPE_DECIMAL:
278     case MYSQL_TYPE_NEWDECIMAL:
279     // Other datatypes (char, blob, json, ..) is not an endian concern
280     default:
281       return false;
282   }
283 }
284 #endif
285 
make_query_instance(NdbTransaction * trans,const NdbQueryParamValue * keyFieldParams,uint paramCnt) const286 NdbQuery *ndb_pushed_join::make_query_instance(
287     NdbTransaction *trans, const NdbQueryParamValue *keyFieldParams,
288     uint paramCnt) const {
289   DBUG_TRACE;
290   DBUG_PRINT("info", ("executing chain of %d pushed joins."
291                       " First table is %s, accessed as %s.",
292                       get_operation_count(), get_table(0)->alias,
293                       NdbQueryOperationDef::getTypeName(
294                           m_query_def->getQueryOperation((uint)0)->getType())));
295 
296   const NdbQueryParamValue *paramValues = keyFieldParams;
297 
298   /**
299    * There may be referrences to Field values from tables outside the scope of
300    * our pushed join: These are expected to be supplied as paramValues()
301    * after the keyFieldParams[].
302    */
303   uint outer_fields = get_field_referrences_count();
304   NdbQueryParamValue *extendedParams = NULL;
305   if (unlikely(outer_fields > 0)) {
306     uint size = sizeof(NdbQueryParamValue) * (paramCnt + outer_fields);
307     extendedParams = reinterpret_cast<NdbQueryParamValue *>(my_alloca(size));
308     // Copy specified keyFieldParams[] first
309     for (uint i = 0; i < paramCnt; i++) {
310       new (extendedParams + i) NdbQueryParamValue(keyFieldParams[i]);
311     }
312 
313     // There may be referrences to Field values from tables outside the scope of
314     // our pushed join: These are expected to be supplied as paramValues()
315     for (uint i = 0; i < outer_fields; i++) {
316       Field *field = m_referred_fields[i];
317       DBUG_ASSERT(!field->is_real_null());  // Checked by ::check_if_pushable()
318       uchar *raw = field->field_ptr();
319 
320 #ifdef WORDS_BIGENDIAN
321       if (field->table->s->db_low_byte_first &&
322           is_endian_sensible_type(field)) {
323         const uint32 field_length = field->pack_length();
324         raw = static_cast<uchar *>(my_alloca(field_length));
325 
326         // Byte order is swapped to get the correct endian format.
327         const uchar *last = field->field_ptr() + field_length;
328         for (uint pos = 0; pos < field_length; pos++) raw[pos] = *(--last);
329       }
330 #else
331       // Little endian platforms are expected to be only 'low_byte_first'
332       DBUG_ASSERT(field->table->s->db_low_byte_first);
333 #endif
334 
335       new (extendedParams + paramCnt + i) NdbQueryParamValue(raw, false);
336     }
337     paramValues = extendedParams;
338   }
339 
340   NdbQuery *query = trans->createQuery(&get_query_def(), paramValues);
341   if (unlikely(extendedParams != NULL)) {
342     for (uint i = 0; i < paramCnt + outer_fields; i++) {
343       extendedParams[i].~NdbQueryParamValue();
344     }
345   }
346   return query;
347 }
348 
349 /////////////////////////////////////////
350 
ndb_pushed_builder_ctx(const Thd_ndb * thd_ndb,AQP::Table_access * root)351 ndb_pushed_builder_ctx::ndb_pushed_builder_ctx(const Thd_ndb *thd_ndb,
352                                                AQP::Table_access *root)
353     : m_thd_ndb(thd_ndb),
354       m_plan(*root->get_join_plan()),
355       m_join_root(root),
356       m_join_scope(),
357       m_const_scope(),
358       m_scan_operations(),
359       m_has_pending_cond(),
360       m_internal_op_count(0),
361       m_fld_refs(0),
362       m_builder(nullptr) {}
363 
~ndb_pushed_builder_ctx()364 ndb_pushed_builder_ctx::~ndb_pushed_builder_ctx() {
365   if (m_builder) {
366     m_builder->destroy();
367   }
368 }
369 
getNdbError() const370 const NdbError &ndb_pushed_builder_ctx::getNdbError() const {
371   DBUG_ASSERT(m_builder);
372   return m_builder->getNdbError();
373 }
374 
maybe_pushable(AQP::Table_access * table,join_pushability check)375 bool ndb_pushed_builder_ctx::maybe_pushable(AQP::Table_access *table,
376                                             join_pushability check) {
377   DBUG_TRACE;
378   TABLE *tab = table->get_table();
379 
380   if (tab == nullptr) {
381     // There could be unused tables allocated in the 'plan', skip these
382     return false;
383   }
384 
385   if (tab->s->db_type()->db_type != DB_TYPE_NDBCLUSTER) {
386     // Ignore non-NDBCLUSTER tables.
387     DBUG_PRINT("info",
388                ("Table '%s' not in ndb engine, not pushable", tab->alias));
389     return false;
390   }
391 
392   if (table->get_table()->file->member_of_pushed_join()) {
393     return false;  // Already pushed
394   }
395 
396   uint pushable = table->get_table_properties();
397   if (pushable & PUSHABILITY_KNOWN) {
398     return ((pushable & check) == check);
399   }
400 
401   bool allowed = false;
402   const char *reason = nullptr;
403   pushable = 0;  // Assume not pushable
404 
405   switch (table->get_access_type()) {
406     case AQP::AT_VOID:
407       DBUG_ASSERT(false);
408       reason = "UNKNOWN";
409       break;
410 
411     case AQP::AT_FIXED:
412       reason = "optimized away, or const'ified by optimizer";
413       break;
414 
415     case AQP::AT_UNDECIDED:
416       reason = "Access type was not chosen at 'prepare' time";
417       break;
418 
419     case AQP::AT_OTHER:
420       reason = table->get_other_access_reason();
421       break;
422 
423     default:
424       const ha_ndbcluster *handler =
425           down_cast<ha_ndbcluster *>(table->get_table()->file);
426 
427       if (handler->maybe_pushable_join(reason)) {
428         allowed = true;
429         pushable = PUSHABLE_AS_CHILD | PUSHABLE_AS_PARENT;
430       }
431       break;
432   }  // switch
433 
434   if (reason != nullptr) {
435     DBUG_ASSERT(!allowed);
436     EXPLAIN_NO_PUSH("Table '%s' is not pushable: %s", tab->alias, reason);
437   }
438   table->set_table_properties(pushable | PUSHABILITY_KNOWN);
439   return allowed;
440 }  // ndb_pushed_builder_ctx::maybe_pushable
441 
442 /**
443  * Get *internal* table_no of table referred by 'key_item'
444  */
get_table_no(const Item * key_item) const445 uint ndb_pushed_builder_ctx::get_table_no(const Item *key_item) const {
446   DBUG_ASSERT(key_item->type() == Item::FIELD_ITEM);
447   const uint count = m_plan.get_access_count();
448   const table_map bitmap = key_item->used_tables();
449 
450   for (uint i = 0; i < count; i++) {
451     TABLE *table = m_plan.get_table_access(i)->get_table();
452     if (table != nullptr && table->pos_in_table_list != nullptr) {
453       const table_map map = table->pos_in_table_list->map();
454       if (bitmap & map) {
455         DBUG_ASSERT((bitmap & ~map) == 0);  // No other tables in 'bitmap'
456         return i;
457       }
458     }
459   }
460   return MAX_TABLES;
461 }
462 
463 /**
464  * Get a ndb_table_access_map containg all tables [first..last]
465  */
get_tables_in_range(uint first,uint last)466 static ndb_table_access_map get_tables_in_range(uint first, uint last) {
467   ndb_table_access_map table_map;
468   for (uint i = first; i <= last; i++) {
469     table_map.add(i);
470   }
471   return table_map;
472 }
473 
474 /**
475  * Translate a table_map from external to internal table enumeration
476  */
get_table_map(table_map external_map) const477 ndb_table_access_map ndb_pushed_builder_ctx::get_table_map(
478     table_map external_map) const {
479   ndb_table_access_map internal_map;
480   const uint count = m_plan.get_access_count();
481   table_map bitmap = (external_map & ~PSEUDO_TABLE_BITS);
482 
483   for (uint i = 0; bitmap != 0 && i < count; i++) {
484     TABLE *table = m_plan.get_table_access(i)->get_table();
485     if (table != nullptr && table->pos_in_table_list != nullptr) {
486       const table_map map = table->pos_in_table_list->map();
487       if (bitmap & map) {
488         internal_map.add(i);
489         bitmap &= ~map;  // clear handled table
490       }
491     }
492   }
493   DBUG_ASSERT(bitmap == 0);
494   return internal_map;
495 }
496 
497 /**
498  * Main entry point to build a pushed join having 'join_root'
499  * as it first operation.
500  *
501  * If the root operation is pushable, we append as many 'child'
502  * operations as possible to the pushed join.
503  *
504  * This currently is implemented as a 3 pass algorithm:
505  *
506  *  1) Analyze each child and add it to 'm_join_scope' as
507  *    'pushable' if it qualifies as such. Part of this phase
508  *     is also calculations of possible parents for each table.
509  *
510  *  2) Determine the parent to be used among the set of possible
511  *     parents. This is decided based on simple heuristic where
512  *     the goal is to employ filters as soon as possible, and utilize
513  *     the parallelism of the SPJ block whenever considdered optimal.
514  *
515  *  3) Build the pushed query.
516  *
517  */
make_pushed_join(const ndb_pushed_join * & pushed_join)518 int ndb_pushed_builder_ctx::make_pushed_join(
519     const ndb_pushed_join *&pushed_join) {
520   DBUG_TRACE;
521   pushed_join = NULL;
522 
523   if (is_pushable_with_root()) {
524     int error;
525     error = optimize_query_plan();
526     if (unlikely(error)) return error;
527 
528     error = build_query();
529     if (unlikely(error)) return error;
530 
531     const NdbQueryDef *const query_def = m_builder->prepare(m_thd_ndb->ndb);
532     if (unlikely(query_def == NULL))
533       return -1;  // Get error with ::getNdbError()
534 
535     pushed_join = new ndb_pushed_join(*this, query_def);
536     if (unlikely(pushed_join == NULL)) return HA_ERR_OUT_OF_MEM;
537 
538     DBUG_PRINT("info", ("Created pushed join with %d child operations",
539                         pushed_join->get_operation_count() - 1));
540   }
541   return 0;
542 }  // ndb_pushed_builder_ctx::make_pushed_join()
543 
544 /**
545  * Find the number SPJ operations needed to execute a given access type.
546  * (Unique index lookups are translated to two single table lookups internally.)
547  */
internal_operation_count(AQP::enum_access_type accessType)548 uint internal_operation_count(AQP::enum_access_type accessType) {
549   switch (accessType) {
550     case AQP::AT_PRIMARY_KEY:
551     case AQP::AT_ORDERED_INDEX_SCAN:
552     case AQP::AT_MULTI_PRIMARY_KEY:
553     case AQP::AT_MULTI_MIXED:
554     case AQP::AT_TABLE_SCAN:
555       return 1;
556 
557       // Unique key lookups is mapped to two primary key lookups internally.
558     case AQP::AT_UNIQUE_KEY:
559     case AQP::AT_MULTI_UNIQUE_KEY:
560       return 2;
561 
562     default:
563       // Other access types are not pushable, so seeing them here is an error.
564       DBUG_ASSERT(false);
565       return 2;
566   }
567 }
568 
569 /**
570  * If there is a pushable query starting with 'root'; add as many
571  * child operations as possible to this 'ndb_pushed_builder_ctx' starting
572  * with that join_root.
573  */
is_pushable_with_root()574 bool ndb_pushed_builder_ctx::is_pushable_with_root() {
575   DBUG_TRACE;
576 
577   if (!maybe_pushable(m_join_root, PUSHABLE_AS_PARENT)) {
578     return false;
579   }
580 
581   const uint root_no = m_join_root->get_access_no();
582   const AQP::enum_access_type access_type = m_join_root->get_access_type();
583   DBUG_ASSERT(access_type != AQP::AT_VOID);
584 
585   if (access_type == AQP::AT_MULTI_UNIQUE_KEY) {
586     EXPLAIN_NO_PUSH(
587         "Table '%s' is not pushable, "
588         "access type 'MULTI_UNIQUE_KEY' not implemented",
589         m_join_root->get_table()->alias);
590     return false;
591   }
592 
593   if (m_join_root->filesort_before_join()) {
594     EXPLAIN_NO_PUSH(
595         "Table '%s' is not pushable, "
596         "need filesort before joining child tables",
597         m_join_root->get_table()->alias);
598     return false;
599   }
600 
601   /**
602    * Past this point we know at least root to be pushable as parent
603    * operation. Search remaining tables appendable if '::is_pushable_as_child()'
604    */
605   DBUG_PRINT("info",
606              ("Table %d is pushable as root", m_join_root->get_access_no()));
607   DBUG_EXECUTE("info", m_join_root->dbug_print(););
608   m_fld_refs = 0;
609   m_const_scope.set_prefix(root_no);
610   m_join_scope.add(root_no);
611   m_internal_op_count = internal_operation_count(access_type);
612 
613   /**
614    * Analyze tables below 'm_join_root' as potential members of a pushed
615    * join query starting with root.
616    * As part of analyzing the outer join and semi join structure,
617    * we use the join- and semi-join-nest structures set up by the optimizer,
618    * available trough the Abstract Query Plan (AQP) interface.
619    * See further documentation of how the nest structure is
620    * represented in m_tables[] in ha_ndbcluster_push.h.
621    */
622   {
623     const uint last_table = m_plan.get_access_count() - 1;
624     DBUG_ASSERT(m_plan.get_table_access(0)->get_first_inner() == 0);
625     DBUG_ASSERT(m_plan.get_table_access(0)->get_last_inner() == last_table);
626 
627     ndb_table_access_map upper_nests;
628     ndb_table_access_map inner_nest;
629     ndb_table_access_map sj_nest;
630 
631     uint first_inner = m_join_root->get_first_inner();
632     uint last_inner = m_join_root->get_last_inner();
633     int first_upper = m_join_root->get_first_upper();
634     if (root_no > first_inner) {
635       // m_join_root was not the 'first_inner' in nest;
636       // Last_inner / first_upper is only reliable read at first_inner:
637       last_inner = m_plan.get_table_access(first_inner)->get_last_inner();
638       first_upper = m_plan.get_table_access(first_inner)->get_first_upper();
639     }
640     int first_sj_inner = m_join_root->get_first_sj_inner();
641 
642     m_tables[root_no].m_first_inner = first_inner;
643     m_tables[root_no].m_last_inner = last_inner;
644     m_tables[root_no].m_first_upper = first_upper;
645 
646     for (uint tab_no = root_no; tab_no <= last_table; tab_no++) {
647       AQP::Table_access *table = m_plan.get_table_access(tab_no);
648 
649       // Set up join-nest for this tab_no
650       if (table->get_first_inner() == first_inner) {
651         // Still in the join-nest starting at 'first_inner'
652         m_tables[tab_no] = m_tables[first_inner];
653       } else {
654         DBUG_ASSERT(table->get_first_inner() == tab_no);
655 
656         // Enter new inner nest
657         upper_nests = m_tables[first_inner].m_upper_nests;
658         upper_nests.add(inner_nest);
659         inner_nest.clear_all();
660         first_upper = first_inner;
661         first_inner = tab_no;
662         last_inner = table->get_last_inner();
663 
664         m_tables[first_inner].m_first_inner = first_inner;
665         m_tables[first_inner].m_last_inner = last_inner;
666         m_tables[first_inner].m_first_upper = first_upper;
667         m_tables[first_inner].m_upper_nests = upper_nests;
668       }
669       m_tables[tab_no].m_inner_nest = inner_nest;
670       inner_nest.add(tab_no);
671 
672       /**
673        * Build similar info for sj_nest. Note that sj_nests are not nested
674        * inside other sj_nests. Thus there are no 'upper_sj_nests', and the
675        * logic for leaving a sj_nest becomes much simpler.
676        * (No un-nesting of nests)
677        */
678       if (table->get_first_sj_inner() >= 0) {
679         if (table->get_first_sj_inner() == first_sj_inner) {
680           // still within same sj_nest starting at first_sj_inner.
681         } else if (table->get_first_sj_inner() == (int)tab_no) {
682           // Start new sj_nest
683           first_sj_inner = table->get_first_sj_inner();
684           sj_nest.clear_all();
685         }
686         sj_nest.add(tab_no);
687       } else {
688         // Not in a sj_nest any longer
689         first_sj_inner = -1;
690         sj_nest.clear_all();
691       }
692       m_tables[tab_no].m_sj_nest = sj_nest;
693 
694       /**
695        * Use is_pushable_as_child() to analyze whether this table is
696        * pushable as part of query starting with 'root'. Note that
697        * outer- and semi-joined table scans can not be completely analyzed
698        * by is_pushable_as_child(): Pushability also depends on that all
699        * later tables in the same nest are pushed, and that there are no
700        * unpushed conditions for any (later) tables in this nest.
701        * These extra conditions are later checked by validate_join_nest(),
702        * when the nest is completed. This may cause some tables which passed
703        * the first pushability check, to later fail and be removed. This
704        * also has a cascading effect on any tables depending on those
705        * being removed. (See validate_join_nest() and remove_pushable())
706        */
707       if (table == m_join_root ||         // root, already known pushable
708           is_pushable_as_child(table)) {  // else, check child pushable
709         if (!ndbcluster_is_lookup_operation(table->get_access_type())) {
710           // A pushable table scan, collect in bitmap for later checks
711           m_scan_operations.add(tab_no);
712         }
713       }
714 
715       /**
716        * This table can be the last inner table of join-nest(s).
717        * That will require additional pushability checks of entire nest
718        */
719       if (table->get_last_sj_inner() == (int)tab_no) {
720         if (first_sj_inner > (int)root_no) {  // Leaving the semi_join nest
721           // Phase 2 of pushability check, see big comment above.
722           validate_join_nest(sj_nest, first_sj_inner, tab_no, "semi");
723         }
724         first_sj_inner = -1;
725         sj_nest.clear_all();
726       }
727 
728       /**
729        * Note that the same tab_no may unwind several inner join-nests.
730        * ... all having the same 'last_inner' (this tab_no)
731        */
732       while (tab_no == last_inner &&  // End of current join-nest, and
733              first_upper >= 0) {      // has an embedding upper nest
734         if (first_inner > root_no) {  // Leaving an outer joined nest
735           // Phase 2 of pushability check, see big comment above.
736           validate_join_nest(inner_nest, first_inner, tab_no, "outer");
737         }
738 
739         // The upper_nest becomes our new inner_nest when we 'unwind'.
740         ndb_table_access_map upper_nest(upper_nests);
741         upper_nest.subtract(m_tables[first_upper].m_upper_nests);
742         inner_nest = upper_nest;
743         upper_nests = m_tables[first_upper].m_upper_nests;
744         first_inner = first_upper;
745 
746         /**
747          * Note that we may 'unwind' to a nest level above where we started as
748          * root. m_tables[first_upper] will then not hold the last_inner,
749          * first_upper, so we need to read it from the AQP interface instead.
750          */
751         last_inner = m_plan.get_table_access(first_upper)->get_last_inner();
752         first_upper = m_plan.get_table_access(first_upper)->get_first_upper();
753 
754       }  // while 'leaving a nest'
755     }    // for tab_no [root_no..last_table]
756     DBUG_ASSERT(upper_nests.is_clear_all());
757   }
758   DBUG_ASSERT(m_join_scope.contain(root_no));
759   return (m_join_scope.last_table() > root_no);  // Anything pushed?
760 }  // ndb_pushed_builder_ctx::is_pushable_with_root()
761 
762 /***************************************************************
763  *  is_pushable_as_child()
764  *
765  * Determines if the specified child ('table') can be appended to
766  * an existing chain of previously pushed join operations.
767  *
768  * To be considered pushable the child operation should:
769  *
770  *  1) Have an REF to the previous parent operations.
771  *  2) Refer only a single parent, or a grandparent reachable through
772  *     a single parent common to all key fields in the 'REF'
773  *
774  * In order to increase pushability we use the COND_EQUAL sets
775  * to resolve cases (2) above) where multiple parents are refered.
776  * If needed to make a child pushable, we replace parent
777  * references with another from the COND_EQUAL sets which make
778  * it pushable .
779  ****************************************************************/
is_pushable_as_child(AQP::Table_access * table)780 bool ndb_pushed_builder_ctx::is_pushable_as_child(AQP::Table_access *table) {
781   DBUG_TRACE;
782   const uint root_no = m_join_root->get_access_no();
783   const uint tab_no = table->get_access_no();
784   DBUG_ASSERT(tab_no > root_no);
785 
786   if (!maybe_pushable(table, PUSHABLE_AS_CHILD)) {
787     return false;
788   }
789 
790   const AQP::enum_access_type root_type = m_join_root->get_access_type();
791   const AQP::enum_access_type access_type = table->get_access_type();
792 
793   if (!(ndbcluster_is_lookup_operation(access_type) ||
794         access_type == AQP::AT_ORDERED_INDEX_SCAN)) {
795     EXPLAIN_NO_PUSH(
796         "Can't push table '%s' as child, 'type' must be a 'ref' access",
797         table->get_table()->alias);
798     table->set_table_properties(table->get_table_properties() &
799                                 ~PUSHABLE_AS_CHILD);
800     return false;
801   }
802 
803   // There is a limitation in not allowing LOOKUP - (index)SCAN operations
804   if (access_type == AQP::AT_ORDERED_INDEX_SCAN &&
805       ndbcluster_is_lookup_operation(root_type)) {
806     EXPLAIN_NO_PUSH(
807         "Push of table '%s' as scan-child "
808         "with lookup-root '%s' not implemented",
809         table->get_table()->alias, m_join_root->get_table()->alias);
810     // 'table' may still be PUSHABLE_AS_CHILD with another parent
811     return false;
812   }
813 
814   const uint no_of_key_fields = table->get_no_of_key_fields();
815   if (unlikely(no_of_key_fields > ndb_pushed_join::MAX_LINKED_KEYS)) {
816     EXPLAIN_NO_PUSH(
817         "Can't push table '%s' as child, "
818         "too many ref'ed parent fields",
819         table->get_table()->alias);
820     table->set_table_properties(
821         table->get_table_properties() &
822         ~PUSHABLE_AS_CHILD);  // Permanently disable as child
823     return false;
824   }
825 
826   for (uint i = tab_no; i > root_no; i--) {
827     if (m_plan.get_table_access(i)->uses_join_cache()) {
828       EXPLAIN_NO_PUSH(
829           "Cannot push table '%s' as child of table '%s'. Doing so "
830           "would prevent using join buffer for table '%s'.",
831           table->get_table()->alias, m_join_root->get_table()->alias,
832           m_plan.get_table_access(i)->get_table()->alias);
833       return false;
834     }
835   }
836 
837   // Check that we do not exceed the max number of pushable operations.
838   const uint internal_ops_needed = internal_operation_count(access_type);
839   if (unlikely(m_internal_op_count + internal_ops_needed >
840                NDB_SPJ_MAX_TREE_NODES)) {
841     EXPLAIN_NO_PUSH(
842         "Cannot push table '%s' as child of '%s'. Max number"
843         " of pushable tables exceeded.",
844         table->get_table()->alias, m_join_root->get_table()->alias);
845     return false;
846   }
847   m_internal_op_count += internal_ops_needed;
848 
849   DBUG_PRINT("info",
850              ("Table:%d, Checking %d REF keys", tab_no, no_of_key_fields));
851 
852   /**
853    * Calculate the set of possible parents for each non-const_item KEY_PART
854    * from the table. In addition to the parent table directly referred
855    * by the KEY_PART, any tables in *same join nest*, available by usage of
856    * equality sets are also added as a possible parent.
857    *
858    * The set of 'key_parents[]' are saved for later usage by ::optimize_*(),
859    * which will select the actual parent to be used for each table.
860    *
861    * We also aggregate the set of 'all_parents' referred by the keys.
862    * This is used for checking whether table is pushable.
863    */
864   ndb_table_access_map all_parents;
865   ndb_table_access_map *key_parents =
866       new (*THR_MALLOC) ndb_table_access_map[no_of_key_fields];
867   m_tables[tab_no].m_key_parents = key_parents;
868 
869   for (uint key_part_no = 0; key_part_no < no_of_key_fields; key_part_no++) {
870     const Item *const key_item = table->get_key_field(key_part_no);
871     const KEY_PART_INFO *key_part = table->get_key_part_info(key_part_no);
872 
873     if (key_item->const_item())  // REF is a literal or field from const-table
874     {
875       DBUG_PRINT("info", (" Item type:%d is 'const_item'", key_item->type()));
876       if (!is_const_item_pushable(key_item, key_part)) {
877         return false;
878       }
879     } else if (key_item->type() == Item::FIELD_ITEM) {
880       /**
881        * Calculate all parents FIELD_ITEM may refer - Including those
882        * available through usage of equality sets. All field_parents
883        * will be from within the same join_nest.
884        * Only parents within m_join_scope are considered.
885        */
886       ndb_table_access_map field_parents;
887       if (!is_field_item_pushable(table, key_item, key_part, field_parents)) {
888         return false;
889       }
890       // Save the found key_parents[], aggregate total set of parents referable.
891       key_parents[key_part_no] = field_parents;
892       all_parents.add(field_parents);
893     } else {
894       EXPLAIN_NO_PUSH(
895           "Can't push table '%s' as child, "
896           "column '%s' does neither 'ref' a column nor a constant",
897           table->get_table()->alias, key_part->field->field_name);
898       table->set_table_properties(
899           table->get_table_properties() &
900           ~PUSHABLE_AS_CHILD);  // Permanently disable as child
901 
902       return false;
903     }
904   }  // for (uint key_part_no= 0 ...
905 
906   // If no parent candidates within current m_join_scope, table is unpushable.
907   if (all_parents.is_clear_all()) {
908     EXPLAIN_NO_PUSH(
909         "Can't push table '%s' as child of '%s', "
910         "no parent-child dependency exists between these tables",
911         table->get_table()->alias, m_join_root->get_table()->alias);
912     return false;
913   }
914 
915   /**
916    * Try to push condition to 'table'. Whatever we could not push of the
917    * condition is a 'server side condition' which the server has to
918    * evaluate later. The existence of such conditions may effect the join
919    * pushability of tables, so we need to try to push conditions first.
920    */
921   const Item *pending_cond = table->get_condition();
922   if (pending_cond != nullptr &&
923       current_thd->optimizer_switch_flag(
924           OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN)) {
925     ha_ndbcluster *handler =
926         down_cast<ha_ndbcluster *>(table->get_table()->file);
927 
928     const bool other_tbls_ok = false;
929     handler->m_cond.prep_cond_push(pending_cond, other_tbls_ok);
930     pending_cond = handler->m_cond.m_remainder_cond;
931   }
932   if (pending_cond != nullptr) {
933     /**
934      * An anti join will always have an 'antijoin_null_cond' attached.
935      * The general rule is that we do not allow any tables having unpushed
936      * conditions to be pushed as part of a SPJ operation. However, this
937      * special 'antijoin_null_cond' could be ignored, as the same NULL-only
938      * filtering is done by the antijoin execution at the server.
939      */
940     if (!(table->is_antijoin() && is_antijoin_null_cond(pending_cond))) {
941       m_has_pending_cond.add(tab_no);
942     }
943   }
944   if (!ndbcluster_is_lookup_operation(table->get_access_type())) {
945     // Check extra limitations on when index scan is pushable,
946     if (!is_pushable_as_child_scan(table, all_parents)) {
947       return false;
948     }
949   }
950 
951   const ndb_table_access_map inner_nest(m_tables[tab_no].m_inner_nest);
952   if (!inner_nest.contain(all_parents)) {  // Is not a plain inner-join
953 
954     ndb_table_access_map depend_parents;
955 
956     /**
957      * Some key_parents[] could have dependencies outside of embedding_nests.
958      * Calculate the actual nest dependencies and check join pushability.
959      */
960     for (uint i = 0; i < no_of_key_fields; i++) {
961       if (!key_parents[i].is_clear_all()) {  // Key refers a parent field
962 #ifndef DBUG_OFF
963         // Verify requirement that all field_parents are from within same nest
964         {
965           const uint last = key_parents[i].last_table(tab_no);
966           ndb_table_access_map nest(m_tables[last].m_inner_nest);
967           nest.add(last);
968           DBUG_ASSERT(nest.contain(key_parents[i]));
969         }
970 #endif
971         const uint first = key_parents[i].first_table();
972         depend_parents.add(first);
973       }
974     }
975 
976     /**
977      * In the (unlikely) case of parent references to tables not
978      * in our embedding join nests at all, we have to make sure that we do
979      * not cause extra dependencies to be added between the referred join nests.
980      */
981     const ndb_table_access_map embedding_nests(
982         m_tables[tab_no].embedding_nests());
983     if (unlikely(!embedding_nests.contain(depend_parents))) {
984       if (!is_outer_nests_referable(table, depend_parents)) {
985         return false;
986       }
987     }
988 
989     /**
990      * Calculate contribution to the 'nest dependency', which is the ancestor
991      * dependencies to tables not being part of this inner_nest themself.
992      * These ancestor dependencies are set as the required 'm_ancestors'
993      * on the 'first_inner' table in each nest, and later used to enforce
994      * ::optimize_query_plan() to use these tables as (grand-)parents
995      */
996     const uint first_inner = m_tables[tab_no].m_first_inner;
997     // Only interested in the upper-nest-level dependencies:
998     depend_parents.intersect(m_tables[first_inner].embedding_nests());
999 
1000     // Can these outer parent dependencies co-exists with existing
1001     // ancestor dependencies?
1002     if (!depend_parents.is_clear_all() &&
1003         !m_tables[first_inner].m_ancestors.is_clear_all()) {
1004       ndb_table_access_map nest_dependencies(depend_parents);
1005       nest_dependencies.add(m_tables[first_inner].m_ancestors);
1006 
1007       uint ancestor_no = first_inner;
1008       while (!embedding_nests.contain(nest_dependencies)) {
1009         ancestor_no = nest_dependencies.last_table(ancestor_no - 1);
1010         nest_dependencies.clear_bit(ancestor_no);
1011 
1012         // If remaining dependencies are unavailable from parent, we can't push
1013         if (!m_tables[ancestor_no].embedding_nests().contain(
1014                 nest_dependencies)) {
1015           const AQP::Table_access *const parent =
1016               m_plan.get_table_access(ancestor_no);
1017           EXPLAIN_NO_PUSH(
1018               "Can't push table '%s' as child of '%s', "
1019               "as it would make the parent table '%s' "
1020               "depend on table(s) outside of its join-nest",
1021               table->get_table()->alias, m_join_root->get_table()->alias,
1022               parent->get_table()->alias);
1023           return false;
1024         }
1025       }
1026     }
1027     m_tables[first_inner].m_ancestors.add(depend_parents);
1028     DBUG_ASSERT(!m_tables[first_inner].m_ancestors.contain(first_inner));
1029   }
1030 
1031   m_join_scope.add(tab_no);
1032   return true;
1033 }  // ndb_pushed_builder_ctx::is_pushable_as_child
1034 
1035 /***************************************************************
1036  *  is_pushable_within_nest() / is_pushable_as_child_scan()
1037  *
1038  * There are additional limitation on when an index scan is pushable
1039  * relative to a (single row) primary key or unique key lookup operation.
1040  *
1041  * Such limitations exists for index scan operation being outer- or
1042  * semi-joined: Consider the query:
1043  *
1044  * select * from t1 left join t2
1045  *   on t1.attr=t2.ordered_index
1046  *   where predicate(t1.row, t2. row);
1047  *
1048  * Where 'predicate' cannot be pushed to the ndb. (a 'pending_cond', above!)
1049  * The ndb api may then return:
1050  *
1051  * +---------+---------+
1052  * | t1.row1 | t2.row1 | (First batch)
1053  * | t1.row2 | t2.row1 |
1054  * ..... (NextReq).....
1055  * | t1.row1 | t2.row2 | (Next batch)
1056  * +---------+---------+
1057  *
1058  * Since we could not return all t2 rows matching 't1.row1' in the first
1059  * batch, it is repeated for the next batch of t2 rows. From mysqld POW it
1060  * will appear as a different row, even if it is the same rows as returned
1061  * in the first batch. This works just fine when the nest loop joiner
1062  * create a plain INNER JOIN result; the different instances of 't1.row1'
1063  * would just appear a bit out of order. However OUTER JOIN is a different
1064  * matter:
1065  *
1066  * Assume that the rows [t1.row1, t2.row1] from the first batch does not
1067  * satisfies 'predicate'. As there are no more 't1.row1's in this batch,
1068  * mysqld will conclude it has seen all t1.row1's without any matching
1069  * t2 rows, Thus it will create a NULL extended t2 row in the (outer joined)
1070  * result set.
1071  *
1072  * As the same t1.row1 will be returned from the NDB API in the next batch,
1073  * mysqld will create a result row also for this instance - Either with yet
1074  * another NULL-extended t2 row, or possibly one or multiple matching rows.
1075  * In either case resulting in an incorrect result set. Like:
1076  * +---------+---------+
1077  * | t1.row1 | NULL    | -> Error!
1078  * | t1.row2 | t2.row1 |
1079  * | t1.row1 | t2.row2 |
1080  * +---------+---------+
1081  *
1082  * So in order to allow an outer joined index scan to be pushed, we need
1083  * to check that a row returned from a pushed index-scan will not later
1084  * be rejected by mysqld - i.e. the join has to be fully evaluated by SPJ
1085  * (in companion with the SPJ API):
1086  *
1087  *  1a) There should be no 'pending_cond' (unpushed conditions) on the
1088  *      table.
1089  *  1b) Neither could any *other* tables within the same inner_join nest
1090  *      have pending_cond's. (An inner join nest require matching rows
1091  *      from all tables in the nest: A non-matching pending_cond on a row
1092  *      from any table in the nest, will also eliminate the rows from the
1093  *      other tables. (Possibly creating false NULL-extensions)
1094  *  1c) Neither should any tables within the upper nests have
1095  *      pending_cond's. Consider the nest structure t1, (t2, (t3)),
1096  *      where t1 and 'this' table t3 are scans. If t2 has a pending
1097  *      condition, that condition may eliminate rows from the embedded
1098  *      (outer joined) t3 nest, and result in false NULL extended rows
1099  *      when t3 rows are fetched in multiple batches.
1100  *      (Note that this restriction does not apply to the uppermost nest
1101  *      containing t1: A non-matching condition on that table will eliminate
1102  *      the t1 row as well, thus there will be no extra NULL extended
1103  *      rows in the result set.
1104  *
1105  * 2)   There should be no unpushed tables in:
1106  * 2b)  In this inner_join nest.
1107  * 2c)  In any upper nests of this table.
1108  *
1109  *      This case is similar as for pending_cond: A non-match when mysqld
1110  *      joins in the rows from the unpushed table may eliminate rows
1111  *      returned from the pushed joins as well, resulting in extra
1112  *      NULL extended rows.
1113  *
1114  * 3)   In addition the join condition may explicitly specify dependencies
1115  *      on tables which are not in either of the upper_nests,
1116  *      eg t1, (t2,t3), (t4), where t4 has a join condition on t3.
1117  *      If either:
1118  * 3a)  t2 or t3 has an unpushed condition, possibly eliminating returned
1119  *      (t2,t3) rows, and the t4 rows depending on these being NOT NULL.
1120  * 3b)  t2 or t3 are not pushed, mysqld doesn't matching rows from these
1121  *      tables, which also eliminate the t4 rows, possibly resulting in
1122  *      extra NULL extended rows.
1123  *
1124  * Note that ::is_pushable_as_child_scan() can only check these conditions for
1125  * tables preceeding it in the query plan. ::validate_join_nest() will later
1126  * do similar checks when we have completed a nest level. The later check
1127  * would be sufficient, however we prefer to 'fail fast'.
1128  *
1129  ****************************************************************/
is_pushable_within_nest(const AQP::Table_access * table,ndb_table_access_map nest,const char * nest_type)1130 bool ndb_pushed_builder_ctx::is_pushable_within_nest(
1131     const AQP::Table_access *table, ndb_table_access_map nest,
1132     const char *nest_type) {
1133   DBUG_TRACE;
1134   DBUG_ASSERT(!ndbcluster_is_lookup_operation(table->get_access_type()));
1135   const uint tab_no = table->get_access_no();
1136 
1137   // Logic below assume that 'this' table is not part of the 'nest'.
1138   nest.clear_bit(tab_no);
1139 
1140   /**
1141    * 1) Check if outer- or semi-joined table depends on 'unpushed condition'
1142    */
1143   if (unlikely(m_has_pending_cond.contain(tab_no))) {  // 1a)
1144     // This table has unpushed condition
1145     EXPLAIN_NO_PUSH(
1146         "Can't push %s joined table '%s' as child of '%s', "
1147         "table condition can not be fully evaluated by pushed join",
1148         nest_type, table->get_table()->alias, m_join_root->get_table()->alias);
1149     return false;
1150   }
1151 
1152   if (unlikely(m_has_pending_cond.is_overlapping(nest))) {  // 1b,1c:
1153     // Other (lookup tables) withing nest has unpushed condition
1154     ndb_table_access_map pending_conditions(m_has_pending_cond);
1155     pending_conditions.intersect(nest);
1156     // Report the closest violating table, may be multiple.
1157     const uint violating = pending_conditions.last_table(tab_no);
1158     EXPLAIN_NO_PUSH(
1159         "Can't push %s joined table '%s' as child of '%s', "
1160         "condition on its dependant table '%s' is not pushed down",
1161         nest_type, table->get_table()->alias, m_join_root->get_table()->alias,
1162         m_plan.get_table_access(violating)->get_table()->alias);
1163     return false;
1164   }
1165 
1166   /**
1167    * 2) Check if outer- or semi-joined table depends on 'unpushed tables'
1168    */
1169   if (unlikely(!m_join_scope.contain(nest))) {  // 2b,2c
1170     ndb_table_access_map unpushed_tables(nest);
1171     unpushed_tables.subtract(m_join_scope);
1172     // Report the closest unpushed table, may be multiple.
1173     const uint violating = unpushed_tables.last_table(tab_no);
1174     EXPLAIN_NO_PUSH(
1175         "Can't push %s joined table '%s' as child of '%s', "
1176         "table '%s' in its dependant join-nest(s) is not part of the "
1177         "pushed join",
1178         nest_type, table->get_table()->alias, m_join_root->get_table()->alias,
1179         m_plan.get_table_access(violating)->get_table()->alias);
1180     return false;
1181   }
1182   return true;
1183 }
1184 
is_pushable_as_child_scan(const AQP::Table_access * table,const ndb_table_access_map all_parents)1185 bool ndb_pushed_builder_ctx::is_pushable_as_child_scan(
1186     const AQP::Table_access *table, const ndb_table_access_map all_parents) {
1187   DBUG_TRACE;
1188   DBUG_ASSERT(!ndbcluster_is_lookup_operation(table->get_access_type()));
1189 
1190   const uint root_no = m_join_root->get_access_no();
1191   const uint tab_no = table->get_access_no();
1192 
1193   if (m_tables[tab_no].isOuterJoined(m_tables[root_no])) {
1194     /**
1195      * Is an outer join relative to root. Even if tab_no is inner_joined with
1196      * another parent than 'root', any restrictions on scan operations still
1197      * apply.
1198      */
1199 
1200     /**
1201      * Online upgrade, check if we are connected to a 'ndb' allowing us to push
1202      * outer joined scan operation (ver >= 8.0.20), Else we reject pushing.
1203      */
1204     if (unlikely(!NdbQueryBuilder::outerJoinedScanSupported(m_thd_ndb->ndb))) {
1205       EXPLAIN_NO_PUSH(
1206           "Can't push table '%s' as child of '%s', "
1207           "outer join of scan-child not implemented",
1208           table->get_table()->alias, m_join_root->get_table()->alias);
1209       return false;
1210     }
1211 
1212     /**
1213      * Calculate the set of tables being outer joined relative to root.
1214      * i.e. the tables which may be incorrectly NULL extended due to
1215      * unpushed conditions and tables. These are the tables we check
1216      * the above 1b,1c,2b and 2c cases against.
1217      */
1218     ndb_table_access_map outer_join_nests(m_tables[tab_no].embedding_nests());
1219     outer_join_nests.subtract(full_inner_nest(root_no, tab_no));
1220 
1221     const char *join_type = table->is_antijoin() ? "anti" : "outer";
1222     if (!is_pushable_within_nest(table, outer_join_nests, join_type)) {
1223       return false;
1224     }
1225 
1226     /**
1227      * 3) Check if any tables outside of the embedding nest are referred.
1228      */
1229     const ndb_table_access_map embedding_nests(
1230         m_tables[tab_no].embedding_nests());
1231     if (unlikely(!embedding_nests.contain(all_parents))) {           // 3)
1232       if (unlikely(!embedding_nests.contain(m_has_pending_cond))) {  // 3a)
1233         EXPLAIN_NO_PUSH(
1234             "Can't push %s joined table '%s' as child of '%s', "
1235             "exists unpushed condition in join-nests it depends on",
1236             join_type, table->get_table()->alias,
1237             m_join_root->get_table()->alias);
1238         return false;
1239       }
1240 
1241       // Calculate all unpushed tables prior to this table.
1242       ndb_table_access_map unpushed_tables;
1243       unpushed_tables.set_prefix(tab_no);
1244       unpushed_tables.subtract(m_const_scope);
1245       unpushed_tables.subtract(m_join_scope);
1246 
1247       /**
1248        * Note that the check below is a bit too strict, we check:
1249        *  'Are there any unpushed tables outside of our embedding nests',
1250        *  instead of 'Do we refer tables from nests outside embedding nests,
1251        *  having unpushed tables'. As we alread know 'all_parents' are not
1252        *  contained in 'embedding'.
1253        * The outcome should be the same except if we have parent refs to
1254        * multiple non-embedded nests. (very unlikely)
1255        */
1256       if (unlikely(!embedding_nests.contain(unpushed_tables))) {  // 3b)
1257         EXPLAIN_NO_PUSH(
1258             "Can't push %s joined table '%s' as child of '%s', "
1259             "table depends on join-nests with unpushed tables",
1260             join_type, table->get_table()->alias,
1261             m_join_root->get_table()->alias);
1262         return false;
1263       }
1264     }
1265   }  // end 'outer joined scan'
1266 
1267   /**
1268    * As for outer joins, there are restrictions for semi joins:
1269    *
1270    * Scan-scan result may return the same ancestor-scan rowset
1271    * multiple times when rowset from child scan has to be fetched
1272    * in multiple batches (as above). This is fine for nested loop
1273    * evaluations of pure loops, as it should just produce the total
1274    * set of join combinations - in any order.
1275    *
1276    * However, the different semi join strategies (FirstMatch,
1277    * Loosescan, Duplicate Weedout) requires that skipping
1278    * a row (and its nested loop ancestors) is 'permanent' such
1279    * that it will never reappear in later batches.
1280    *
1281    * So we do not (yet) allow an index-scan to be semi-joined.
1282    *
1283    * Note that it is the semi_join properties relative to the
1284    * other tables we join with which matter - A table joining
1285    * with another table within the same semi_join nest is an
1286    * INNER JOIN wrt. that other table. (Which is pushable)
1287    */
1288 
1289   if (table->is_sj_firstmatch() &&
1290       NdbQueryBuilder::outerJoinedScanSupported(m_thd_ndb->ndb)) {
1291     // 'table' is part of a semi-join
1292     // (We support semi-join only if firstMatch strategy is used)
1293     DBUG_ASSERT(m_tables[tab_no].m_sj_nest.contain(table->get_access_no()));
1294 
1295     if (!is_pushable_within_nest(table, m_tables[tab_no].m_sj_nest, "semi")) {
1296       return false;
1297     }
1298     if (table->get_first_sj_inner() == (int)tab_no) {
1299       /**
1300        * In order to do correct firstmatch duplicate elimination in
1301        * SPJ, we need to ensure that the table to eliminate duplicates
1302        * from is the parent of the firstmast-sj-nest -> enforce it
1303        * as a mandatory ancestor of the sj-nest.
1304        */
1305       const int firstmatch_return = table->get_firstmatch_return();
1306       if (!all_parents.contain(firstmatch_return)) {
1307         EXPLAIN_NO_PUSH(
1308             "Can't push table '%s' as child of '%s', "
1309             "the FirstMatch-return '%s' can not be made the parent of sj-nest",
1310             table->get_table()->alias, m_join_root->get_table()->alias,
1311             m_plan.get_table_access(firstmatch_return)->get_table()->alias);
1312         return false;
1313       }
1314       m_tables[tab_no].m_ancestors.add(firstmatch_return);
1315     }
1316   } else if (!m_tables[tab_no].m_sj_nest.is_clear_all()) {
1317     if (!m_tables[tab_no].m_sj_nest.contain(m_join_scope)) {
1318       // Semi-joined relative to some other tables in join_scope
1319       EXPLAIN_NO_PUSH(
1320           "Can't push table '%s' as child of '%s', "
1321           "semi join of scan-child not implemented",
1322           table->get_table()->alias, m_join_root->get_table()->alias);
1323       return false;
1324     }
1325   } else if (!m_tables[root_no].m_sj_nest.is_clear_all()) {
1326     // Root is part of a semi join, table is not
1327     EXPLAIN_NO_PUSH(
1328         "Can't push table '%s' as child of '%s', "
1329         "not members of same semi join 'nest'",
1330         table->get_table()->alias, m_join_root->get_table()->alias);
1331     return false;
1332   }
1333   // end 'semi_join' handling
1334 
1335   /**
1336    * Note, for both 'outer join', and 'semi joins restriction above:
1337    *
1338    * The restriction could have been lifted if we could
1339    * somehow ensure that all rows from a child scan are fetched
1340    * before we move to the next ancestor row.
1341    *
1342    * Which is why we do not force the same restrictions on lookup.
1343    */
1344 
1345   return true;
1346 }  // ndb_pushed_builder_ctx::is_pushable_as_child_scan
1347 
1348 /***************************************************************
1349  *
1350  * is_outer_nests_referable()
1351  *
1352  * In the (unlikely) case of parent references to tables not
1353  * in our embedding join nests, we have to make sure that we do
1354  * not cause extra dependencies to be added between the join nests.
1355  * (Which would have changed the join semantic specified in query)
1356  *
1357  * If this table has multiple dependencies, it can only be added to
1358  * the set of pushed tables if the dependent tables themself
1359  * depends, or could be make dependent, on each other.
1360  *
1361  * Such new dependencies can only be added iff all 'depend_parents'
1362  * are in the same 'inner join nest', i.e. we can not add *new*
1363  * dependencies on outer joined tables (or nests).
1364  *
1365  * A typical example is t1 oj (t2) oj (t3) oj (t4), where t4.join_cond
1366  * refers *both* the non_embedding tables t2 and t3. In such cases t4 can not
1367  * be pushed unless t3 already has a join condition depending on t2.
1368  * Note that the header file ha_ndbcluster_push.h contains more
1369  * extensive comments regarding this.
1370  *
1371  * Algorithm:
1372  * 1. Calculate the minimum set of 'dependencies' for the
1373  *    key_parents[].
1374  *
1375  * 2. Check the 'dependencies' set, starting at the last (the
1376  *    table closest to this table). Check that it either already
1377  *    exists a dependency between each such table and the remaining
1378  *    dependant tables, or that we are allowed to add the required
1379  *    dependencies.
1380  ***************************************************************/
is_outer_nests_referable(const AQP::Table_access * table,const ndb_table_access_map depend_parents)1381 bool ndb_pushed_builder_ctx::is_outer_nests_referable(
1382     const AQP::Table_access *table, const ndb_table_access_map depend_parents) {
1383   DBUG_TRACE;
1384 
1385   const uint tab_no = table->get_access_no();
1386   const uint first_inner = m_tables[tab_no].m_first_inner;
1387   const ndb_table_access_map embedding_nests(
1388       m_tables[tab_no].embedding_nests());
1389   DBUG_ASSERT(!embedding_nests.contain(depend_parents));
1390 
1391   /**
1392    * Include nest-level ancestor dependencies already enforced.
1393    */
1394   ndb_table_access_map dependencies(depend_parents);
1395   dependencies.add(m_tables[first_inner].m_ancestors);
1396 
1397   /**
1398    * Check that all parents we depend on are available from within the
1399    * embedding nests. This include upper_nests previously extended
1400    * with previous references to tables not in the direct line of
1401    * upper nests. Which then become a part of later embedded_nests being
1402    * referrable.
1403    */
1404   {
1405     const uint parent_no = dependencies.last_table(tab_no - 1);
1406     dependencies.clear_bit(parent_no);
1407 
1408     // If remaining dependencies are unavailable from parent, we can't push
1409     if (!m_tables[parent_no].embedding_nests().contain(dependencies)) {
1410       const AQP::Table_access *const parent =
1411           m_plan.get_table_access(parent_no);
1412       EXPLAIN_NO_PUSH(
1413           "Can't push table '%s' as child of '%s', "
1414           "as it would make the parent table '%s' "
1415           "depend on table(s) outside of its join-nest",
1416           table->get_table()->alias, m_join_root->get_table()->alias,
1417           parent->get_table()->alias);
1418       return false;
1419     }
1420 
1421     /**
1422      * Allow all tables in the referred parents nest to become
1423      * part of the set of later referrable upper_nests.
1424      */
1425     if (parent_no < first_inner) {
1426       // referred nest is not embedded within current inner_nest
1427       DBUG_ASSERT(m_tables[parent_no].m_last_inner < tab_no);
1428 
1429       /**
1430        * Referring the outer-joined parent, introduce the requirement
1431        * that all our upper_nest table either has to be in the same
1432        * inner_nest as the parent, or be in the parents upper_nest.
1433        * Rebuild our upper_nests to reflect this.
1434        */
1435       ndb_table_access_map new_upper_nests(m_tables[parent_no].m_upper_nests);
1436       new_upper_nests.add(full_inner_nest(parent_no, tab_no));
1437       m_tables[first_inner].m_upper_nests = new_upper_nests;
1438       m_tables[tab_no].m_upper_nests = new_upper_nests;
1439     }
1440   }
1441   return true;
1442 }
1443 
1444 /*****************************************************************************
1445  * validate_join_nest()
1446  *
1447  * A join-nest has been completed by ::is_pushable_with_root().
1448  * If the join nest is outer joined with other tables in the pushed join, and
1449  * if this nest, or other nests embedded within it contains (outer joined)
1450  * table scans, an extra 'validate' of the pushed joins is required:
1451  *
1452  * We need to 'validate' that none of these 'invalid' cases exists for
1453  * the join nest:
1454  *
1455  *  1) Some of the tables in the nest were not pushed.
1456  *  2) Some of the pushed tables in the nest has (remaining parts of)
1457  *     conditions not being pushed.
1458  *  3) This nest, or some nests embedded within it, has a 'FOUND_MATCH' trigger,
1459  *     condition covering tables in this nest. (Which effectively means the
1460  *     condition act as a filter condition on this nest, as in 2) )
1461  *
1462  * The above restriction are similar to the ones checked for outer joined
1463  * table scans in is_pushable_as_child(), where we preferably try to catch
1464  * these restrictions. However, at that point in time we are not able to
1465  * perform this check for tables later in the query plan.
1466  *
1467  * So we need similar checks for validating the entire nest when it has been
1468  * completed. If the nest fails the 'validate', no outer joined table scans
1469  * should have been pushed as part of the nest, or in nests embedded within
1470  * this nest. Thus they have to be removed from the pushed join.
1471  * (Using ::remove_pushable())
1472  *
1473  * Note that validate_join_nest() check the entire nest, so the similar
1474  * checks on outer joined scans could have been skipped from
1475  * is_pushable_as_child(). However, we want to catch these non pushable
1476  * tables as early as possible, so we effectively duplicates these checks.
1477  ******************************************************************************/
validate_join_nest(ndb_table_access_map inner_nest,const uint first_inner,const uint last_inner,const char * nest_type)1478 void ndb_pushed_builder_ctx::validate_join_nest(ndb_table_access_map inner_nest,
1479                                                 const uint first_inner,
1480                                                 const uint last_inner,
1481                                                 const char *nest_type) {
1482   DBUG_TRACE;
1483   if (first_inner <= m_join_root->get_access_no()) return;
1484 
1485   // This nest, or nests embedded within it, has scan operations?
1486   const bool nest_has_scans =
1487       (m_scan_operations.first_table(first_inner) < m_plan.get_access_count());
1488   if (nest_has_scans) {
1489     ndb_table_access_map filter_cond;
1490 
1491     /**
1492      * Check conditions inside nest(s) for possible FOUND_MATCH-triggers.
1493      * These are effectively evaluated 'higher up' in the nest structure
1494      * when we have found a join-match, or created a null-extension
1495      * for all 'used_tables()' in the trigger condition.
1496      * So we collect the aggregated map of tables possibly affected by
1497      * these MATCH-filters in 'filter_cond'
1498      *
1499      * Example: select straight_join *
1500      *          from
1501      *            t1 left join
1502      *              (t1 as t2 join t1 as t3 on t3.a = t2.b)
1503      *            on t2.a = t1.b
1504      *          where (t2.c > t1.c or t1.c < 0);
1505      *
1506      * or: 't1 oj (t2,t3) where t2.c > t1.c or t1.c < 0'
1507      *
1508      * The where condition refers columns from the outer joined nest (t2,t3)
1509      * which are possibly NULL extended. Thus, the where cond is encapsulated in
1510      * a triggered-FOUND_MATCH(t2,t3), effectively forcing the cond. to be
1511      * evaluated only when we have a non-NULL extended match for t2,t3.
1512      * For some (legacy?) reason the optimizer will attach the trigger condition
1513      * to table t2 in the query plan 't1,t2,t3', as all referred tables(t1,t2)
1514      * are available at this point.
1515      * However, this ignores the encapsulating FOUND_MATCH(t2,t3) trigger,
1516      * which require the condition to also have a matching t3 row. The
1517      * WalkItem below will identify such triggers and calculate the real table
1518      * coverage of them.
1519      *
1520      * Note that 'explain format=tree' will represent such filters in a more
1521      * sensible way: (We don't use the Iterators here (yet) though)
1522      *
1523      * -> Filter: ((t2.c > t1.c) or (t1.c < 0))
1524      *   -> Nested loop left join
1525      *     -> Table scan on t1
1526      *     -> Nested loop inner join
1527      *       -> Index lookup on t2 using PRIMARY (a=t1.b),
1528      *       -> Index lookup on t3 using PRIMARY (a=t2.b)
1529      *
1530      * The Iterators place the filter on 'top of' the t1..t3 evaluation.
1531      * The FOUND_MATCH(t2,t3) has also been eliminated, as we know there is
1532      * a (t2,t3) match at this point of execution.
1533      */
1534     for (uint tab_no = first_inner; tab_no <= last_inner; tab_no++) {
1535       AQP::Table_access *table = m_plan.get_table_access(tab_no);
1536       const Item *cond = table->get_condition();
1537       if (cond != nullptr) {
1538         // Condition could possibly be a 'antijoin_null_cond', in which case
1539         // the pending_cond flag has been cleared, it should then be ignored.
1540         if (m_join_scope.contain(tab_no) && !m_has_pending_cond.contain(tab_no))
1541           continue;
1542 
1543         struct {
1544           table_map nest_scope;   // Aggregated 'inner_tables' scope of triggers
1545           table_map found_match;  // FOUND_MATCH-trigger scope
1546         } trig_cond = {0, 0};
1547 
1548         // Check 'cond' for match trigger / filters
1549         WalkItem(const_cast<Item *>(cond), enum_walk::PREFIX,
1550                  [&trig_cond](Item *item) {
1551                    const Item_func_trig_cond *func_trig =
1552                        GetTriggerCondOrNull(item);
1553                    if (func_trig != nullptr) {
1554                      /**
1555                       * The FOUND_MATCH-trigger may be encapsulated inside
1556                       * multiple IS_NOT_NULL_COMPL-triggers, which defines
1557                       * the scope of the triggers. Aggregate these
1558                       * 'inner_tables' scopes.
1559                       */
1560                      trig_cond.nest_scope |= func_trig->get_inner_tables();
1561 
1562                      if (func_trig->get_trig_type() ==
1563                          Item_func_trig_cond::FOUND_MATCH) {
1564                        // The FOUND_MATCH-trigger is evaluated on top of
1565                        // the collected trigger nest_scope.
1566                        trig_cond.found_match |= trig_cond.nest_scope;
1567                        return true;  // break out of this cond-branch
1568                      }
1569                    }
1570                    return false;  // continue WalkItem
1571                  });              // End WalkItem' and lambda func
1572 
1573         if (trig_cond.found_match != 0) {
1574           const ndb_table_access_map map = get_table_map(trig_cond.found_match);
1575 
1576           /**
1577            * Only FOUND_MATCH-triggers partly overlapping join_scope will
1578            * restrict push. (Else it is completely evaluated either before
1579            * or after the pushed_join, thus does not affect it.
1580            */
1581           if (map.is_overlapping(m_join_scope) && !map.contain(m_join_scope)) {
1582             filter_cond.add(map);
1583           }
1584         }
1585       }
1586     }
1587 
1588     // Check each of the 3 reject reasons from the topmost comment
1589     const bool nest_has_unpushed = !m_join_scope.contain(inner_nest);
1590     const bool nest_has_filter_cond = inner_nest.is_overlapping(filter_cond);
1591     const bool nest_has_pending_cond =
1592         inner_nest.is_overlapping(m_has_pending_cond);
1593 
1594     if (nest_has_pending_cond || nest_has_unpushed || nest_has_filter_cond) {
1595       /**
1596        * Check all pushed scan operations in this nest, and nests embedded
1597        * within it. Note that it is the rows from scans in the upper nest
1598        * which may be repeated, creating false NULL extended rows from scans
1599        * in inner_nests.
1600        */
1601       for (uint tab_no = m_scan_operations.first_table(first_inner);
1602            tab_no <= last_inner;
1603            tab_no = m_scan_operations.first_table(tab_no + 1)) {
1604         DBUG_ASSERT(m_join_scope.contain(tab_no));
1605         const AQP::Table_access *const table = m_plan.get_table_access(tab_no);
1606 
1607         /**
1608          * Could have checked all 3 reject conditions at once, but would
1609          * like to provide separate EXPLAIN_NO_PUSH's for each of them.
1610          */
1611         if (nest_has_unpushed) {
1612           EXPLAIN_NO_PUSH(
1613               "Can't push %s joined table '%s' as child of '%s', "
1614               "some tables in embedding join-nest(s) are not part of pushed "
1615               "join",
1616               nest_type, table->get_table()->alias,
1617               m_join_root->get_table()->alias);
1618           remove_pushable(table);
1619         } else if (nest_has_pending_cond) {
1620           EXPLAIN_NO_PUSH(
1621               "Can't push %s joined table '%s' as child of '%s', "
1622               "join-nest containing the table has pending unpushed_conditions",
1623               nest_type, table->get_table()->alias,
1624               m_join_root->get_table()->alias);
1625           remove_pushable(table);
1626         } else if (nest_has_filter_cond) {
1627           EXPLAIN_NO_PUSH(
1628               "Can't push %s joined table '%s' as child of '%s', "
1629               "join-nest containing the table has a FILTER conditions",
1630               nest_type, table->get_table()->alias,
1631               m_join_root->get_table()->alias);
1632           remove_pushable(table);
1633         }
1634       }
1635     }
1636   }  // nest_has_scans
1637 }  // ndb_pushed_builder_ctx::validate_join_nest
1638 
1639 /**********************************************************************
1640  * ::remove_pushable()
1641  *
1642  * A Table was first included in a pushed join query, but later found to
1643  * not be pushable. Thus it has to be removed by this method.
1644  *
1645  * All other pushed tables are checked for dependencies on the table
1646  * being removed, and possible cascade-removed if they can no longer
1647  * be part of the pushed join without the romoved table(s).
1648  **********************************************************************/
remove_pushable(const AQP::Table_access * table)1649 void ndb_pushed_builder_ctx::remove_pushable(const AQP::Table_access *table) {
1650   DBUG_TRACE;
1651 
1652   const uint me = table->get_access_no();
1653   DBUG_ASSERT(m_join_scope.contain(me));
1654   m_join_scope.clear_bit(me);
1655 
1656   // Cascade remove of tables depending on 'me'
1657   for (uint tab_no = me + 1; tab_no < m_plan.get_access_count(); tab_no++) {
1658     if (m_join_scope.contain(tab_no)) {
1659       table = m_plan.get_table_access(tab_no);
1660       ndb_table_access_map *key_parents = m_tables[tab_no].m_key_parents;
1661 
1662       for (uint i = 0; i < table->get_no_of_key_fields(); i++) {
1663         if (!key_parents[i].is_clear_all()) {
1664           // Was referring some parent field(s) (not const, or params)
1665           // Remove parent referrences not in join_scope any more
1666           key_parents[i].intersect(m_join_scope);
1667 
1668           if (key_parents[i].is_clear_all()) {
1669             // All preceeding parent tables removed from join_scope.
1670             m_join_scope.clear_bit(tab_no);  // Cascade remove of this table
1671             break;
1672           }
1673         }
1674       }
1675     }
1676     m_tables[tab_no].m_ancestors.intersect(m_join_scope);
1677   }
1678   // Remove 'pending_cond' and 'scan_operations' not pushed any longer
1679   m_has_pending_cond.intersect(m_join_scope);
1680   m_scan_operations.intersect(m_join_scope);
1681 }  // ndb_pushed_builder_ctx::remove_pushable
1682 
1683 /*********************
1684  * This method examines a key item (could be part of a lookup key or a scan
1685  * bound) for a table access operation and calculates the set of possible
1686  * parents. (These are possible parent table access operations in the query
1687  * tree that will be pushed to the ndb.)
1688  *
1689  * @param[in] table The table access operation to which the key item belongs.
1690  * @param[in] key_item The key_item to examine
1691  * @param[in] key_part Metatdata about the key item.
1692  * @param[out] field_parents The set of possible parents for 'key_item'
1693  * ('join_root' if keys are constant).
1694  * @return True if at least one possible parent was found. (False means that
1695  * operation cannot be pushed).
1696  */
is_field_item_pushable(AQP::Table_access * table,const Item * key_item,const KEY_PART_INFO * key_part,ndb_table_access_map & field_parents)1697 bool ndb_pushed_builder_ctx::is_field_item_pushable(
1698     AQP::Table_access *table, const Item *key_item,
1699     const KEY_PART_INFO *key_part, ndb_table_access_map &field_parents) {
1700   DBUG_TRACE;
1701   const uint tab_no = table->get_access_no();
1702   DBUG_ASSERT(key_item->type() == Item::FIELD_ITEM);
1703 
1704   const Item_field *const key_item_field =
1705       static_cast<const Item_field *>(key_item);
1706 
1707   DBUG_PRINT(
1708       "info",
1709       ("keyPart:%d, field:%s.%s", (int)(key_item - table->get_key_field(0)),
1710        key_item_field->field->table->alias, key_item_field->field->field_name));
1711 
1712   if (!key_item_field->field->eq_def(key_part->field)) {
1713     EXPLAIN_NO_PUSH(
1714         "Can't push table '%s' as child, "
1715         "column '%s' does not have same datatype as ref'ed "
1716         "column '%s.%s'",
1717         table->get_table()->alias, key_part->field->field_name,
1718         key_item_field->field->table->alias, key_item_field->field->field_name);
1719     table->set_table_properties(
1720         table->get_table_properties() &
1721         ~PUSHABLE_AS_CHILD);  // Permanently disable as child
1722     return false;
1723   }
1724 
1725   if (key_item_field->field->is_virtual_gcol()) {
1726     EXPLAIN_NO_PUSH("Can't push condition on virtual generated column '%s.%s'",
1727                     key_item_field->field->table->alias,
1728                     key_item_field->field->field_name);
1729     return false;
1730   }
1731 
1732   /**
1733    * Below this point 'key_item_field' is a candidate for refering a parent
1734    * table in a pushed join. It should either directly refer a parent common to
1735    * all FIELD_ITEMs, or refer a grandparent of this common parent. There are
1736    * different cases which should be handled:
1737    *
1738    *  1) 'key_item_field' may already refer one of the parent available within
1739    * our pushed scope. 2)  By using the equality set, we may find alternative
1740    * parent references which may make this a pushed join.
1741    */
1742 
1743   ///////////////////////////////////////////////////////////////////
1744   // 0) Prepare for calculating parent candidates for this FIELD_ITEM
1745   //
1746   field_parents.clear_all();
1747 
1748   ////////////////////////////////////////////////////////////////////
1749   // 1) Add our existing parent reference to the set of parent candidates
1750   //
1751   const uint referred_table_no = get_table_no(key_item_field);
1752   if (m_join_scope.contain(referred_table_no)) {
1753     field_parents.add(referred_table_no);
1754   }
1755 
1756   //////////////////////////////////////////////////////////////////
1757   // 2) Use the equality set to possibly find more parent candidates
1758   //    usable by substituting existing 'key_item_field'
1759   //
1760   Item_equal *item_equal = table->get_item_equal(key_item_field);
1761   if (item_equal) {
1762     AQP::Equal_set_iterator equal_iter(*item_equal);
1763     const Item_field *substitute_field;
1764     while ((substitute_field = equal_iter.next()) != NULL) {
1765       if (substitute_field != key_item_field) {
1766         const uint substitute_table_no = get_table_no(substitute_field);
1767         if (m_join_scope.contain(substitute_table_no)) {
1768           DBUG_PRINT("info",
1769                      (" join_items[%d] %s.%s can be replaced with %s.%s",
1770                       (int)(key_item - table->get_key_field(0)),
1771                       get_referred_table_access_name(key_item_field),
1772                       get_referred_field_name(key_item_field),
1773                       get_referred_table_access_name(substitute_field),
1774                       get_referred_field_name(substitute_field)));
1775 
1776           field_parents.add(substitute_table_no);
1777         }
1778       }
1779     }  // while(substitute_field != NULL)
1780   }
1781   if (!field_parents.is_clear_all()) {
1782     return true;
1783   }
1784 
1785   if (m_const_scope.contain(referred_table_no)) {
1786     // This key item is const. and did not cause the set of possible parents
1787     // to be recalculated. Reuse what we had before this key item.
1788     DBUG_ASSERT(field_parents.is_clear_all());
1789 
1790     /**
1791      * Field referrence is a 'paramValue' to a column value evaluated
1792      * prior to the root of this pushed join candidate. Some restrictions
1793      * applies to when a field reference is allowed in a pushed join:
1794      */
1795     if (ndbcluster_is_lookup_operation(m_join_root->get_access_type())) {
1796       /**
1797        * EQRefIterator may optimize away key reads if the key
1798        * for a requested row is the same as the previous.
1799        * Thus, iff this is the root of a pushed lookup join
1800        * we do not want it to contain childs with references
1801        * to columns 'outside' the the pushed joins, as these
1802        * may still change between calls to
1803        * EQRefIterator::Read() independent of the root key
1804        * itself being the same.
1805        */
1806       EXPLAIN_NO_PUSH(
1807           "Cannot push table '%s' as child of '%s', since "
1808           "it referes to column '%s.%s' prior to a "
1809           "potential 'const' root.",
1810           table->get_table()->alias, m_join_root->get_table()->alias,
1811           get_referred_table_access_name(key_item_field),
1812           get_referred_field_name(key_item_field));
1813       return false;
1814     } else {
1815       /**
1816        * Scan queries cannot be pushed if the pushed query may refer column
1817        * values (paramValues) from rows stored in a join cache.
1818        */
1819       const TABLE *const referred_tab = key_item_field->field->table;
1820       uint access_no = tab_no;
1821       do {
1822         if (m_plan.get_table_access(access_no)->uses_join_cache()) {
1823           EXPLAIN_NO_PUSH(
1824               "Cannot push table '%s' as child of '%s', since "
1825               "it referes to column '%s.%s' which will be stored "
1826               "in a join buffer.",
1827               table->get_table()->alias, m_join_root->get_table()->alias,
1828               get_referred_table_access_name(key_item_field),
1829               get_referred_field_name(key_item_field));
1830           return false;
1831         }
1832         DBUG_ASSERT(access_no > 0);
1833         access_no--;
1834       } while (m_plan.get_table_access(access_no)->get_table() != referred_tab);
1835 
1836     }  // if (!ndbcluster_is_lookup_operation(root_type)
1837     return true;
1838   } else {
1839     EXPLAIN_NO_PUSH(
1840         "Can't push table '%s' as child of '%s', "
1841         "column '%s.%s' is outside scope of pushable join",
1842         table->get_table()->alias, m_join_root->get_table()->alias,
1843         get_referred_table_access_name(key_item_field),
1844         get_referred_field_name(key_item_field));
1845     return false;
1846   }
1847 }  // ndb_pushed_builder_ctx::is_field_item_pushable()
1848 
is_const_item_pushable(const Item * key_item,const KEY_PART_INFO * key_part)1849 bool ndb_pushed_builder_ctx::is_const_item_pushable(
1850     const Item *key_item, const KEY_PART_INFO *key_part) {
1851   DBUG_TRACE;
1852   DBUG_ASSERT(key_item->const_item());
1853 
1854   /**
1855    * Propagate Items constant value to Field containing the value of this
1856    * key_part:
1857    */
1858   Field *const field = key_part->field;
1859   const int error =
1860       const_cast<Item *>(key_item)->save_in_field_no_warnings(field, true);
1861   if (unlikely(error)) {
1862     DBUG_PRINT("info", ("Failed to store constant Item into Field -> not"
1863                         " pushable"));
1864     return false;
1865   }
1866   if (field->is_real_null()) {
1867     DBUG_PRINT("info", ("NULL constValues in key -> not pushable"));
1868     return false;  // TODO, handle graceful -> continue?
1869   }
1870   return true;
1871 }  // ndb_pushed_builder_ctx::is_const_item_pushable()
1872 
1873 /**
1874  * ::optimize_query_plan()
1875  *
1876  * Decide the final execution order for the pushed joins. That mainly
1877  * involves deciding which table to be used as the 'm_parent'.
1878  *
1879  * The m_parent is choosen based on the available m_key_parents[]
1880  * which were set up by ::is_pushable_as_child(), and possibly later
1881  * modified (reduced) by ::validate_join_nest().
1882  *
1883  * When multiple parent candidates are available, we choose the one
1884  * closest to the root, which will result in the most 'bushy' tree
1885  * structure and the highest possible parallelism. Note that SPJ block
1886  * will build its own execution plan (based on whats being set up here)
1887  * which possible sequentialize the execution of these parallel branches.
1888  * (See WL#11164)
1889  */
optimize_query_plan()1890 int ndb_pushed_builder_ctx::optimize_query_plan() {
1891   DBUG_TRACE;
1892   const uint root_no = m_join_root->get_access_no();
1893   const uint last_table = m_plan.get_access_count() - 1;
1894 
1895   // Find an optimal m_parent to be used when joining the tables
1896   for (uint tab_no = last_table; tab_no > root_no; tab_no--) {
1897     if (!m_join_scope.contain(tab_no)) continue;
1898     pushed_tables &table = m_tables[tab_no];
1899 
1900     /**
1901      * Calculate the set of possible parents for the table, where:
1902      *  - 'common' are those we may refer (possibly through the EQ-sets)
1903      *     such that all FIELD_ITEMs are from the same parent.
1904      *  - 'extended' are those parents refered from some of the
1905      *     FIELD_ITEMs, and having the rest of the referred FIELD_ITEM
1906      *     tables available as 'grandparent refs'
1907      *     (The SPJ block can handle field references to any ancestor
1908      *      operation, not just the (direct) parent).
1909      *
1910      * In addition there are firm dependencies between some parents
1911      * such that all 'depend_parents' must be referred as an ancestors
1912      * of the table. By default 'depend_parents' will at least contain
1913      * the most 'grandparent' of the extended parents.
1914      */
1915     ndb_table_access_map *key_parents = table.m_key_parents;
1916     ndb_table_access_map common_parents(m_join_scope);
1917     ndb_table_access_map extend_parents;
1918     ndb_table_access_map depend_parents;
1919 
1920     for (uint i = 0;
1921          i < m_plan.get_table_access(tab_no)->get_no_of_key_fields(); i++) {
1922       DBUG_ASSERT(m_join_scope.contain(key_parents[i]));
1923       if (!key_parents[i].is_clear_all()) {  // Key refers a parent field
1924         /**
1925          * Calculate 'common_parents' as the set of possible 'field_parents'
1926          * available from all 'key_part'.
1927          */
1928         common_parents.intersect(key_parents[i]);
1929 
1930         /**
1931          * 'Extended' parents are refered from some 'FIELD_ITEM', and contain
1932          * all parents directly referred, or available as 'depend_parents'.
1933          * The later excludes those before the first (grand-)parent
1934          * available from all 'field_parents' (first_grandparent).
1935          * However, it also introduce a dependency of those
1936          * tables to really be available as grand parents.
1937          */
1938         extend_parents.add(key_parents[i]);
1939 
1940         const uint first = key_parents[i].first_table(root_no);
1941         depend_parents.add(first);
1942       }
1943     }
1944 
1945     /**
1946      * Previous childs might already have enforced some ancestors to be
1947      * available through this table due to some ancestors being referred by
1948      * them, add these.
1949      */
1950     depend_parents.add(table.m_ancestors);
1951 
1952     /**
1953      * Same goes for nest-level dependencies: The 'first' in each nest
1954      * may enforce ancestor dependencies on the members of the nest.
1955      * If this table is the 'first' itself, it is embedded within the
1956      * nest controlled by the 'first_upper'.
1957      */
1958     if (table.m_first_inner < tab_no)
1959       depend_parents.add(m_tables[table.m_first_inner].m_ancestors);
1960     else if (table.m_first_upper > 0)
1961       depend_parents.add(m_tables[table.m_first_upper].m_ancestors);
1962 
1963     /**
1964      * All 'depend_parents' has to be fulfilled, starting from the 'last',
1965      * closest to this tab_no. The 'depend_parents' not directly referred
1966      * as a parent from this table, will be fulfilled by adding them as required
1967      * ancestors of the choosen parent, see below.
1968      * Find the first dependency to fulfill:
1969      */
1970     const uint depends_on_parent = depend_parents.last_table(tab_no - 1);
1971 
1972     /**
1973      * We try to find a parent within our own nest among the common_
1974      * or extend_parents, but also takes the required depends_on_parent
1975      * into consideration. Establish the lowest parent candidate
1976      * we may accept.
1977      */
1978     const uint first_candidate =
1979         std::max(depends_on_parent, table.m_first_inner);
1980 
1981     /**
1982      * Find a parent among common_parent (preferred) or extend_parent
1983      * if possible, else choose the first we depends_on.
1984      *
1985      * Choose parent to be the first possible among 'parents'.
1986      * Result in the most 'bushy' query plan, enabling most parallelism
1987      */
1988     uint parent_no = common_parents.first_table(first_candidate);
1989     if (parent_no >= tab_no) {  // Not found
1990       parent_no = extend_parents.first_table(first_candidate);
1991       if (parent_no >= tab_no) {  // Not found
1992         parent_no = depends_on_parent;
1993       }
1994     }
1995     DBUG_ASSERT(parent_no < tab_no);
1996     table.m_parent = parent_no;
1997 
1998     /**
1999      * Any remaining ancestor dependencies for this table has to be
2000      * added to the selected parent in order to be taken into account
2001      * for parent calculation for its ancestors.
2002      */
2003     depend_parents.clear_bit(parent_no);
2004     m_tables[parent_no].m_ancestors.add(depend_parents);
2005 
2006     /**
2007      * Similar for nest-level dependencies: Any dependencies to tables outside
2008      * of this inner nest are enforces as mandatory nest-ancestor dependencies.
2009      */
2010     depend_parents.subtract(table.m_inner_nest);
2011     m_tables[table.m_first_inner].m_ancestors.add(depend_parents);
2012   }
2013 
2014   /* Collect the full set of ancestors available through the selected 'm_parent'
2015    */
2016   for (uint tab_no = root_no + 1; tab_no <= last_table; tab_no++) {
2017     if (m_join_scope.contain(tab_no)) {
2018       pushed_tables &table = m_tables[tab_no];
2019       const uint parent_no = table.m_parent;
2020       table.m_ancestors = m_tables[parent_no].m_ancestors;
2021       table.m_ancestors.add(parent_no);
2022     }
2023   }
2024   return 0;
2025 }  // ndb_pushed_builder_ctx::optimize_query_plan
2026 
collect_key_refs(const AQP::Table_access * table,const Item * key_refs[]) const2027 void ndb_pushed_builder_ctx::collect_key_refs(const AQP::Table_access *table,
2028                                               const Item *key_refs[]) const {
2029   DBUG_TRACE;
2030 
2031   const uint tab_no = table->get_access_no();
2032   const uint parent_no = m_tables[tab_no].m_parent;
2033   const ndb_table_access_map ancestors = m_tables[tab_no].m_ancestors;
2034 
2035   DBUG_ASSERT(m_join_scope.contain(ancestors));
2036   DBUG_ASSERT(ancestors.contain(parent_no));
2037 
2038   /**
2039    * If there are any key_fields with 'current_parents' different from
2040    * our selected 'parent', we have to find substitutes for
2041    * those key_fields within the equality set.
2042    **/
2043   for (uint key_part_no = 0; key_part_no < table->get_no_of_key_fields();
2044        key_part_no++) {
2045     const Item *const key_item = table->get_key_field(key_part_no);
2046     key_refs[key_part_no] = key_item;
2047 
2048     DBUG_ASSERT(key_item->const_item() || key_item->type() == Item::FIELD_ITEM);
2049 
2050     if (key_item->type() == Item::FIELD_ITEM) {
2051       const Item_field *join_item = static_cast<const Item_field *>(key_item);
2052       uint referred_table_no = get_table_no(join_item);
2053       Item_equal *item_equal;
2054 
2055       if (referred_table_no != parent_no &&
2056           (item_equal = table->get_item_equal(join_item)) != NULL) {
2057         AQP::Equal_set_iterator iter(*item_equal);
2058         const Item_field *substitute_field;
2059         while ((substitute_field = iter.next()) != NULL) {
2060           ///////////////////////////////////////////////////////////
2061           // Prefer to replace join_item with ref. to selected parent.
2062           //
2063           const uint substitute_table_no = get_table_no(substitute_field);
2064           if (substitute_table_no == parent_no) {
2065             DBUG_PRINT("info",
2066                        (" Replacing key_refs[%d] %s.%s with %s.%s (parent)",
2067                         key_part_no, get_referred_table_access_name(join_item),
2068                         get_referred_field_name(join_item),
2069                         get_referred_table_access_name(substitute_field),
2070                         get_referred_field_name(substitute_field)));
2071 
2072             referred_table_no = substitute_table_no;
2073             key_refs[key_part_no] = join_item = substitute_field;
2074             break;
2075           } else if (ancestors.contain(substitute_table_no)) {
2076             DBUG_ASSERT(substitute_table_no <= parent_no);
2077 
2078             //////////////////////////////////////////////////////////////////////
2079             // Second best is to replace join_item with closest grandparent ref.
2080             // In this case we will continue to search for the common parent
2081             // match: Updates key_refs[] if:
2082             //   1): Replace incorrect refs of tables not being an 'ancestor'.
2083             //   2): Found a better substitute closer to selected parent
2084             //
2085             if (!ancestors.contain(referred_table_no) ||  // 1
2086                 referred_table_no < substitute_table_no)  // 2)
2087             {
2088               DBUG_PRINT(
2089                   "info",
2090                   (" Replacing key_refs[%d] %s.%s with %s.%s (grandparent)",
2091                    key_part_no, get_referred_table_access_name(join_item),
2092                    get_referred_field_name(join_item),
2093                    get_referred_table_access_name(substitute_field),
2094                    get_referred_field_name(substitute_field)));
2095 
2096               referred_table_no = substitute_table_no;
2097               key_refs[key_part_no] = join_item = substitute_field;
2098             }
2099           }
2100         }  // while (substitute...
2101 
2102         DBUG_ASSERT(referred_table_no == parent_no ||
2103                     ancestors.contain(referred_table_no) ||
2104                     m_const_scope.contain(
2105                         referred_table_no));  // Is a 'const' paramValue
2106       }
2107     }  // Item::FIELD_ITEM
2108   }
2109 
2110   key_refs[table->get_no_of_key_fields()] = NULL;
2111 }  // ndb_pushed_builder_ctx::collect_key_refs()
2112 
2113 /**
2114  * For the specified table; build the set of NdbQueryOperands defining
2115  * the (index-) key value for fetching rows from the table.
2116  *
2117  * Key values may consist of a mix of const-, param- and linkedValue(),
2118  * as collected by the utility method ::collect_key_refs().
2119  *
2120  * A linkedValue() should preferably refer a value from the 'm_parent'
2121  * of the table. If the referred field is not available from parent,
2122  * another ancestor may also be used. In the later case, SPJ will
2123  * need to store the referred ancestor value, such that it can be located
2124  * by the correlation-ids through the chain of ancestors.
2125  *
2126  * SPJ API will normally deduct the parent / ancestor topology based
2127  * on the table(s) being referred by the linkedValues(). In case of multiple
2128  * tables being referred, the API will check that the set of ancestors
2129  * depends on (are ancestors of-) each other, such that all referred tables
2130  * are available through the chain of ancestors.
2131  *
2132  * In rare cases we may introduce extra parent dependencies in order to
2133  * establish a common set of ancestors. To maintain the join semantics, this
2134  * is only supported when the added dependencies are to tables in same
2135  * inner join-nest. Restriction applying to the above is checked by
2136  * is_pushable_as_child(). However ::build_key() need to enforce the
2137  * added dependencies by calling NdbQueryOptions::setParent(). (below)
2138  */
build_key(const AQP::Table_access * table,const NdbQueryOperand * op_key[],NdbQueryOptions * key_options)2139 int ndb_pushed_builder_ctx::build_key(const AQP::Table_access *table,
2140                                       const NdbQueryOperand *op_key[],
2141                                       NdbQueryOptions *key_options) {
2142   DBUG_TRACE;
2143   DBUG_ASSERT(m_join_scope.contain(table->get_access_no()));
2144 
2145   const KEY *const key = &table->get_table()->key_info[table->get_index_no()];
2146   op_key[0] = NULL;
2147 
2148   if (table == m_join_root) {
2149     if (ndbcluster_is_lookup_operation(table->get_access_type())) {
2150       for (uint i = 0; i < key->user_defined_key_parts; i++) {
2151         op_key[i] = m_builder->paramValue();
2152         if (unlikely(op_key[i] == NULL)) {
2153           return -1;
2154         }
2155       }
2156       op_key[key->user_defined_key_parts] = NULL;
2157     }
2158   } else {
2159     const uint key_fields = table->get_no_of_key_fields();
2160     DBUG_ASSERT(key_fields > 0 && key_fields <= key->user_defined_key_parts);
2161     uint map[ndb_pushed_join::MAX_LINKED_KEYS + 1];
2162 
2163     if (ndbcluster_is_lookup_operation(table->get_access_type())) {
2164       const ha_ndbcluster *handler =
2165           down_cast<ha_ndbcluster *>(table->get_table()->file);
2166       ndbcluster_build_key_map(
2167           handler->m_table, handler->m_index[table->get_index_no()], key, map);
2168     } else {
2169       for (uint ix = 0; ix < key_fields; ix++) {
2170         map[ix] = ix;
2171       }
2172     }
2173 
2174     const Item *join_items[ndb_pushed_join::MAX_LINKED_KEYS + 1];
2175     collect_key_refs(table, join_items);
2176 
2177     ndb_table_access_map referred_parents;
2178     const KEY_PART_INFO *key_part = key->key_part;
2179     for (uint i = 0; i < key_fields; i++, key_part++) {
2180       const Item *const item = join_items[i];
2181       op_key[map[i]] = NULL;
2182 
2183       DBUG_ASSERT(item->const_item() == item->const_for_execution());
2184       if (item->const_item()) {
2185         /**
2186          * Propagate Items constant value to Field containing the value of this
2187          * key_part:
2188          */
2189         Field *const field = key_part->field;
2190         DBUG_ASSERT(!field->is_real_null());
2191         const uchar *const ptr =
2192             (field->real_type() == MYSQL_TYPE_VARCHAR)
2193                 ? field->field_ptr() + field->get_length_bytes()
2194                 : field->field_ptr();
2195 
2196         op_key[map[i]] = m_builder->constValue(ptr, field->data_length());
2197       } else {
2198         DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
2199         const Item_field *const field_item =
2200             static_cast<const Item_field *>(item);
2201         const uint referred_table_no = get_table_no(field_item);
2202         referred_parents.add(referred_table_no);
2203 
2204         if (m_join_scope.contain(referred_table_no)) {
2205           // Locate the parent operation for this 'join_items[]'.
2206           // May refer any of the preceding parent tables
2207           const NdbQueryOperationDef *const parent_op =
2208               m_tables[referred_table_no].m_op;
2209           DBUG_ASSERT(parent_op != NULL);
2210 
2211           // TODO use field_index ??
2212           op_key[map[i]] =
2213               m_builder->linkedValue(parent_op, field_item->field_name);
2214         } else {
2215           DBUG_ASSERT(m_const_scope.contain(referred_table_no));
2216           // Outside scope of join plan, Handle as parameter as its value
2217           // will be known when we are ready to execute this query.
2218           if (unlikely(m_fld_refs >= ndb_pushed_join::MAX_REFERRED_FIELDS)) {
2219             DBUG_PRINT("info", ("Too many Field refs ( >= MAX_REFERRED_FIELDS) "
2220                                 "encountered"));
2221             return -1;  // TODO, handle gracefull -> continue?
2222           }
2223           m_referred_fields[m_fld_refs++] = field_item->field;
2224           op_key[map[i]] = m_builder->paramValue();
2225         }
2226       }
2227 
2228       if (unlikely(op_key[map[i]] == NULL)) {
2229         return -1;
2230       }
2231     }
2232     op_key[key_fields] = NULL;
2233 
2234     // Might have to explicit set the designated parent.
2235     const uint tab_no = table->get_access_no();
2236     const uint parent_no = m_tables[tab_no].m_parent;
2237     if (!referred_parents.contain(parent_no)) {
2238       // Add the parent as a new dependency
2239       DBUG_ASSERT(m_tables[parent_no].m_op != NULL);
2240       key_options->setParent(m_tables[parent_no].m_op);
2241     }
2242   }
2243   return 0;
2244 }  // ndb_pushed_builder_ctx::build_key()
2245 
2246 /**
2247  * Call SPJ API to build a NdbQuery
2248  */
build_query()2249 int ndb_pushed_builder_ctx::build_query() {
2250   DBUG_TRACE;
2251 
2252   DBUG_PRINT("enter",
2253              ("Table %d as root is pushable", m_join_root->get_access_no()));
2254   DBUG_EXECUTE("info", m_join_root->dbug_print(););
2255 
2256   const uint root_no = m_join_root->get_access_no();
2257   DBUG_ASSERT(m_join_scope.contain(root_no));
2258 
2259   if (m_builder == NULL) {
2260     m_builder = NdbQueryBuilder::create();
2261     if (unlikely(m_builder == NULL)) {
2262       return HA_ERR_OUT_OF_MEM;
2263     }
2264   }
2265 
2266   for (uint tab_no = root_no; tab_no < m_plan.get_access_count(); tab_no++) {
2267     if (!m_join_scope.contain(tab_no)) continue;
2268 
2269     const AQP::Table_access *const table = m_plan.get_table_access(tab_no);
2270     const AQP::enum_access_type access_type = table->get_access_type();
2271     ha_ndbcluster *handler =
2272         down_cast<ha_ndbcluster *>(table->get_table()->file);
2273 
2274     NdbQueryOptions options;
2275     const NdbQueryOperand *op_key[ndb_pushed_join::MAX_KEY_PART + 1];
2276     if (table->get_index_no() >= 0) {
2277       const int error = build_key(table, op_key, &options);
2278       if (unlikely(error)) return error;
2279     }
2280 
2281     if (table != m_join_root) {
2282       DBUG_ASSERT(m_tables[tab_no].m_parent != MAX_TABLES);
2283       const uint parent_no = m_tables[tab_no].m_parent;
2284 
2285       if (m_tables[tab_no].isInnerJoined(m_tables[parent_no])) {
2286         // 'tab_no' is inner joined with its parent
2287         options.setMatchType(NdbQueryOptions::MatchNonNull);
2288       }
2289 
2290       if (table->is_sj_firstmatch()) {
2291         /**
2292          * Is a Firstmatch'ed semijoin_nest. In order to let SPJ API
2293          * do firstMatch elimination of duplicated rows, we need to ensure:
2294          *  1) The entire semijoined-nest has been pushed down.
2295          *  2) There are no unpushed conditions in the above sj-nest.
2296          *
2297          * ... else we might end up returning a firstMatched'ed row,
2298          *  which later turns out to be a non-match due to eiter 1) or 2).
2299          */
2300         const int last_sj_inner = table->get_last_sj_inner();
2301         const ndb_table_access_map semijoin(m_tables[last_sj_inner].m_sj_nest);
2302         if (m_join_scope.contain(semijoin) &&
2303             !m_has_pending_cond.is_overlapping(semijoin)) {
2304           options.setMatchType(NdbQueryOptions::MatchFirst);
2305         }
2306       }
2307 
2308       if (table->is_antijoin()) {
2309         DBUG_ASSERT(m_tables[tab_no].isOuterJoined(m_tables[parent_no]));
2310         const ndb_table_access_map antijoin_scope(
2311             get_tables_in_range(tab_no, m_tables[tab_no].m_last_inner));
2312 
2313         /**
2314          * From SPJ point of view, antijoin is a normal outer join. So once
2315          * we have accounted for the special antijoin_null_cond added to such
2316          * queries, no special handling is required for antijoin's wrt.
2317          * query correctness.
2318          *
2319          * However, as an added optimization, the SPJ API may eliminate the
2320          * upper-table rows not matching the 'Not exists' requirement, if:
2321          *  1) The entire (anti-)outer-joined-nest has been pushed down
2322          *  2) There are no unpushed conditions in the above join-nest.
2323          * -> or: 'antijoin-nest is completely evaluated by SPJ'
2324          *
2325          * Note that this is a pure optimization: Any returned rows supposed
2326          * to 'Not exist' will simply be eliminated by the mysql server.
2327          * -> We do join-pushdown of such antijoins even if the check below
2328          * does not allow us to setMatchType('MatchNullOnly')
2329          */
2330         if (m_join_scope.contain(antijoin_scope) &&
2331             !m_has_pending_cond.is_overlapping(antijoin_scope)) {
2332           const uint first_upper = m_tables[tab_no].m_first_upper;
2333           ndb_table_access_map upper_nest(full_inner_nest(first_upper, tab_no));
2334           upper_nest.intersect(m_join_scope);
2335 
2336           if (upper_nest.contain(parent_no)) {
2337             /**
2338              * Antijoin is relative to the *upper_nest*. Thus we can only
2339              * eliminate found matches if they are relative the upper_nest.
2340              * Example: '(t1 oj (t2)) where not exists (t3 where t3.x = t1.y)'
2341              *
2342              * This nest structure is such that the upper of 'antijoin t3' is
2343              * t1. Thus we can only do match elimination of such a query when it
2344              * is built with 't3.parent == t1'.
2345              */
2346             options.setMatchType(NdbQueryOptions::MatchNullOnly);
2347           } else {
2348             /**
2349              * Else, subquery condition do not refer upper_nest.
2350              * Example: '(t1 oj (t2)) where not exists (t3 where t3.x = t2.y)'
2351              * Due to the nest structure, we still have t3.upper = t1.
2352              * However, the where condition dependencies will result in:
2353              * '3.parent == t2'. Specifying antijoin for this query may
2354              * eliminate matching rows from t2, while t1 rows will still
2355              * exists (with t2 NULL-extended).
2356              * However, we can still specify the less restrictive firstMatch
2357              * for such queries.
2358              */
2359             options.setMatchType(NdbQueryOptions::MatchFirst);
2360           }
2361         }
2362       }
2363 
2364       /**
2365        * Inform SPJ API about the join nest dependencies. Needed in those
2366        * cases where the are no linkedValues determining which inner_
2367        * and upper_nest a table is a member of. SPJ API need this info
2368        * in order to correctly generate NULL extended outer join results.
2369        *
2370        * Example: t1 outer join (t2 inner join t3), where t3s join condition
2371        * does not refer t2. Thus, t3 will likely become an outer joined
2372        * child of t1 in the QueryTree. From the parent-child POW, t2,t3
2373        * will look like two separate outer joined tables, like:
2374        * 't1, outer join (t2), outer join (t3)'.
2375        *
2376        * Such queries need to set the join nest dependencies, such that
2377        * the NdbQuery interface is able to correcly generate NULL extended
2378        * rows.
2379        *
2380        * Below we add these nest dependencies even when not strictly required.
2381        * The API will just ignore such redundant nest dependencies.
2382        */
2383       if (m_tables[tab_no].isOuterJoined(m_tables[parent_no])) {
2384         ndb_table_access_map inner_nest(m_tables[tab_no].m_inner_nest);
2385         inner_nest.intersect(m_join_scope);
2386         if (!inner_nest.is_clear_all()) {
2387           // Table not first in its join_nest, set firstInner which it
2388           // depends on
2389           const uint real_first_inner =
2390               inner_nest.first_table(m_tables[tab_no].m_first_inner);
2391           options.setFirstInnerJoin(m_tables[real_first_inner].m_op);
2392 
2393         } else if (m_tables[tab_no].m_first_upper >= 0) {
2394           const uint first_upper = m_tables[tab_no].m_first_upper;
2395           ndb_table_access_map upper_nest(full_inner_nest(first_upper, tab_no));
2396           upper_nest.intersect(m_join_scope);
2397           if (!upper_nest.is_clear_all()) {
2398             // There is an upper nest which we outer join with
2399             const uint real_first_upper =
2400                 upper_nest.first_table(m_tables[tab_no].m_first_upper);
2401             options.setUpperJoin(m_tables[real_first_upper].m_op);
2402           }
2403         }
2404       }
2405     }  // if '!m_join_root'
2406 
2407     const NdbQueryOperationDef *query_op = NULL;
2408     if (ndbcluster_is_lookup_operation(access_type)) {
2409       // Primary key access assumed
2410       if (access_type == AQP::AT_PRIMARY_KEY ||
2411           access_type == AQP::AT_MULTI_PRIMARY_KEY) {
2412         DBUG_PRINT("info", ("Operation is 'primary-key-lookup'"));
2413         query_op = m_builder->readTuple(handler->m_table, op_key, &options);
2414       } else {
2415         DBUG_ASSERT(access_type == AQP::AT_UNIQUE_KEY);
2416         DBUG_PRINT("info", ("Operation is 'unique-index-lookup'"));
2417         const NdbDictionary::Index *const index =
2418             handler->m_index[table->get_index_no()].unique_index;
2419         DBUG_ASSERT(index);
2420         query_op =
2421             m_builder->readTuple(index, handler->m_table, op_key, &options);
2422       }
2423     }  // ndbcluster_is_lookup_operation()
2424 
2425     /**
2426      * AT_MULTI_MIXED may have 'ranges' which are pure single key lookups also.
2427      * In our current implementation these are converted into range access in
2428      * the pushed MRR implementation. However, the future plan is to build both
2429      * RANGE and KEY pushable joins for these.
2430      */
2431     else if (access_type == AQP::AT_ORDERED_INDEX_SCAN ||
2432              access_type == AQP::AT_MULTI_MIXED) {
2433       DBUG_ASSERT(table->get_index_no() >= 0);
2434       DBUG_ASSERT(handler->m_index[table->get_index_no()].index != NULL);
2435 
2436       DBUG_PRINT("info", ("Operation is 'equal-range-lookup'"));
2437       DBUG_PRINT(
2438           "info",
2439           ("Creating scanIndex on index id:%d, name:%s", table->get_index_no(),
2440            handler->m_index[table->get_index_no()].index->getName()));
2441 
2442       const NdbQueryIndexBound bounds(op_key);
2443       query_op =
2444           m_builder->scanIndex(handler->m_index[table->get_index_no()].index,
2445                                handler->m_table, &bounds, &options);
2446     } else if (access_type == AQP::AT_TABLE_SCAN) {
2447       DBUG_PRINT("info", ("Operation is 'table scan'"));
2448       query_op = m_builder->scanTable(handler->m_table, &options);
2449     } else {
2450       DBUG_ASSERT(false);
2451     }
2452 
2453     if (unlikely(!query_op)) return -1;
2454 
2455     m_tables[tab_no].m_op = query_op;
2456   }  // for (join_cnt= m_join_root->get_access_no();
2457      // join_cnt<plan.get_access_count(); join_cnt++)
2458 
2459   return 0;
2460 }  // ndb_pushed_builder_ctx::build_query()
2461 
2462 /**
2463  * Fill in ix_map[] to map from KEY_PART_INFO[] order into
2464  * primary key / unique key order of key fields.
2465  */
ndbcluster_build_key_map(const NDBTAB * table,const NDB_INDEX_DATA & index,const KEY * key_def,uint ix_map[])2466 void ndbcluster_build_key_map(const NDBTAB *table, const NDB_INDEX_DATA &index,
2467                               const KEY *key_def, uint ix_map[]) {
2468   uint ix;
2469 
2470   if (index.unique_index_attrid_map)  // UNIQUE_ORDERED_INDEX or UNIQUE_INDEX
2471   {
2472     for (ix = 0; ix < key_def->user_defined_key_parts; ix++) {
2473       ix_map[ix] = index.unique_index_attrid_map[ix];
2474     }
2475   } else  // Primary key does not have a 'unique_index_attrid_map'
2476   {
2477     KEY_PART_INFO *key_part;
2478     uint key_pos = 0;
2479     int columnnr = 0;
2480     assert(index.type == PRIMARY_KEY_ORDERED_INDEX ||
2481            index.type == PRIMARY_KEY_INDEX);
2482 
2483     for (ix = 0, key_part = key_def->key_part;
2484          ix < key_def->user_defined_key_parts; ix++, key_part++) {
2485       // As NdbColumnImpl::m_keyInfoPos isn't available through
2486       // NDB API we have to calculate it ourself, else we could:
2487       // ix_map[ix]= table->getColumn(key_part->fieldnr-1)->m_impl.m_keyInfoPos;
2488 
2489       if (key_part->fieldnr < columnnr) {
2490         // PK columns are not in same order as the columns are defined in the
2491         // table, Restart PK search from first column:
2492         key_pos = 0;
2493         columnnr = 0;
2494       }
2495 
2496       while (columnnr < key_part->fieldnr - 1) {
2497         if (table->getColumn(columnnr++)->getPrimaryKey()) key_pos++;
2498       }
2499 
2500       assert(table->getColumn(columnnr)->getPrimaryKey());
2501       ix_map[ix] = key_pos;
2502 
2503       columnnr++;
2504       key_pos++;
2505     }
2506   }
2507 }  // ndbcluster_build_key_map
2508