1 /*
2 Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24 /**
25 @file
26
27 @brief
28 This file defines various classes and methods used for pushing queries
29 to the ndb data node (for execution by the SPJ block).
30 */
31
32 #include "storage/ndb/plugin/ha_ndbcluster_push.h"
33
34 #include "my_dbug.h"
35 #include "sql/abstract_query_plan.h"
36 #include "sql/current_thd.h"
37 #include "sql/sql_class.h"
38 #include "sql/sql_lex.h"
39 #include "storage/ndb/include/ndb_version.h"
40 #include "storage/ndb/include/ndbapi/NdbApi.hpp"
41 #include "storage/ndb/include/ndbapi/NdbInterpretedCode.hpp"
42 #include "storage/ndb/plugin/ha_ndbcluster.h"
43 #include "storage/ndb/plugin/ha_ndbcluster_cond.h"
44 #include "storage/ndb/plugin/ndb_thd.h"
45 #include "storage/ndb/src/ndbapi/NdbQueryBuilder.hpp"
46 #include "storage/ndb/src/ndbapi/NdbQueryOperation.hpp"
47
48 typedef NdbDictionary::Table NDBTAB;
49
50 /**
51 * antijoin_null_cond is inserted by the optimizer when it create the
52 * special antijoin-NULL-condition. It serves as a token to uniquely
53 * identify such a NULL-condition. Also see similar usage of it
54 * when building the iterators in sql_executor.cc
55 */
56 extern const char *antijoin_null_cond;
57
58 /*
59 Explain why an operation could not be pushed
60 @param[in] msgfmt printf style format string.
61 */
62 #define EXPLAIN_NO_PUSH(msgfmt, ...) \
63 do { \
64 if (unlikely(current_thd->lex->is_explain())) { \
65 push_warning_printf(current_thd, Sql_condition::SL_NOTE, ER_YES, \
66 (msgfmt), __VA_ARGS__); \
67 } \
68 } while (0)
69
get_referred_field_name(const Item_field * field_item)70 static inline const char *get_referred_field_name(
71 const Item_field *field_item) {
72 DBUG_ASSERT(field_item->type() == Item::FIELD_ITEM);
73 return field_item->field->field_name;
74 }
75
get_referred_table_access_name(const Item_field * field_item)76 static const char *get_referred_table_access_name(
77 const Item_field *field_item) {
78 DBUG_ASSERT(field_item->type() == Item::FIELD_ITEM);
79 return field_item->field->table->alias;
80 }
81
ndbcluster_is_lookup_operation(AQP::enum_access_type accessType)82 static bool ndbcluster_is_lookup_operation(AQP::enum_access_type accessType) {
83 return accessType == AQP::AT_PRIMARY_KEY ||
84 accessType == AQP::AT_MULTI_PRIMARY_KEY ||
85 accessType == AQP::AT_UNIQUE_KEY;
86 }
87
first_table(uint start) const88 uint ndb_table_access_map::first_table(uint start) const {
89 for (uint table_no = start; table_no < length(); table_no++) {
90 if (contain(table_no)) return table_no;
91 }
92 return length();
93 }
94
GetTriggerCondOrNull(const Item * item)95 static const Item_func_trig_cond *GetTriggerCondOrNull(const Item *item) {
96 if (item->type() == Item::FUNC_ITEM &&
97 down_cast<const Item_func *>(item)->functype() ==
98 Item_bool_func2::TRIG_COND_FUNC) {
99 return down_cast<const Item_func_trig_cond *>(item);
100 } else {
101 return nullptr;
102 }
103 }
104
105 /**
106 * Check if the specified 'item' is a antijoin-NULL-condition.
107 * This condition is constructed such that all rows being 'matches'
108 * are filtered away, and only the non-(anti)matches will pass.
109 *
110 * Logic inspired by similar code in sql_executor.cc.
111 */
is_antijoin_null_cond(const Item * item)112 static bool is_antijoin_null_cond(const Item *item) {
113 const Item_func_trig_cond *trig_cond = GetTriggerCondOrNull(item);
114 if (trig_cond != nullptr &&
115 trig_cond->get_trig_type() == Item_func_trig_cond::IS_NOT_NULL_COMPL) {
116 const Item *inner_cond = trig_cond->arguments()[0];
117 const Item_func_trig_cond *inner_trig_cond =
118 GetTriggerCondOrNull(inner_cond);
119 if (inner_trig_cond != nullptr) {
120 const Item *inner_inner_cond = inner_trig_cond->arguments()[0];
121 if (inner_inner_cond->item_name.ptr() == antijoin_null_cond) {
122 return true;
123 }
124 }
125 }
126 return false;
127 }
128
last_table(uint start) const129 uint ndb_table_access_map::last_table(uint start) const {
130 uint table_no = start;
131 while (true) {
132 if (contain(table_no))
133 return table_no;
134 else if (table_no == 0)
135 return length();
136 table_no--;
137 }
138 }
139
ndb_pushed_join(const ndb_pushed_builder_ctx & builder,const NdbQueryDef * query_def)140 ndb_pushed_join::ndb_pushed_join(const ndb_pushed_builder_ctx &builder,
141 const NdbQueryDef *query_def)
142 : m_query_def(query_def),
143 m_operation_count(0),
144 m_field_count(builder.m_fld_refs) {
145 DBUG_ASSERT(query_def != NULL);
146 DBUG_ASSERT(builder.m_fld_refs <= MAX_REFERRED_FIELDS);
147 ndb_table_access_map searched;
148 for (uint tab_no = 0; searched != builder.m_join_scope; tab_no++) {
149 const AQP::Table_access *const join_tab =
150 builder.m_plan.get_table_access(tab_no);
151 if (builder.m_join_scope.contain(tab_no)) {
152 DBUG_ASSERT(m_operation_count < MAX_PUSHED_OPERATIONS);
153 m_tables[m_operation_count++] = join_tab->get_table();
154 searched.add(tab_no);
155 }
156 }
157 for (uint i = 0; i < builder.m_fld_refs; i++) {
158 m_referred_fields[i] = builder.m_referred_fields[i];
159 }
160 }
161
~ndb_pushed_join()162 ndb_pushed_join::~ndb_pushed_join() {
163 if (m_query_def) m_query_def->destroy();
164 }
165
match_definition(int type,const NDB_INDEX_DATA * idx) const166 bool ndb_pushed_join::match_definition(int type, // NdbQueryOperationDef::Type,
167 const NDB_INDEX_DATA *idx) const {
168 const NdbQueryOperationDef *const root_operation =
169 m_query_def->getQueryOperation((uint)0);
170 const NdbQueryOperationDef::Type def_type = root_operation->getType();
171
172 if (def_type != type) {
173 DBUG_PRINT(
174 "info",
175 ("Cannot execute push join. Root operation prepared as %s "
176 "not executable as %s",
177 NdbQueryOperationDef::getTypeName(def_type),
178 NdbQueryOperationDef::getTypeName((NdbQueryOperationDef::Type)type)));
179 return false;
180 }
181 const NdbDictionary::Index *const expected_index = root_operation->getIndex();
182
183 // Check that we still use the same index as when the query was prepared.
184 switch (def_type) {
185 case NdbQueryOperationDef::PrimaryKeyAccess:
186 DBUG_ASSERT(idx != NULL);
187 DBUG_ASSERT(idx->unique_index == expected_index);
188 break;
189
190 case NdbQueryOperationDef::UniqueIndexAccess:
191 DBUG_ASSERT(idx != NULL);
192 // DBUG_ASSERT(idx->unique_index == expected_index);
193 if (idx->unique_index != expected_index) {
194 DBUG_PRINT("info",
195 ("Actual index %s differs from expected index %s."
196 "Therefore, join cannot be pushed.",
197 idx->unique_index->getName(), expected_index->getName()));
198 return false;
199 }
200 break;
201
202 case NdbQueryOperationDef::TableScan:
203 DBUG_ASSERT(idx == NULL && expected_index == NULL);
204 break;
205
206 case NdbQueryOperationDef::OrderedIndexScan:
207 DBUG_ASSERT(idx != NULL);
208 // DBUG_ASSERT(idx->index == expected_index);
209 if (idx->index != expected_index) {
210 DBUG_PRINT("info", ("Actual index %s differs from expected index %s. "
211 "Therefore, join cannot be pushed.",
212 idx->index->getName(), expected_index->getName()));
213 return false;
214 }
215 break;
216
217 default:
218 DBUG_ASSERT(false);
219 break;
220 }
221
222 /**
223 * There may be referrences to Field values from tables outside the scope of
224 * our pushed join which are supplied as paramValues().
225 * If any of these are NULL values, join can't be pushed.
226 *
227 * Note that the 'Late NULL filtering' in the Iterator::Read() methods will
228 * eliminate such NULL-key Read's anyway, so not pushing these joins
229 * should be a non-issue.
230 */
231 for (uint i = 0; i < get_field_referrences_count(); i++) {
232 Field *field = m_referred_fields[i];
233 if (field->is_real_null()) {
234 DBUG_PRINT("info",
235 ("paramValue is NULL, can not execute as pushed join"));
236 return false;
237 }
238 }
239
240 return true;
241 }
242
243 #ifdef WORDS_BIGENDIAN
244 /**
245 * Determine if a specific column type is represented in a format which is
246 * sensitive to the endian format of the underlying platform.
247 */
is_endian_sensible_type(const Field * field)248 static bool is_endian_sensible_type(const Field *field) {
249 const enum_field_types type = field->real_type();
250 switch (type) {
251 // Most numerics are endian sensible, note the int24 though.
252 // Note: Enum dont have its own type, represented as an int.
253 case MYSQL_TYPE_SHORT:
254 case MYSQL_TYPE_LONG:
255 case MYSQL_TYPE_LONGLONG:
256 case MYSQL_TYPE_FLOAT:
257 case MYSQL_TYPE_DOUBLE:
258 // Deprecated temporal types were 8/4 byte integers
259 case MYSQL_TYPE_DATETIME:
260 case MYSQL_TYPE_TIMESTAMP:
261 return true;
262
263 // The new temporal data types did it right, not endian sensitive
264 case MYSQL_TYPE_NEWDATE:
265 case MYSQL_TYPE_TIME2:
266 case MYSQL_TYPE_DATETIME2:
267 case MYSQL_TYPE_TIMESTAMP2:
268 // The Tiny type is a single byte, so endianness does not matter
269 case MYSQL_TYPE_TINY:
270 // Year is also a 'tiny', single byte
271 case MYSQL_TYPE_YEAR:
272 // Odly enough, The int24 is *not* stored in an endian sensible format
273 case MYSQL_TYPE_INT24:
274 // The (deprecated) Time type was handled as an int24.
275 case MYSQL_TYPE_TIME:
276 // Decimal is basically a char string variant.
277 case MYSQL_TYPE_DECIMAL:
278 case MYSQL_TYPE_NEWDECIMAL:
279 // Other datatypes (char, blob, json, ..) is not an endian concern
280 default:
281 return false;
282 }
283 }
284 #endif
285
make_query_instance(NdbTransaction * trans,const NdbQueryParamValue * keyFieldParams,uint paramCnt) const286 NdbQuery *ndb_pushed_join::make_query_instance(
287 NdbTransaction *trans, const NdbQueryParamValue *keyFieldParams,
288 uint paramCnt) const {
289 DBUG_TRACE;
290 DBUG_PRINT("info", ("executing chain of %d pushed joins."
291 " First table is %s, accessed as %s.",
292 get_operation_count(), get_table(0)->alias,
293 NdbQueryOperationDef::getTypeName(
294 m_query_def->getQueryOperation((uint)0)->getType())));
295
296 const NdbQueryParamValue *paramValues = keyFieldParams;
297
298 /**
299 * There may be referrences to Field values from tables outside the scope of
300 * our pushed join: These are expected to be supplied as paramValues()
301 * after the keyFieldParams[].
302 */
303 uint outer_fields = get_field_referrences_count();
304 NdbQueryParamValue *extendedParams = NULL;
305 if (unlikely(outer_fields > 0)) {
306 uint size = sizeof(NdbQueryParamValue) * (paramCnt + outer_fields);
307 extendedParams = reinterpret_cast<NdbQueryParamValue *>(my_alloca(size));
308 // Copy specified keyFieldParams[] first
309 for (uint i = 0; i < paramCnt; i++) {
310 new (extendedParams + i) NdbQueryParamValue(keyFieldParams[i]);
311 }
312
313 // There may be referrences to Field values from tables outside the scope of
314 // our pushed join: These are expected to be supplied as paramValues()
315 for (uint i = 0; i < outer_fields; i++) {
316 Field *field = m_referred_fields[i];
317 DBUG_ASSERT(!field->is_real_null()); // Checked by ::check_if_pushable()
318 uchar *raw = field->field_ptr();
319
320 #ifdef WORDS_BIGENDIAN
321 if (field->table->s->db_low_byte_first &&
322 is_endian_sensible_type(field)) {
323 const uint32 field_length = field->pack_length();
324 raw = static_cast<uchar *>(my_alloca(field_length));
325
326 // Byte order is swapped to get the correct endian format.
327 const uchar *last = field->field_ptr() + field_length;
328 for (uint pos = 0; pos < field_length; pos++) raw[pos] = *(--last);
329 }
330 #else
331 // Little endian platforms are expected to be only 'low_byte_first'
332 DBUG_ASSERT(field->table->s->db_low_byte_first);
333 #endif
334
335 new (extendedParams + paramCnt + i) NdbQueryParamValue(raw, false);
336 }
337 paramValues = extendedParams;
338 }
339
340 NdbQuery *query = trans->createQuery(&get_query_def(), paramValues);
341 if (unlikely(extendedParams != NULL)) {
342 for (uint i = 0; i < paramCnt + outer_fields; i++) {
343 extendedParams[i].~NdbQueryParamValue();
344 }
345 }
346 return query;
347 }
348
349 /////////////////////////////////////////
350
ndb_pushed_builder_ctx(const Thd_ndb * thd_ndb,AQP::Table_access * root)351 ndb_pushed_builder_ctx::ndb_pushed_builder_ctx(const Thd_ndb *thd_ndb,
352 AQP::Table_access *root)
353 : m_thd_ndb(thd_ndb),
354 m_plan(*root->get_join_plan()),
355 m_join_root(root),
356 m_join_scope(),
357 m_const_scope(),
358 m_scan_operations(),
359 m_has_pending_cond(),
360 m_internal_op_count(0),
361 m_fld_refs(0),
362 m_builder(nullptr) {}
363
~ndb_pushed_builder_ctx()364 ndb_pushed_builder_ctx::~ndb_pushed_builder_ctx() {
365 if (m_builder) {
366 m_builder->destroy();
367 }
368 }
369
getNdbError() const370 const NdbError &ndb_pushed_builder_ctx::getNdbError() const {
371 DBUG_ASSERT(m_builder);
372 return m_builder->getNdbError();
373 }
374
maybe_pushable(AQP::Table_access * table,join_pushability check)375 bool ndb_pushed_builder_ctx::maybe_pushable(AQP::Table_access *table,
376 join_pushability check) {
377 DBUG_TRACE;
378 TABLE *tab = table->get_table();
379
380 if (tab == nullptr) {
381 // There could be unused tables allocated in the 'plan', skip these
382 return false;
383 }
384
385 if (tab->s->db_type()->db_type != DB_TYPE_NDBCLUSTER) {
386 // Ignore non-NDBCLUSTER tables.
387 DBUG_PRINT("info",
388 ("Table '%s' not in ndb engine, not pushable", tab->alias));
389 return false;
390 }
391
392 if (table->get_table()->file->member_of_pushed_join()) {
393 return false; // Already pushed
394 }
395
396 uint pushable = table->get_table_properties();
397 if (pushable & PUSHABILITY_KNOWN) {
398 return ((pushable & check) == check);
399 }
400
401 bool allowed = false;
402 const char *reason = nullptr;
403 pushable = 0; // Assume not pushable
404
405 switch (table->get_access_type()) {
406 case AQP::AT_VOID:
407 DBUG_ASSERT(false);
408 reason = "UNKNOWN";
409 break;
410
411 case AQP::AT_FIXED:
412 reason = "optimized away, or const'ified by optimizer";
413 break;
414
415 case AQP::AT_UNDECIDED:
416 reason = "Access type was not chosen at 'prepare' time";
417 break;
418
419 case AQP::AT_OTHER:
420 reason = table->get_other_access_reason();
421 break;
422
423 default:
424 const ha_ndbcluster *handler =
425 down_cast<ha_ndbcluster *>(table->get_table()->file);
426
427 if (handler->maybe_pushable_join(reason)) {
428 allowed = true;
429 pushable = PUSHABLE_AS_CHILD | PUSHABLE_AS_PARENT;
430 }
431 break;
432 } // switch
433
434 if (reason != nullptr) {
435 DBUG_ASSERT(!allowed);
436 EXPLAIN_NO_PUSH("Table '%s' is not pushable: %s", tab->alias, reason);
437 }
438 table->set_table_properties(pushable | PUSHABILITY_KNOWN);
439 return allowed;
440 } // ndb_pushed_builder_ctx::maybe_pushable
441
442 /**
443 * Get *internal* table_no of table referred by 'key_item'
444 */
get_table_no(const Item * key_item) const445 uint ndb_pushed_builder_ctx::get_table_no(const Item *key_item) const {
446 DBUG_ASSERT(key_item->type() == Item::FIELD_ITEM);
447 const uint count = m_plan.get_access_count();
448 const table_map bitmap = key_item->used_tables();
449
450 for (uint i = 0; i < count; i++) {
451 TABLE *table = m_plan.get_table_access(i)->get_table();
452 if (table != nullptr && table->pos_in_table_list != nullptr) {
453 const table_map map = table->pos_in_table_list->map();
454 if (bitmap & map) {
455 DBUG_ASSERT((bitmap & ~map) == 0); // No other tables in 'bitmap'
456 return i;
457 }
458 }
459 }
460 return MAX_TABLES;
461 }
462
463 /**
464 * Get a ndb_table_access_map containg all tables [first..last]
465 */
get_tables_in_range(uint first,uint last)466 static ndb_table_access_map get_tables_in_range(uint first, uint last) {
467 ndb_table_access_map table_map;
468 for (uint i = first; i <= last; i++) {
469 table_map.add(i);
470 }
471 return table_map;
472 }
473
474 /**
475 * Translate a table_map from external to internal table enumeration
476 */
get_table_map(table_map external_map) const477 ndb_table_access_map ndb_pushed_builder_ctx::get_table_map(
478 table_map external_map) const {
479 ndb_table_access_map internal_map;
480 const uint count = m_plan.get_access_count();
481 table_map bitmap = (external_map & ~PSEUDO_TABLE_BITS);
482
483 for (uint i = 0; bitmap != 0 && i < count; i++) {
484 TABLE *table = m_plan.get_table_access(i)->get_table();
485 if (table != nullptr && table->pos_in_table_list != nullptr) {
486 const table_map map = table->pos_in_table_list->map();
487 if (bitmap & map) {
488 internal_map.add(i);
489 bitmap &= ~map; // clear handled table
490 }
491 }
492 }
493 DBUG_ASSERT(bitmap == 0);
494 return internal_map;
495 }
496
497 /**
498 * Main entry point to build a pushed join having 'join_root'
499 * as it first operation.
500 *
501 * If the root operation is pushable, we append as many 'child'
502 * operations as possible to the pushed join.
503 *
504 * This currently is implemented as a 3 pass algorithm:
505 *
506 * 1) Analyze each child and add it to 'm_join_scope' as
507 * 'pushable' if it qualifies as such. Part of this phase
508 * is also calculations of possible parents for each table.
509 *
510 * 2) Determine the parent to be used among the set of possible
511 * parents. This is decided based on simple heuristic where
512 * the goal is to employ filters as soon as possible, and utilize
513 * the parallelism of the SPJ block whenever considdered optimal.
514 *
515 * 3) Build the pushed query.
516 *
517 */
make_pushed_join(const ndb_pushed_join * & pushed_join)518 int ndb_pushed_builder_ctx::make_pushed_join(
519 const ndb_pushed_join *&pushed_join) {
520 DBUG_TRACE;
521 pushed_join = NULL;
522
523 if (is_pushable_with_root()) {
524 int error;
525 error = optimize_query_plan();
526 if (unlikely(error)) return error;
527
528 error = build_query();
529 if (unlikely(error)) return error;
530
531 const NdbQueryDef *const query_def = m_builder->prepare(m_thd_ndb->ndb);
532 if (unlikely(query_def == NULL))
533 return -1; // Get error with ::getNdbError()
534
535 pushed_join = new ndb_pushed_join(*this, query_def);
536 if (unlikely(pushed_join == NULL)) return HA_ERR_OUT_OF_MEM;
537
538 DBUG_PRINT("info", ("Created pushed join with %d child operations",
539 pushed_join->get_operation_count() - 1));
540 }
541 return 0;
542 } // ndb_pushed_builder_ctx::make_pushed_join()
543
544 /**
545 * Find the number SPJ operations needed to execute a given access type.
546 * (Unique index lookups are translated to two single table lookups internally.)
547 */
internal_operation_count(AQP::enum_access_type accessType)548 uint internal_operation_count(AQP::enum_access_type accessType) {
549 switch (accessType) {
550 case AQP::AT_PRIMARY_KEY:
551 case AQP::AT_ORDERED_INDEX_SCAN:
552 case AQP::AT_MULTI_PRIMARY_KEY:
553 case AQP::AT_MULTI_MIXED:
554 case AQP::AT_TABLE_SCAN:
555 return 1;
556
557 // Unique key lookups is mapped to two primary key lookups internally.
558 case AQP::AT_UNIQUE_KEY:
559 case AQP::AT_MULTI_UNIQUE_KEY:
560 return 2;
561
562 default:
563 // Other access types are not pushable, so seeing them here is an error.
564 DBUG_ASSERT(false);
565 return 2;
566 }
567 }
568
569 /**
570 * If there is a pushable query starting with 'root'; add as many
571 * child operations as possible to this 'ndb_pushed_builder_ctx' starting
572 * with that join_root.
573 */
is_pushable_with_root()574 bool ndb_pushed_builder_ctx::is_pushable_with_root() {
575 DBUG_TRACE;
576
577 if (!maybe_pushable(m_join_root, PUSHABLE_AS_PARENT)) {
578 return false;
579 }
580
581 const uint root_no = m_join_root->get_access_no();
582 const AQP::enum_access_type access_type = m_join_root->get_access_type();
583 DBUG_ASSERT(access_type != AQP::AT_VOID);
584
585 if (access_type == AQP::AT_MULTI_UNIQUE_KEY) {
586 EXPLAIN_NO_PUSH(
587 "Table '%s' is not pushable, "
588 "access type 'MULTI_UNIQUE_KEY' not implemented",
589 m_join_root->get_table()->alias);
590 return false;
591 }
592
593 if (m_join_root->filesort_before_join()) {
594 EXPLAIN_NO_PUSH(
595 "Table '%s' is not pushable, "
596 "need filesort before joining child tables",
597 m_join_root->get_table()->alias);
598 return false;
599 }
600
601 /**
602 * Past this point we know at least root to be pushable as parent
603 * operation. Search remaining tables appendable if '::is_pushable_as_child()'
604 */
605 DBUG_PRINT("info",
606 ("Table %d is pushable as root", m_join_root->get_access_no()));
607 DBUG_EXECUTE("info", m_join_root->dbug_print(););
608 m_fld_refs = 0;
609 m_const_scope.set_prefix(root_no);
610 m_join_scope.add(root_no);
611 m_internal_op_count = internal_operation_count(access_type);
612
613 /**
614 * Analyze tables below 'm_join_root' as potential members of a pushed
615 * join query starting with root.
616 * As part of analyzing the outer join and semi join structure,
617 * we use the join- and semi-join-nest structures set up by the optimizer,
618 * available trough the Abstract Query Plan (AQP) interface.
619 * See further documentation of how the nest structure is
620 * represented in m_tables[] in ha_ndbcluster_push.h.
621 */
622 {
623 const uint last_table = m_plan.get_access_count() - 1;
624 DBUG_ASSERT(m_plan.get_table_access(0)->get_first_inner() == 0);
625 DBUG_ASSERT(m_plan.get_table_access(0)->get_last_inner() == last_table);
626
627 ndb_table_access_map upper_nests;
628 ndb_table_access_map inner_nest;
629 ndb_table_access_map sj_nest;
630
631 uint first_inner = m_join_root->get_first_inner();
632 uint last_inner = m_join_root->get_last_inner();
633 int first_upper = m_join_root->get_first_upper();
634 if (root_no > first_inner) {
635 // m_join_root was not the 'first_inner' in nest;
636 // Last_inner / first_upper is only reliable read at first_inner:
637 last_inner = m_plan.get_table_access(first_inner)->get_last_inner();
638 first_upper = m_plan.get_table_access(first_inner)->get_first_upper();
639 }
640 int first_sj_inner = m_join_root->get_first_sj_inner();
641
642 m_tables[root_no].m_first_inner = first_inner;
643 m_tables[root_no].m_last_inner = last_inner;
644 m_tables[root_no].m_first_upper = first_upper;
645
646 for (uint tab_no = root_no; tab_no <= last_table; tab_no++) {
647 AQP::Table_access *table = m_plan.get_table_access(tab_no);
648
649 // Set up join-nest for this tab_no
650 if (table->get_first_inner() == first_inner) {
651 // Still in the join-nest starting at 'first_inner'
652 m_tables[tab_no] = m_tables[first_inner];
653 } else {
654 DBUG_ASSERT(table->get_first_inner() == tab_no);
655
656 // Enter new inner nest
657 upper_nests = m_tables[first_inner].m_upper_nests;
658 upper_nests.add(inner_nest);
659 inner_nest.clear_all();
660 first_upper = first_inner;
661 first_inner = tab_no;
662 last_inner = table->get_last_inner();
663
664 m_tables[first_inner].m_first_inner = first_inner;
665 m_tables[first_inner].m_last_inner = last_inner;
666 m_tables[first_inner].m_first_upper = first_upper;
667 m_tables[first_inner].m_upper_nests = upper_nests;
668 }
669 m_tables[tab_no].m_inner_nest = inner_nest;
670 inner_nest.add(tab_no);
671
672 /**
673 * Build similar info for sj_nest. Note that sj_nests are not nested
674 * inside other sj_nests. Thus there are no 'upper_sj_nests', and the
675 * logic for leaving a sj_nest becomes much simpler.
676 * (No un-nesting of nests)
677 */
678 if (table->get_first_sj_inner() >= 0) {
679 if (table->get_first_sj_inner() == first_sj_inner) {
680 // still within same sj_nest starting at first_sj_inner.
681 } else if (table->get_first_sj_inner() == (int)tab_no) {
682 // Start new sj_nest
683 first_sj_inner = table->get_first_sj_inner();
684 sj_nest.clear_all();
685 }
686 sj_nest.add(tab_no);
687 } else {
688 // Not in a sj_nest any longer
689 first_sj_inner = -1;
690 sj_nest.clear_all();
691 }
692 m_tables[tab_no].m_sj_nest = sj_nest;
693
694 /**
695 * Use is_pushable_as_child() to analyze whether this table is
696 * pushable as part of query starting with 'root'. Note that
697 * outer- and semi-joined table scans can not be completely analyzed
698 * by is_pushable_as_child(): Pushability also depends on that all
699 * later tables in the same nest are pushed, and that there are no
700 * unpushed conditions for any (later) tables in this nest.
701 * These extra conditions are later checked by validate_join_nest(),
702 * when the nest is completed. This may cause some tables which passed
703 * the first pushability check, to later fail and be removed. This
704 * also has a cascading effect on any tables depending on those
705 * being removed. (See validate_join_nest() and remove_pushable())
706 */
707 if (table == m_join_root || // root, already known pushable
708 is_pushable_as_child(table)) { // else, check child pushable
709 if (!ndbcluster_is_lookup_operation(table->get_access_type())) {
710 // A pushable table scan, collect in bitmap for later checks
711 m_scan_operations.add(tab_no);
712 }
713 }
714
715 /**
716 * This table can be the last inner table of join-nest(s).
717 * That will require additional pushability checks of entire nest
718 */
719 if (table->get_last_sj_inner() == (int)tab_no) {
720 if (first_sj_inner > (int)root_no) { // Leaving the semi_join nest
721 // Phase 2 of pushability check, see big comment above.
722 validate_join_nest(sj_nest, first_sj_inner, tab_no, "semi");
723 }
724 first_sj_inner = -1;
725 sj_nest.clear_all();
726 }
727
728 /**
729 * Note that the same tab_no may unwind several inner join-nests.
730 * ... all having the same 'last_inner' (this tab_no)
731 */
732 while (tab_no == last_inner && // End of current join-nest, and
733 first_upper >= 0) { // has an embedding upper nest
734 if (first_inner > root_no) { // Leaving an outer joined nest
735 // Phase 2 of pushability check, see big comment above.
736 validate_join_nest(inner_nest, first_inner, tab_no, "outer");
737 }
738
739 // The upper_nest becomes our new inner_nest when we 'unwind'.
740 ndb_table_access_map upper_nest(upper_nests);
741 upper_nest.subtract(m_tables[first_upper].m_upper_nests);
742 inner_nest = upper_nest;
743 upper_nests = m_tables[first_upper].m_upper_nests;
744 first_inner = first_upper;
745
746 /**
747 * Note that we may 'unwind' to a nest level above where we started as
748 * root. m_tables[first_upper] will then not hold the last_inner,
749 * first_upper, so we need to read it from the AQP interface instead.
750 */
751 last_inner = m_plan.get_table_access(first_upper)->get_last_inner();
752 first_upper = m_plan.get_table_access(first_upper)->get_first_upper();
753
754 } // while 'leaving a nest'
755 } // for tab_no [root_no..last_table]
756 DBUG_ASSERT(upper_nests.is_clear_all());
757 }
758 DBUG_ASSERT(m_join_scope.contain(root_no));
759 return (m_join_scope.last_table() > root_no); // Anything pushed?
760 } // ndb_pushed_builder_ctx::is_pushable_with_root()
761
762 /***************************************************************
763 * is_pushable_as_child()
764 *
765 * Determines if the specified child ('table') can be appended to
766 * an existing chain of previously pushed join operations.
767 *
768 * To be considered pushable the child operation should:
769 *
770 * 1) Have an REF to the previous parent operations.
771 * 2) Refer only a single parent, or a grandparent reachable through
772 * a single parent common to all key fields in the 'REF'
773 *
774 * In order to increase pushability we use the COND_EQUAL sets
775 * to resolve cases (2) above) where multiple parents are refered.
776 * If needed to make a child pushable, we replace parent
777 * references with another from the COND_EQUAL sets which make
778 * it pushable .
779 ****************************************************************/
is_pushable_as_child(AQP::Table_access * table)780 bool ndb_pushed_builder_ctx::is_pushable_as_child(AQP::Table_access *table) {
781 DBUG_TRACE;
782 const uint root_no = m_join_root->get_access_no();
783 const uint tab_no = table->get_access_no();
784 DBUG_ASSERT(tab_no > root_no);
785
786 if (!maybe_pushable(table, PUSHABLE_AS_CHILD)) {
787 return false;
788 }
789
790 const AQP::enum_access_type root_type = m_join_root->get_access_type();
791 const AQP::enum_access_type access_type = table->get_access_type();
792
793 if (!(ndbcluster_is_lookup_operation(access_type) ||
794 access_type == AQP::AT_ORDERED_INDEX_SCAN)) {
795 EXPLAIN_NO_PUSH(
796 "Can't push table '%s' as child, 'type' must be a 'ref' access",
797 table->get_table()->alias);
798 table->set_table_properties(table->get_table_properties() &
799 ~PUSHABLE_AS_CHILD);
800 return false;
801 }
802
803 // There is a limitation in not allowing LOOKUP - (index)SCAN operations
804 if (access_type == AQP::AT_ORDERED_INDEX_SCAN &&
805 ndbcluster_is_lookup_operation(root_type)) {
806 EXPLAIN_NO_PUSH(
807 "Push of table '%s' as scan-child "
808 "with lookup-root '%s' not implemented",
809 table->get_table()->alias, m_join_root->get_table()->alias);
810 // 'table' may still be PUSHABLE_AS_CHILD with another parent
811 return false;
812 }
813
814 const uint no_of_key_fields = table->get_no_of_key_fields();
815 if (unlikely(no_of_key_fields > ndb_pushed_join::MAX_LINKED_KEYS)) {
816 EXPLAIN_NO_PUSH(
817 "Can't push table '%s' as child, "
818 "too many ref'ed parent fields",
819 table->get_table()->alias);
820 table->set_table_properties(
821 table->get_table_properties() &
822 ~PUSHABLE_AS_CHILD); // Permanently disable as child
823 return false;
824 }
825
826 for (uint i = tab_no; i > root_no; i--) {
827 if (m_plan.get_table_access(i)->uses_join_cache()) {
828 EXPLAIN_NO_PUSH(
829 "Cannot push table '%s' as child of table '%s'. Doing so "
830 "would prevent using join buffer for table '%s'.",
831 table->get_table()->alias, m_join_root->get_table()->alias,
832 m_plan.get_table_access(i)->get_table()->alias);
833 return false;
834 }
835 }
836
837 // Check that we do not exceed the max number of pushable operations.
838 const uint internal_ops_needed = internal_operation_count(access_type);
839 if (unlikely(m_internal_op_count + internal_ops_needed >
840 NDB_SPJ_MAX_TREE_NODES)) {
841 EXPLAIN_NO_PUSH(
842 "Cannot push table '%s' as child of '%s'. Max number"
843 " of pushable tables exceeded.",
844 table->get_table()->alias, m_join_root->get_table()->alias);
845 return false;
846 }
847 m_internal_op_count += internal_ops_needed;
848
849 DBUG_PRINT("info",
850 ("Table:%d, Checking %d REF keys", tab_no, no_of_key_fields));
851
852 /**
853 * Calculate the set of possible parents for each non-const_item KEY_PART
854 * from the table. In addition to the parent table directly referred
855 * by the KEY_PART, any tables in *same join nest*, available by usage of
856 * equality sets are also added as a possible parent.
857 *
858 * The set of 'key_parents[]' are saved for later usage by ::optimize_*(),
859 * which will select the actual parent to be used for each table.
860 *
861 * We also aggregate the set of 'all_parents' referred by the keys.
862 * This is used for checking whether table is pushable.
863 */
864 ndb_table_access_map all_parents;
865 ndb_table_access_map *key_parents =
866 new (*THR_MALLOC) ndb_table_access_map[no_of_key_fields];
867 m_tables[tab_no].m_key_parents = key_parents;
868
869 for (uint key_part_no = 0; key_part_no < no_of_key_fields; key_part_no++) {
870 const Item *const key_item = table->get_key_field(key_part_no);
871 const KEY_PART_INFO *key_part = table->get_key_part_info(key_part_no);
872
873 if (key_item->const_item()) // REF is a literal or field from const-table
874 {
875 DBUG_PRINT("info", (" Item type:%d is 'const_item'", key_item->type()));
876 if (!is_const_item_pushable(key_item, key_part)) {
877 return false;
878 }
879 } else if (key_item->type() == Item::FIELD_ITEM) {
880 /**
881 * Calculate all parents FIELD_ITEM may refer - Including those
882 * available through usage of equality sets. All field_parents
883 * will be from within the same join_nest.
884 * Only parents within m_join_scope are considered.
885 */
886 ndb_table_access_map field_parents;
887 if (!is_field_item_pushable(table, key_item, key_part, field_parents)) {
888 return false;
889 }
890 // Save the found key_parents[], aggregate total set of parents referable.
891 key_parents[key_part_no] = field_parents;
892 all_parents.add(field_parents);
893 } else {
894 EXPLAIN_NO_PUSH(
895 "Can't push table '%s' as child, "
896 "column '%s' does neither 'ref' a column nor a constant",
897 table->get_table()->alias, key_part->field->field_name);
898 table->set_table_properties(
899 table->get_table_properties() &
900 ~PUSHABLE_AS_CHILD); // Permanently disable as child
901
902 return false;
903 }
904 } // for (uint key_part_no= 0 ...
905
906 // If no parent candidates within current m_join_scope, table is unpushable.
907 if (all_parents.is_clear_all()) {
908 EXPLAIN_NO_PUSH(
909 "Can't push table '%s' as child of '%s', "
910 "no parent-child dependency exists between these tables",
911 table->get_table()->alias, m_join_root->get_table()->alias);
912 return false;
913 }
914
915 /**
916 * Try to push condition to 'table'. Whatever we could not push of the
917 * condition is a 'server side condition' which the server has to
918 * evaluate later. The existence of such conditions may effect the join
919 * pushability of tables, so we need to try to push conditions first.
920 */
921 const Item *pending_cond = table->get_condition();
922 if (pending_cond != nullptr &&
923 current_thd->optimizer_switch_flag(
924 OPTIMIZER_SWITCH_ENGINE_CONDITION_PUSHDOWN)) {
925 ha_ndbcluster *handler =
926 down_cast<ha_ndbcluster *>(table->get_table()->file);
927
928 const bool other_tbls_ok = false;
929 handler->m_cond.prep_cond_push(pending_cond, other_tbls_ok);
930 pending_cond = handler->m_cond.m_remainder_cond;
931 }
932 if (pending_cond != nullptr) {
933 /**
934 * An anti join will always have an 'antijoin_null_cond' attached.
935 * The general rule is that we do not allow any tables having unpushed
936 * conditions to be pushed as part of a SPJ operation. However, this
937 * special 'antijoin_null_cond' could be ignored, as the same NULL-only
938 * filtering is done by the antijoin execution at the server.
939 */
940 if (!(table->is_antijoin() && is_antijoin_null_cond(pending_cond))) {
941 m_has_pending_cond.add(tab_no);
942 }
943 }
944 if (!ndbcluster_is_lookup_operation(table->get_access_type())) {
945 // Check extra limitations on when index scan is pushable,
946 if (!is_pushable_as_child_scan(table, all_parents)) {
947 return false;
948 }
949 }
950
951 const ndb_table_access_map inner_nest(m_tables[tab_no].m_inner_nest);
952 if (!inner_nest.contain(all_parents)) { // Is not a plain inner-join
953
954 ndb_table_access_map depend_parents;
955
956 /**
957 * Some key_parents[] could have dependencies outside of embedding_nests.
958 * Calculate the actual nest dependencies and check join pushability.
959 */
960 for (uint i = 0; i < no_of_key_fields; i++) {
961 if (!key_parents[i].is_clear_all()) { // Key refers a parent field
962 #ifndef DBUG_OFF
963 // Verify requirement that all field_parents are from within same nest
964 {
965 const uint last = key_parents[i].last_table(tab_no);
966 ndb_table_access_map nest(m_tables[last].m_inner_nest);
967 nest.add(last);
968 DBUG_ASSERT(nest.contain(key_parents[i]));
969 }
970 #endif
971 const uint first = key_parents[i].first_table();
972 depend_parents.add(first);
973 }
974 }
975
976 /**
977 * In the (unlikely) case of parent references to tables not
978 * in our embedding join nests at all, we have to make sure that we do
979 * not cause extra dependencies to be added between the referred join nests.
980 */
981 const ndb_table_access_map embedding_nests(
982 m_tables[tab_no].embedding_nests());
983 if (unlikely(!embedding_nests.contain(depend_parents))) {
984 if (!is_outer_nests_referable(table, depend_parents)) {
985 return false;
986 }
987 }
988
989 /**
990 * Calculate contribution to the 'nest dependency', which is the ancestor
991 * dependencies to tables not being part of this inner_nest themself.
992 * These ancestor dependencies are set as the required 'm_ancestors'
993 * on the 'first_inner' table in each nest, and later used to enforce
994 * ::optimize_query_plan() to use these tables as (grand-)parents
995 */
996 const uint first_inner = m_tables[tab_no].m_first_inner;
997 // Only interested in the upper-nest-level dependencies:
998 depend_parents.intersect(m_tables[first_inner].embedding_nests());
999
1000 // Can these outer parent dependencies co-exists with existing
1001 // ancestor dependencies?
1002 if (!depend_parents.is_clear_all() &&
1003 !m_tables[first_inner].m_ancestors.is_clear_all()) {
1004 ndb_table_access_map nest_dependencies(depend_parents);
1005 nest_dependencies.add(m_tables[first_inner].m_ancestors);
1006
1007 uint ancestor_no = first_inner;
1008 while (!embedding_nests.contain(nest_dependencies)) {
1009 ancestor_no = nest_dependencies.last_table(ancestor_no - 1);
1010 nest_dependencies.clear_bit(ancestor_no);
1011
1012 // If remaining dependencies are unavailable from parent, we can't push
1013 if (!m_tables[ancestor_no].embedding_nests().contain(
1014 nest_dependencies)) {
1015 const AQP::Table_access *const parent =
1016 m_plan.get_table_access(ancestor_no);
1017 EXPLAIN_NO_PUSH(
1018 "Can't push table '%s' as child of '%s', "
1019 "as it would make the parent table '%s' "
1020 "depend on table(s) outside of its join-nest",
1021 table->get_table()->alias, m_join_root->get_table()->alias,
1022 parent->get_table()->alias);
1023 return false;
1024 }
1025 }
1026 }
1027 m_tables[first_inner].m_ancestors.add(depend_parents);
1028 DBUG_ASSERT(!m_tables[first_inner].m_ancestors.contain(first_inner));
1029 }
1030
1031 m_join_scope.add(tab_no);
1032 return true;
1033 } // ndb_pushed_builder_ctx::is_pushable_as_child
1034
1035 /***************************************************************
1036 * is_pushable_within_nest() / is_pushable_as_child_scan()
1037 *
1038 * There are additional limitation on when an index scan is pushable
1039 * relative to a (single row) primary key or unique key lookup operation.
1040 *
1041 * Such limitations exists for index scan operation being outer- or
1042 * semi-joined: Consider the query:
1043 *
1044 * select * from t1 left join t2
1045 * on t1.attr=t2.ordered_index
1046 * where predicate(t1.row, t2. row);
1047 *
1048 * Where 'predicate' cannot be pushed to the ndb. (a 'pending_cond', above!)
1049 * The ndb api may then return:
1050 *
1051 * +---------+---------+
1052 * | t1.row1 | t2.row1 | (First batch)
1053 * | t1.row2 | t2.row1 |
1054 * ..... (NextReq).....
1055 * | t1.row1 | t2.row2 | (Next batch)
1056 * +---------+---------+
1057 *
1058 * Since we could not return all t2 rows matching 't1.row1' in the first
1059 * batch, it is repeated for the next batch of t2 rows. From mysqld POW it
1060 * will appear as a different row, even if it is the same rows as returned
1061 * in the first batch. This works just fine when the nest loop joiner
1062 * create a plain INNER JOIN result; the different instances of 't1.row1'
1063 * would just appear a bit out of order. However OUTER JOIN is a different
1064 * matter:
1065 *
1066 * Assume that the rows [t1.row1, t2.row1] from the first batch does not
1067 * satisfies 'predicate'. As there are no more 't1.row1's in this batch,
1068 * mysqld will conclude it has seen all t1.row1's without any matching
1069 * t2 rows, Thus it will create a NULL extended t2 row in the (outer joined)
1070 * result set.
1071 *
1072 * As the same t1.row1 will be returned from the NDB API in the next batch,
1073 * mysqld will create a result row also for this instance - Either with yet
1074 * another NULL-extended t2 row, or possibly one or multiple matching rows.
1075 * In either case resulting in an incorrect result set. Like:
1076 * +---------+---------+
1077 * | t1.row1 | NULL | -> Error!
1078 * | t1.row2 | t2.row1 |
1079 * | t1.row1 | t2.row2 |
1080 * +---------+---------+
1081 *
1082 * So in order to allow an outer joined index scan to be pushed, we need
1083 * to check that a row returned from a pushed index-scan will not later
1084 * be rejected by mysqld - i.e. the join has to be fully evaluated by SPJ
1085 * (in companion with the SPJ API):
1086 *
1087 * 1a) There should be no 'pending_cond' (unpushed conditions) on the
1088 * table.
1089 * 1b) Neither could any *other* tables within the same inner_join nest
1090 * have pending_cond's. (An inner join nest require matching rows
1091 * from all tables in the nest: A non-matching pending_cond on a row
1092 * from any table in the nest, will also eliminate the rows from the
1093 * other tables. (Possibly creating false NULL-extensions)
1094 * 1c) Neither should any tables within the upper nests have
1095 * pending_cond's. Consider the nest structure t1, (t2, (t3)),
1096 * where t1 and 'this' table t3 are scans. If t2 has a pending
1097 * condition, that condition may eliminate rows from the embedded
1098 * (outer joined) t3 nest, and result in false NULL extended rows
1099 * when t3 rows are fetched in multiple batches.
1100 * (Note that this restriction does not apply to the uppermost nest
1101 * containing t1: A non-matching condition on that table will eliminate
1102 * the t1 row as well, thus there will be no extra NULL extended
1103 * rows in the result set.
1104 *
1105 * 2) There should be no unpushed tables in:
1106 * 2b) In this inner_join nest.
1107 * 2c) In any upper nests of this table.
1108 *
1109 * This case is similar as for pending_cond: A non-match when mysqld
1110 * joins in the rows from the unpushed table may eliminate rows
1111 * returned from the pushed joins as well, resulting in extra
1112 * NULL extended rows.
1113 *
1114 * 3) In addition the join condition may explicitly specify dependencies
1115 * on tables which are not in either of the upper_nests,
1116 * eg t1, (t2,t3), (t4), where t4 has a join condition on t3.
1117 * If either:
1118 * 3a) t2 or t3 has an unpushed condition, possibly eliminating returned
1119 * (t2,t3) rows, and the t4 rows depending on these being NOT NULL.
1120 * 3b) t2 or t3 are not pushed, mysqld doesn't matching rows from these
1121 * tables, which also eliminate the t4 rows, possibly resulting in
1122 * extra NULL extended rows.
1123 *
1124 * Note that ::is_pushable_as_child_scan() can only check these conditions for
1125 * tables preceeding it in the query plan. ::validate_join_nest() will later
1126 * do similar checks when we have completed a nest level. The later check
1127 * would be sufficient, however we prefer to 'fail fast'.
1128 *
1129 ****************************************************************/
is_pushable_within_nest(const AQP::Table_access * table,ndb_table_access_map nest,const char * nest_type)1130 bool ndb_pushed_builder_ctx::is_pushable_within_nest(
1131 const AQP::Table_access *table, ndb_table_access_map nest,
1132 const char *nest_type) {
1133 DBUG_TRACE;
1134 DBUG_ASSERT(!ndbcluster_is_lookup_operation(table->get_access_type()));
1135 const uint tab_no = table->get_access_no();
1136
1137 // Logic below assume that 'this' table is not part of the 'nest'.
1138 nest.clear_bit(tab_no);
1139
1140 /**
1141 * 1) Check if outer- or semi-joined table depends on 'unpushed condition'
1142 */
1143 if (unlikely(m_has_pending_cond.contain(tab_no))) { // 1a)
1144 // This table has unpushed condition
1145 EXPLAIN_NO_PUSH(
1146 "Can't push %s joined table '%s' as child of '%s', "
1147 "table condition can not be fully evaluated by pushed join",
1148 nest_type, table->get_table()->alias, m_join_root->get_table()->alias);
1149 return false;
1150 }
1151
1152 if (unlikely(m_has_pending_cond.is_overlapping(nest))) { // 1b,1c:
1153 // Other (lookup tables) withing nest has unpushed condition
1154 ndb_table_access_map pending_conditions(m_has_pending_cond);
1155 pending_conditions.intersect(nest);
1156 // Report the closest violating table, may be multiple.
1157 const uint violating = pending_conditions.last_table(tab_no);
1158 EXPLAIN_NO_PUSH(
1159 "Can't push %s joined table '%s' as child of '%s', "
1160 "condition on its dependant table '%s' is not pushed down",
1161 nest_type, table->get_table()->alias, m_join_root->get_table()->alias,
1162 m_plan.get_table_access(violating)->get_table()->alias);
1163 return false;
1164 }
1165
1166 /**
1167 * 2) Check if outer- or semi-joined table depends on 'unpushed tables'
1168 */
1169 if (unlikely(!m_join_scope.contain(nest))) { // 2b,2c
1170 ndb_table_access_map unpushed_tables(nest);
1171 unpushed_tables.subtract(m_join_scope);
1172 // Report the closest unpushed table, may be multiple.
1173 const uint violating = unpushed_tables.last_table(tab_no);
1174 EXPLAIN_NO_PUSH(
1175 "Can't push %s joined table '%s' as child of '%s', "
1176 "table '%s' in its dependant join-nest(s) is not part of the "
1177 "pushed join",
1178 nest_type, table->get_table()->alias, m_join_root->get_table()->alias,
1179 m_plan.get_table_access(violating)->get_table()->alias);
1180 return false;
1181 }
1182 return true;
1183 }
1184
is_pushable_as_child_scan(const AQP::Table_access * table,const ndb_table_access_map all_parents)1185 bool ndb_pushed_builder_ctx::is_pushable_as_child_scan(
1186 const AQP::Table_access *table, const ndb_table_access_map all_parents) {
1187 DBUG_TRACE;
1188 DBUG_ASSERT(!ndbcluster_is_lookup_operation(table->get_access_type()));
1189
1190 const uint root_no = m_join_root->get_access_no();
1191 const uint tab_no = table->get_access_no();
1192
1193 if (m_tables[tab_no].isOuterJoined(m_tables[root_no])) {
1194 /**
1195 * Is an outer join relative to root. Even if tab_no is inner_joined with
1196 * another parent than 'root', any restrictions on scan operations still
1197 * apply.
1198 */
1199
1200 /**
1201 * Online upgrade, check if we are connected to a 'ndb' allowing us to push
1202 * outer joined scan operation (ver >= 8.0.20), Else we reject pushing.
1203 */
1204 if (unlikely(!NdbQueryBuilder::outerJoinedScanSupported(m_thd_ndb->ndb))) {
1205 EXPLAIN_NO_PUSH(
1206 "Can't push table '%s' as child of '%s', "
1207 "outer join of scan-child not implemented",
1208 table->get_table()->alias, m_join_root->get_table()->alias);
1209 return false;
1210 }
1211
1212 /**
1213 * Calculate the set of tables being outer joined relative to root.
1214 * i.e. the tables which may be incorrectly NULL extended due to
1215 * unpushed conditions and tables. These are the tables we check
1216 * the above 1b,1c,2b and 2c cases against.
1217 */
1218 ndb_table_access_map outer_join_nests(m_tables[tab_no].embedding_nests());
1219 outer_join_nests.subtract(full_inner_nest(root_no, tab_no));
1220
1221 const char *join_type = table->is_antijoin() ? "anti" : "outer";
1222 if (!is_pushable_within_nest(table, outer_join_nests, join_type)) {
1223 return false;
1224 }
1225
1226 /**
1227 * 3) Check if any tables outside of the embedding nest are referred.
1228 */
1229 const ndb_table_access_map embedding_nests(
1230 m_tables[tab_no].embedding_nests());
1231 if (unlikely(!embedding_nests.contain(all_parents))) { // 3)
1232 if (unlikely(!embedding_nests.contain(m_has_pending_cond))) { // 3a)
1233 EXPLAIN_NO_PUSH(
1234 "Can't push %s joined table '%s' as child of '%s', "
1235 "exists unpushed condition in join-nests it depends on",
1236 join_type, table->get_table()->alias,
1237 m_join_root->get_table()->alias);
1238 return false;
1239 }
1240
1241 // Calculate all unpushed tables prior to this table.
1242 ndb_table_access_map unpushed_tables;
1243 unpushed_tables.set_prefix(tab_no);
1244 unpushed_tables.subtract(m_const_scope);
1245 unpushed_tables.subtract(m_join_scope);
1246
1247 /**
1248 * Note that the check below is a bit too strict, we check:
1249 * 'Are there any unpushed tables outside of our embedding nests',
1250 * instead of 'Do we refer tables from nests outside embedding nests,
1251 * having unpushed tables'. As we alread know 'all_parents' are not
1252 * contained in 'embedding'.
1253 * The outcome should be the same except if we have parent refs to
1254 * multiple non-embedded nests. (very unlikely)
1255 */
1256 if (unlikely(!embedding_nests.contain(unpushed_tables))) { // 3b)
1257 EXPLAIN_NO_PUSH(
1258 "Can't push %s joined table '%s' as child of '%s', "
1259 "table depends on join-nests with unpushed tables",
1260 join_type, table->get_table()->alias,
1261 m_join_root->get_table()->alias);
1262 return false;
1263 }
1264 }
1265 } // end 'outer joined scan'
1266
1267 /**
1268 * As for outer joins, there are restrictions for semi joins:
1269 *
1270 * Scan-scan result may return the same ancestor-scan rowset
1271 * multiple times when rowset from child scan has to be fetched
1272 * in multiple batches (as above). This is fine for nested loop
1273 * evaluations of pure loops, as it should just produce the total
1274 * set of join combinations - in any order.
1275 *
1276 * However, the different semi join strategies (FirstMatch,
1277 * Loosescan, Duplicate Weedout) requires that skipping
1278 * a row (and its nested loop ancestors) is 'permanent' such
1279 * that it will never reappear in later batches.
1280 *
1281 * So we do not (yet) allow an index-scan to be semi-joined.
1282 *
1283 * Note that it is the semi_join properties relative to the
1284 * other tables we join with which matter - A table joining
1285 * with another table within the same semi_join nest is an
1286 * INNER JOIN wrt. that other table. (Which is pushable)
1287 */
1288
1289 if (table->is_sj_firstmatch() &&
1290 NdbQueryBuilder::outerJoinedScanSupported(m_thd_ndb->ndb)) {
1291 // 'table' is part of a semi-join
1292 // (We support semi-join only if firstMatch strategy is used)
1293 DBUG_ASSERT(m_tables[tab_no].m_sj_nest.contain(table->get_access_no()));
1294
1295 if (!is_pushable_within_nest(table, m_tables[tab_no].m_sj_nest, "semi")) {
1296 return false;
1297 }
1298 if (table->get_first_sj_inner() == (int)tab_no) {
1299 /**
1300 * In order to do correct firstmatch duplicate elimination in
1301 * SPJ, we need to ensure that the table to eliminate duplicates
1302 * from is the parent of the firstmast-sj-nest -> enforce it
1303 * as a mandatory ancestor of the sj-nest.
1304 */
1305 const int firstmatch_return = table->get_firstmatch_return();
1306 if (!all_parents.contain(firstmatch_return)) {
1307 EXPLAIN_NO_PUSH(
1308 "Can't push table '%s' as child of '%s', "
1309 "the FirstMatch-return '%s' can not be made the parent of sj-nest",
1310 table->get_table()->alias, m_join_root->get_table()->alias,
1311 m_plan.get_table_access(firstmatch_return)->get_table()->alias);
1312 return false;
1313 }
1314 m_tables[tab_no].m_ancestors.add(firstmatch_return);
1315 }
1316 } else if (!m_tables[tab_no].m_sj_nest.is_clear_all()) {
1317 if (!m_tables[tab_no].m_sj_nest.contain(m_join_scope)) {
1318 // Semi-joined relative to some other tables in join_scope
1319 EXPLAIN_NO_PUSH(
1320 "Can't push table '%s' as child of '%s', "
1321 "semi join of scan-child not implemented",
1322 table->get_table()->alias, m_join_root->get_table()->alias);
1323 return false;
1324 }
1325 } else if (!m_tables[root_no].m_sj_nest.is_clear_all()) {
1326 // Root is part of a semi join, table is not
1327 EXPLAIN_NO_PUSH(
1328 "Can't push table '%s' as child of '%s', "
1329 "not members of same semi join 'nest'",
1330 table->get_table()->alias, m_join_root->get_table()->alias);
1331 return false;
1332 }
1333 // end 'semi_join' handling
1334
1335 /**
1336 * Note, for both 'outer join', and 'semi joins restriction above:
1337 *
1338 * The restriction could have been lifted if we could
1339 * somehow ensure that all rows from a child scan are fetched
1340 * before we move to the next ancestor row.
1341 *
1342 * Which is why we do not force the same restrictions on lookup.
1343 */
1344
1345 return true;
1346 } // ndb_pushed_builder_ctx::is_pushable_as_child_scan
1347
1348 /***************************************************************
1349 *
1350 * is_outer_nests_referable()
1351 *
1352 * In the (unlikely) case of parent references to tables not
1353 * in our embedding join nests, we have to make sure that we do
1354 * not cause extra dependencies to be added between the join nests.
1355 * (Which would have changed the join semantic specified in query)
1356 *
1357 * If this table has multiple dependencies, it can only be added to
1358 * the set of pushed tables if the dependent tables themself
1359 * depends, or could be make dependent, on each other.
1360 *
1361 * Such new dependencies can only be added iff all 'depend_parents'
1362 * are in the same 'inner join nest', i.e. we can not add *new*
1363 * dependencies on outer joined tables (or nests).
1364 *
1365 * A typical example is t1 oj (t2) oj (t3) oj (t4), where t4.join_cond
1366 * refers *both* the non_embedding tables t2 and t3. In such cases t4 can not
1367 * be pushed unless t3 already has a join condition depending on t2.
1368 * Note that the header file ha_ndbcluster_push.h contains more
1369 * extensive comments regarding this.
1370 *
1371 * Algorithm:
1372 * 1. Calculate the minimum set of 'dependencies' for the
1373 * key_parents[].
1374 *
1375 * 2. Check the 'dependencies' set, starting at the last (the
1376 * table closest to this table). Check that it either already
1377 * exists a dependency between each such table and the remaining
1378 * dependant tables, or that we are allowed to add the required
1379 * dependencies.
1380 ***************************************************************/
is_outer_nests_referable(const AQP::Table_access * table,const ndb_table_access_map depend_parents)1381 bool ndb_pushed_builder_ctx::is_outer_nests_referable(
1382 const AQP::Table_access *table, const ndb_table_access_map depend_parents) {
1383 DBUG_TRACE;
1384
1385 const uint tab_no = table->get_access_no();
1386 const uint first_inner = m_tables[tab_no].m_first_inner;
1387 const ndb_table_access_map embedding_nests(
1388 m_tables[tab_no].embedding_nests());
1389 DBUG_ASSERT(!embedding_nests.contain(depend_parents));
1390
1391 /**
1392 * Include nest-level ancestor dependencies already enforced.
1393 */
1394 ndb_table_access_map dependencies(depend_parents);
1395 dependencies.add(m_tables[first_inner].m_ancestors);
1396
1397 /**
1398 * Check that all parents we depend on are available from within the
1399 * embedding nests. This include upper_nests previously extended
1400 * with previous references to tables not in the direct line of
1401 * upper nests. Which then become a part of later embedded_nests being
1402 * referrable.
1403 */
1404 {
1405 const uint parent_no = dependencies.last_table(tab_no - 1);
1406 dependencies.clear_bit(parent_no);
1407
1408 // If remaining dependencies are unavailable from parent, we can't push
1409 if (!m_tables[parent_no].embedding_nests().contain(dependencies)) {
1410 const AQP::Table_access *const parent =
1411 m_plan.get_table_access(parent_no);
1412 EXPLAIN_NO_PUSH(
1413 "Can't push table '%s' as child of '%s', "
1414 "as it would make the parent table '%s' "
1415 "depend on table(s) outside of its join-nest",
1416 table->get_table()->alias, m_join_root->get_table()->alias,
1417 parent->get_table()->alias);
1418 return false;
1419 }
1420
1421 /**
1422 * Allow all tables in the referred parents nest to become
1423 * part of the set of later referrable upper_nests.
1424 */
1425 if (parent_no < first_inner) {
1426 // referred nest is not embedded within current inner_nest
1427 DBUG_ASSERT(m_tables[parent_no].m_last_inner < tab_no);
1428
1429 /**
1430 * Referring the outer-joined parent, introduce the requirement
1431 * that all our upper_nest table either has to be in the same
1432 * inner_nest as the parent, or be in the parents upper_nest.
1433 * Rebuild our upper_nests to reflect this.
1434 */
1435 ndb_table_access_map new_upper_nests(m_tables[parent_no].m_upper_nests);
1436 new_upper_nests.add(full_inner_nest(parent_no, tab_no));
1437 m_tables[first_inner].m_upper_nests = new_upper_nests;
1438 m_tables[tab_no].m_upper_nests = new_upper_nests;
1439 }
1440 }
1441 return true;
1442 }
1443
1444 /*****************************************************************************
1445 * validate_join_nest()
1446 *
1447 * A join-nest has been completed by ::is_pushable_with_root().
1448 * If the join nest is outer joined with other tables in the pushed join, and
1449 * if this nest, or other nests embedded within it contains (outer joined)
1450 * table scans, an extra 'validate' of the pushed joins is required:
1451 *
1452 * We need to 'validate' that none of these 'invalid' cases exists for
1453 * the join nest:
1454 *
1455 * 1) Some of the tables in the nest were not pushed.
1456 * 2) Some of the pushed tables in the nest has (remaining parts of)
1457 * conditions not being pushed.
1458 * 3) This nest, or some nests embedded within it, has a 'FOUND_MATCH' trigger,
1459 * condition covering tables in this nest. (Which effectively means the
1460 * condition act as a filter condition on this nest, as in 2) )
1461 *
1462 * The above restriction are similar to the ones checked for outer joined
1463 * table scans in is_pushable_as_child(), where we preferably try to catch
1464 * these restrictions. However, at that point in time we are not able to
1465 * perform this check for tables later in the query plan.
1466 *
1467 * So we need similar checks for validating the entire nest when it has been
1468 * completed. If the nest fails the 'validate', no outer joined table scans
1469 * should have been pushed as part of the nest, or in nests embedded within
1470 * this nest. Thus they have to be removed from the pushed join.
1471 * (Using ::remove_pushable())
1472 *
1473 * Note that validate_join_nest() check the entire nest, so the similar
1474 * checks on outer joined scans could have been skipped from
1475 * is_pushable_as_child(). However, we want to catch these non pushable
1476 * tables as early as possible, so we effectively duplicates these checks.
1477 ******************************************************************************/
validate_join_nest(ndb_table_access_map inner_nest,const uint first_inner,const uint last_inner,const char * nest_type)1478 void ndb_pushed_builder_ctx::validate_join_nest(ndb_table_access_map inner_nest,
1479 const uint first_inner,
1480 const uint last_inner,
1481 const char *nest_type) {
1482 DBUG_TRACE;
1483 if (first_inner <= m_join_root->get_access_no()) return;
1484
1485 // This nest, or nests embedded within it, has scan operations?
1486 const bool nest_has_scans =
1487 (m_scan_operations.first_table(first_inner) < m_plan.get_access_count());
1488 if (nest_has_scans) {
1489 ndb_table_access_map filter_cond;
1490
1491 /**
1492 * Check conditions inside nest(s) for possible FOUND_MATCH-triggers.
1493 * These are effectively evaluated 'higher up' in the nest structure
1494 * when we have found a join-match, or created a null-extension
1495 * for all 'used_tables()' in the trigger condition.
1496 * So we collect the aggregated map of tables possibly affected by
1497 * these MATCH-filters in 'filter_cond'
1498 *
1499 * Example: select straight_join *
1500 * from
1501 * t1 left join
1502 * (t1 as t2 join t1 as t3 on t3.a = t2.b)
1503 * on t2.a = t1.b
1504 * where (t2.c > t1.c or t1.c < 0);
1505 *
1506 * or: 't1 oj (t2,t3) where t2.c > t1.c or t1.c < 0'
1507 *
1508 * The where condition refers columns from the outer joined nest (t2,t3)
1509 * which are possibly NULL extended. Thus, the where cond is encapsulated in
1510 * a triggered-FOUND_MATCH(t2,t3), effectively forcing the cond. to be
1511 * evaluated only when we have a non-NULL extended match for t2,t3.
1512 * For some (legacy?) reason the optimizer will attach the trigger condition
1513 * to table t2 in the query plan 't1,t2,t3', as all referred tables(t1,t2)
1514 * are available at this point.
1515 * However, this ignores the encapsulating FOUND_MATCH(t2,t3) trigger,
1516 * which require the condition to also have a matching t3 row. The
1517 * WalkItem below will identify such triggers and calculate the real table
1518 * coverage of them.
1519 *
1520 * Note that 'explain format=tree' will represent such filters in a more
1521 * sensible way: (We don't use the Iterators here (yet) though)
1522 *
1523 * -> Filter: ((t2.c > t1.c) or (t1.c < 0))
1524 * -> Nested loop left join
1525 * -> Table scan on t1
1526 * -> Nested loop inner join
1527 * -> Index lookup on t2 using PRIMARY (a=t1.b),
1528 * -> Index lookup on t3 using PRIMARY (a=t2.b)
1529 *
1530 * The Iterators place the filter on 'top of' the t1..t3 evaluation.
1531 * The FOUND_MATCH(t2,t3) has also been eliminated, as we know there is
1532 * a (t2,t3) match at this point of execution.
1533 */
1534 for (uint tab_no = first_inner; tab_no <= last_inner; tab_no++) {
1535 AQP::Table_access *table = m_plan.get_table_access(tab_no);
1536 const Item *cond = table->get_condition();
1537 if (cond != nullptr) {
1538 // Condition could possibly be a 'antijoin_null_cond', in which case
1539 // the pending_cond flag has been cleared, it should then be ignored.
1540 if (m_join_scope.contain(tab_no) && !m_has_pending_cond.contain(tab_no))
1541 continue;
1542
1543 struct {
1544 table_map nest_scope; // Aggregated 'inner_tables' scope of triggers
1545 table_map found_match; // FOUND_MATCH-trigger scope
1546 } trig_cond = {0, 0};
1547
1548 // Check 'cond' for match trigger / filters
1549 WalkItem(const_cast<Item *>(cond), enum_walk::PREFIX,
1550 [&trig_cond](Item *item) {
1551 const Item_func_trig_cond *func_trig =
1552 GetTriggerCondOrNull(item);
1553 if (func_trig != nullptr) {
1554 /**
1555 * The FOUND_MATCH-trigger may be encapsulated inside
1556 * multiple IS_NOT_NULL_COMPL-triggers, which defines
1557 * the scope of the triggers. Aggregate these
1558 * 'inner_tables' scopes.
1559 */
1560 trig_cond.nest_scope |= func_trig->get_inner_tables();
1561
1562 if (func_trig->get_trig_type() ==
1563 Item_func_trig_cond::FOUND_MATCH) {
1564 // The FOUND_MATCH-trigger is evaluated on top of
1565 // the collected trigger nest_scope.
1566 trig_cond.found_match |= trig_cond.nest_scope;
1567 return true; // break out of this cond-branch
1568 }
1569 }
1570 return false; // continue WalkItem
1571 }); // End WalkItem' and lambda func
1572
1573 if (trig_cond.found_match != 0) {
1574 const ndb_table_access_map map = get_table_map(trig_cond.found_match);
1575
1576 /**
1577 * Only FOUND_MATCH-triggers partly overlapping join_scope will
1578 * restrict push. (Else it is completely evaluated either before
1579 * or after the pushed_join, thus does not affect it.
1580 */
1581 if (map.is_overlapping(m_join_scope) && !map.contain(m_join_scope)) {
1582 filter_cond.add(map);
1583 }
1584 }
1585 }
1586 }
1587
1588 // Check each of the 3 reject reasons from the topmost comment
1589 const bool nest_has_unpushed = !m_join_scope.contain(inner_nest);
1590 const bool nest_has_filter_cond = inner_nest.is_overlapping(filter_cond);
1591 const bool nest_has_pending_cond =
1592 inner_nest.is_overlapping(m_has_pending_cond);
1593
1594 if (nest_has_pending_cond || nest_has_unpushed || nest_has_filter_cond) {
1595 /**
1596 * Check all pushed scan operations in this nest, and nests embedded
1597 * within it. Note that it is the rows from scans in the upper nest
1598 * which may be repeated, creating false NULL extended rows from scans
1599 * in inner_nests.
1600 */
1601 for (uint tab_no = m_scan_operations.first_table(first_inner);
1602 tab_no <= last_inner;
1603 tab_no = m_scan_operations.first_table(tab_no + 1)) {
1604 DBUG_ASSERT(m_join_scope.contain(tab_no));
1605 const AQP::Table_access *const table = m_plan.get_table_access(tab_no);
1606
1607 /**
1608 * Could have checked all 3 reject conditions at once, but would
1609 * like to provide separate EXPLAIN_NO_PUSH's for each of them.
1610 */
1611 if (nest_has_unpushed) {
1612 EXPLAIN_NO_PUSH(
1613 "Can't push %s joined table '%s' as child of '%s', "
1614 "some tables in embedding join-nest(s) are not part of pushed "
1615 "join",
1616 nest_type, table->get_table()->alias,
1617 m_join_root->get_table()->alias);
1618 remove_pushable(table);
1619 } else if (nest_has_pending_cond) {
1620 EXPLAIN_NO_PUSH(
1621 "Can't push %s joined table '%s' as child of '%s', "
1622 "join-nest containing the table has pending unpushed_conditions",
1623 nest_type, table->get_table()->alias,
1624 m_join_root->get_table()->alias);
1625 remove_pushable(table);
1626 } else if (nest_has_filter_cond) {
1627 EXPLAIN_NO_PUSH(
1628 "Can't push %s joined table '%s' as child of '%s', "
1629 "join-nest containing the table has a FILTER conditions",
1630 nest_type, table->get_table()->alias,
1631 m_join_root->get_table()->alias);
1632 remove_pushable(table);
1633 }
1634 }
1635 }
1636 } // nest_has_scans
1637 } // ndb_pushed_builder_ctx::validate_join_nest
1638
1639 /**********************************************************************
1640 * ::remove_pushable()
1641 *
1642 * A Table was first included in a pushed join query, but later found to
1643 * not be pushable. Thus it has to be removed by this method.
1644 *
1645 * All other pushed tables are checked for dependencies on the table
1646 * being removed, and possible cascade-removed if they can no longer
1647 * be part of the pushed join without the romoved table(s).
1648 **********************************************************************/
remove_pushable(const AQP::Table_access * table)1649 void ndb_pushed_builder_ctx::remove_pushable(const AQP::Table_access *table) {
1650 DBUG_TRACE;
1651
1652 const uint me = table->get_access_no();
1653 DBUG_ASSERT(m_join_scope.contain(me));
1654 m_join_scope.clear_bit(me);
1655
1656 // Cascade remove of tables depending on 'me'
1657 for (uint tab_no = me + 1; tab_no < m_plan.get_access_count(); tab_no++) {
1658 if (m_join_scope.contain(tab_no)) {
1659 table = m_plan.get_table_access(tab_no);
1660 ndb_table_access_map *key_parents = m_tables[tab_no].m_key_parents;
1661
1662 for (uint i = 0; i < table->get_no_of_key_fields(); i++) {
1663 if (!key_parents[i].is_clear_all()) {
1664 // Was referring some parent field(s) (not const, or params)
1665 // Remove parent referrences not in join_scope any more
1666 key_parents[i].intersect(m_join_scope);
1667
1668 if (key_parents[i].is_clear_all()) {
1669 // All preceeding parent tables removed from join_scope.
1670 m_join_scope.clear_bit(tab_no); // Cascade remove of this table
1671 break;
1672 }
1673 }
1674 }
1675 }
1676 m_tables[tab_no].m_ancestors.intersect(m_join_scope);
1677 }
1678 // Remove 'pending_cond' and 'scan_operations' not pushed any longer
1679 m_has_pending_cond.intersect(m_join_scope);
1680 m_scan_operations.intersect(m_join_scope);
1681 } // ndb_pushed_builder_ctx::remove_pushable
1682
1683 /*********************
1684 * This method examines a key item (could be part of a lookup key or a scan
1685 * bound) for a table access operation and calculates the set of possible
1686 * parents. (These are possible parent table access operations in the query
1687 * tree that will be pushed to the ndb.)
1688 *
1689 * @param[in] table The table access operation to which the key item belongs.
1690 * @param[in] key_item The key_item to examine
1691 * @param[in] key_part Metatdata about the key item.
1692 * @param[out] field_parents The set of possible parents for 'key_item'
1693 * ('join_root' if keys are constant).
1694 * @return True if at least one possible parent was found. (False means that
1695 * operation cannot be pushed).
1696 */
is_field_item_pushable(AQP::Table_access * table,const Item * key_item,const KEY_PART_INFO * key_part,ndb_table_access_map & field_parents)1697 bool ndb_pushed_builder_ctx::is_field_item_pushable(
1698 AQP::Table_access *table, const Item *key_item,
1699 const KEY_PART_INFO *key_part, ndb_table_access_map &field_parents) {
1700 DBUG_TRACE;
1701 const uint tab_no = table->get_access_no();
1702 DBUG_ASSERT(key_item->type() == Item::FIELD_ITEM);
1703
1704 const Item_field *const key_item_field =
1705 static_cast<const Item_field *>(key_item);
1706
1707 DBUG_PRINT(
1708 "info",
1709 ("keyPart:%d, field:%s.%s", (int)(key_item - table->get_key_field(0)),
1710 key_item_field->field->table->alias, key_item_field->field->field_name));
1711
1712 if (!key_item_field->field->eq_def(key_part->field)) {
1713 EXPLAIN_NO_PUSH(
1714 "Can't push table '%s' as child, "
1715 "column '%s' does not have same datatype as ref'ed "
1716 "column '%s.%s'",
1717 table->get_table()->alias, key_part->field->field_name,
1718 key_item_field->field->table->alias, key_item_field->field->field_name);
1719 table->set_table_properties(
1720 table->get_table_properties() &
1721 ~PUSHABLE_AS_CHILD); // Permanently disable as child
1722 return false;
1723 }
1724
1725 if (key_item_field->field->is_virtual_gcol()) {
1726 EXPLAIN_NO_PUSH("Can't push condition on virtual generated column '%s.%s'",
1727 key_item_field->field->table->alias,
1728 key_item_field->field->field_name);
1729 return false;
1730 }
1731
1732 /**
1733 * Below this point 'key_item_field' is a candidate for refering a parent
1734 * table in a pushed join. It should either directly refer a parent common to
1735 * all FIELD_ITEMs, or refer a grandparent of this common parent. There are
1736 * different cases which should be handled:
1737 *
1738 * 1) 'key_item_field' may already refer one of the parent available within
1739 * our pushed scope. 2) By using the equality set, we may find alternative
1740 * parent references which may make this a pushed join.
1741 */
1742
1743 ///////////////////////////////////////////////////////////////////
1744 // 0) Prepare for calculating parent candidates for this FIELD_ITEM
1745 //
1746 field_parents.clear_all();
1747
1748 ////////////////////////////////////////////////////////////////////
1749 // 1) Add our existing parent reference to the set of parent candidates
1750 //
1751 const uint referred_table_no = get_table_no(key_item_field);
1752 if (m_join_scope.contain(referred_table_no)) {
1753 field_parents.add(referred_table_no);
1754 }
1755
1756 //////////////////////////////////////////////////////////////////
1757 // 2) Use the equality set to possibly find more parent candidates
1758 // usable by substituting existing 'key_item_field'
1759 //
1760 Item_equal *item_equal = table->get_item_equal(key_item_field);
1761 if (item_equal) {
1762 AQP::Equal_set_iterator equal_iter(*item_equal);
1763 const Item_field *substitute_field;
1764 while ((substitute_field = equal_iter.next()) != NULL) {
1765 if (substitute_field != key_item_field) {
1766 const uint substitute_table_no = get_table_no(substitute_field);
1767 if (m_join_scope.contain(substitute_table_no)) {
1768 DBUG_PRINT("info",
1769 (" join_items[%d] %s.%s can be replaced with %s.%s",
1770 (int)(key_item - table->get_key_field(0)),
1771 get_referred_table_access_name(key_item_field),
1772 get_referred_field_name(key_item_field),
1773 get_referred_table_access_name(substitute_field),
1774 get_referred_field_name(substitute_field)));
1775
1776 field_parents.add(substitute_table_no);
1777 }
1778 }
1779 } // while(substitute_field != NULL)
1780 }
1781 if (!field_parents.is_clear_all()) {
1782 return true;
1783 }
1784
1785 if (m_const_scope.contain(referred_table_no)) {
1786 // This key item is const. and did not cause the set of possible parents
1787 // to be recalculated. Reuse what we had before this key item.
1788 DBUG_ASSERT(field_parents.is_clear_all());
1789
1790 /**
1791 * Field referrence is a 'paramValue' to a column value evaluated
1792 * prior to the root of this pushed join candidate. Some restrictions
1793 * applies to when a field reference is allowed in a pushed join:
1794 */
1795 if (ndbcluster_is_lookup_operation(m_join_root->get_access_type())) {
1796 /**
1797 * EQRefIterator may optimize away key reads if the key
1798 * for a requested row is the same as the previous.
1799 * Thus, iff this is the root of a pushed lookup join
1800 * we do not want it to contain childs with references
1801 * to columns 'outside' the the pushed joins, as these
1802 * may still change between calls to
1803 * EQRefIterator::Read() independent of the root key
1804 * itself being the same.
1805 */
1806 EXPLAIN_NO_PUSH(
1807 "Cannot push table '%s' as child of '%s', since "
1808 "it referes to column '%s.%s' prior to a "
1809 "potential 'const' root.",
1810 table->get_table()->alias, m_join_root->get_table()->alias,
1811 get_referred_table_access_name(key_item_field),
1812 get_referred_field_name(key_item_field));
1813 return false;
1814 } else {
1815 /**
1816 * Scan queries cannot be pushed if the pushed query may refer column
1817 * values (paramValues) from rows stored in a join cache.
1818 */
1819 const TABLE *const referred_tab = key_item_field->field->table;
1820 uint access_no = tab_no;
1821 do {
1822 if (m_plan.get_table_access(access_no)->uses_join_cache()) {
1823 EXPLAIN_NO_PUSH(
1824 "Cannot push table '%s' as child of '%s', since "
1825 "it referes to column '%s.%s' which will be stored "
1826 "in a join buffer.",
1827 table->get_table()->alias, m_join_root->get_table()->alias,
1828 get_referred_table_access_name(key_item_field),
1829 get_referred_field_name(key_item_field));
1830 return false;
1831 }
1832 DBUG_ASSERT(access_no > 0);
1833 access_no--;
1834 } while (m_plan.get_table_access(access_no)->get_table() != referred_tab);
1835
1836 } // if (!ndbcluster_is_lookup_operation(root_type)
1837 return true;
1838 } else {
1839 EXPLAIN_NO_PUSH(
1840 "Can't push table '%s' as child of '%s', "
1841 "column '%s.%s' is outside scope of pushable join",
1842 table->get_table()->alias, m_join_root->get_table()->alias,
1843 get_referred_table_access_name(key_item_field),
1844 get_referred_field_name(key_item_field));
1845 return false;
1846 }
1847 } // ndb_pushed_builder_ctx::is_field_item_pushable()
1848
is_const_item_pushable(const Item * key_item,const KEY_PART_INFO * key_part)1849 bool ndb_pushed_builder_ctx::is_const_item_pushable(
1850 const Item *key_item, const KEY_PART_INFO *key_part) {
1851 DBUG_TRACE;
1852 DBUG_ASSERT(key_item->const_item());
1853
1854 /**
1855 * Propagate Items constant value to Field containing the value of this
1856 * key_part:
1857 */
1858 Field *const field = key_part->field;
1859 const int error =
1860 const_cast<Item *>(key_item)->save_in_field_no_warnings(field, true);
1861 if (unlikely(error)) {
1862 DBUG_PRINT("info", ("Failed to store constant Item into Field -> not"
1863 " pushable"));
1864 return false;
1865 }
1866 if (field->is_real_null()) {
1867 DBUG_PRINT("info", ("NULL constValues in key -> not pushable"));
1868 return false; // TODO, handle graceful -> continue?
1869 }
1870 return true;
1871 } // ndb_pushed_builder_ctx::is_const_item_pushable()
1872
1873 /**
1874 * ::optimize_query_plan()
1875 *
1876 * Decide the final execution order for the pushed joins. That mainly
1877 * involves deciding which table to be used as the 'm_parent'.
1878 *
1879 * The m_parent is choosen based on the available m_key_parents[]
1880 * which were set up by ::is_pushable_as_child(), and possibly later
1881 * modified (reduced) by ::validate_join_nest().
1882 *
1883 * When multiple parent candidates are available, we choose the one
1884 * closest to the root, which will result in the most 'bushy' tree
1885 * structure and the highest possible parallelism. Note that SPJ block
1886 * will build its own execution plan (based on whats being set up here)
1887 * which possible sequentialize the execution of these parallel branches.
1888 * (See WL#11164)
1889 */
optimize_query_plan()1890 int ndb_pushed_builder_ctx::optimize_query_plan() {
1891 DBUG_TRACE;
1892 const uint root_no = m_join_root->get_access_no();
1893 const uint last_table = m_plan.get_access_count() - 1;
1894
1895 // Find an optimal m_parent to be used when joining the tables
1896 for (uint tab_no = last_table; tab_no > root_no; tab_no--) {
1897 if (!m_join_scope.contain(tab_no)) continue;
1898 pushed_tables &table = m_tables[tab_no];
1899
1900 /**
1901 * Calculate the set of possible parents for the table, where:
1902 * - 'common' are those we may refer (possibly through the EQ-sets)
1903 * such that all FIELD_ITEMs are from the same parent.
1904 * - 'extended' are those parents refered from some of the
1905 * FIELD_ITEMs, and having the rest of the referred FIELD_ITEM
1906 * tables available as 'grandparent refs'
1907 * (The SPJ block can handle field references to any ancestor
1908 * operation, not just the (direct) parent).
1909 *
1910 * In addition there are firm dependencies between some parents
1911 * such that all 'depend_parents' must be referred as an ancestors
1912 * of the table. By default 'depend_parents' will at least contain
1913 * the most 'grandparent' of the extended parents.
1914 */
1915 ndb_table_access_map *key_parents = table.m_key_parents;
1916 ndb_table_access_map common_parents(m_join_scope);
1917 ndb_table_access_map extend_parents;
1918 ndb_table_access_map depend_parents;
1919
1920 for (uint i = 0;
1921 i < m_plan.get_table_access(tab_no)->get_no_of_key_fields(); i++) {
1922 DBUG_ASSERT(m_join_scope.contain(key_parents[i]));
1923 if (!key_parents[i].is_clear_all()) { // Key refers a parent field
1924 /**
1925 * Calculate 'common_parents' as the set of possible 'field_parents'
1926 * available from all 'key_part'.
1927 */
1928 common_parents.intersect(key_parents[i]);
1929
1930 /**
1931 * 'Extended' parents are refered from some 'FIELD_ITEM', and contain
1932 * all parents directly referred, or available as 'depend_parents'.
1933 * The later excludes those before the first (grand-)parent
1934 * available from all 'field_parents' (first_grandparent).
1935 * However, it also introduce a dependency of those
1936 * tables to really be available as grand parents.
1937 */
1938 extend_parents.add(key_parents[i]);
1939
1940 const uint first = key_parents[i].first_table(root_no);
1941 depend_parents.add(first);
1942 }
1943 }
1944
1945 /**
1946 * Previous childs might already have enforced some ancestors to be
1947 * available through this table due to some ancestors being referred by
1948 * them, add these.
1949 */
1950 depend_parents.add(table.m_ancestors);
1951
1952 /**
1953 * Same goes for nest-level dependencies: The 'first' in each nest
1954 * may enforce ancestor dependencies on the members of the nest.
1955 * If this table is the 'first' itself, it is embedded within the
1956 * nest controlled by the 'first_upper'.
1957 */
1958 if (table.m_first_inner < tab_no)
1959 depend_parents.add(m_tables[table.m_first_inner].m_ancestors);
1960 else if (table.m_first_upper > 0)
1961 depend_parents.add(m_tables[table.m_first_upper].m_ancestors);
1962
1963 /**
1964 * All 'depend_parents' has to be fulfilled, starting from the 'last',
1965 * closest to this tab_no. The 'depend_parents' not directly referred
1966 * as a parent from this table, will be fulfilled by adding them as required
1967 * ancestors of the choosen parent, see below.
1968 * Find the first dependency to fulfill:
1969 */
1970 const uint depends_on_parent = depend_parents.last_table(tab_no - 1);
1971
1972 /**
1973 * We try to find a parent within our own nest among the common_
1974 * or extend_parents, but also takes the required depends_on_parent
1975 * into consideration. Establish the lowest parent candidate
1976 * we may accept.
1977 */
1978 const uint first_candidate =
1979 std::max(depends_on_parent, table.m_first_inner);
1980
1981 /**
1982 * Find a parent among common_parent (preferred) or extend_parent
1983 * if possible, else choose the first we depends_on.
1984 *
1985 * Choose parent to be the first possible among 'parents'.
1986 * Result in the most 'bushy' query plan, enabling most parallelism
1987 */
1988 uint parent_no = common_parents.first_table(first_candidate);
1989 if (parent_no >= tab_no) { // Not found
1990 parent_no = extend_parents.first_table(first_candidate);
1991 if (parent_no >= tab_no) { // Not found
1992 parent_no = depends_on_parent;
1993 }
1994 }
1995 DBUG_ASSERT(parent_no < tab_no);
1996 table.m_parent = parent_no;
1997
1998 /**
1999 * Any remaining ancestor dependencies for this table has to be
2000 * added to the selected parent in order to be taken into account
2001 * for parent calculation for its ancestors.
2002 */
2003 depend_parents.clear_bit(parent_no);
2004 m_tables[parent_no].m_ancestors.add(depend_parents);
2005
2006 /**
2007 * Similar for nest-level dependencies: Any dependencies to tables outside
2008 * of this inner nest are enforces as mandatory nest-ancestor dependencies.
2009 */
2010 depend_parents.subtract(table.m_inner_nest);
2011 m_tables[table.m_first_inner].m_ancestors.add(depend_parents);
2012 }
2013
2014 /* Collect the full set of ancestors available through the selected 'm_parent'
2015 */
2016 for (uint tab_no = root_no + 1; tab_no <= last_table; tab_no++) {
2017 if (m_join_scope.contain(tab_no)) {
2018 pushed_tables &table = m_tables[tab_no];
2019 const uint parent_no = table.m_parent;
2020 table.m_ancestors = m_tables[parent_no].m_ancestors;
2021 table.m_ancestors.add(parent_no);
2022 }
2023 }
2024 return 0;
2025 } // ndb_pushed_builder_ctx::optimize_query_plan
2026
collect_key_refs(const AQP::Table_access * table,const Item * key_refs[]) const2027 void ndb_pushed_builder_ctx::collect_key_refs(const AQP::Table_access *table,
2028 const Item *key_refs[]) const {
2029 DBUG_TRACE;
2030
2031 const uint tab_no = table->get_access_no();
2032 const uint parent_no = m_tables[tab_no].m_parent;
2033 const ndb_table_access_map ancestors = m_tables[tab_no].m_ancestors;
2034
2035 DBUG_ASSERT(m_join_scope.contain(ancestors));
2036 DBUG_ASSERT(ancestors.contain(parent_no));
2037
2038 /**
2039 * If there are any key_fields with 'current_parents' different from
2040 * our selected 'parent', we have to find substitutes for
2041 * those key_fields within the equality set.
2042 **/
2043 for (uint key_part_no = 0; key_part_no < table->get_no_of_key_fields();
2044 key_part_no++) {
2045 const Item *const key_item = table->get_key_field(key_part_no);
2046 key_refs[key_part_no] = key_item;
2047
2048 DBUG_ASSERT(key_item->const_item() || key_item->type() == Item::FIELD_ITEM);
2049
2050 if (key_item->type() == Item::FIELD_ITEM) {
2051 const Item_field *join_item = static_cast<const Item_field *>(key_item);
2052 uint referred_table_no = get_table_no(join_item);
2053 Item_equal *item_equal;
2054
2055 if (referred_table_no != parent_no &&
2056 (item_equal = table->get_item_equal(join_item)) != NULL) {
2057 AQP::Equal_set_iterator iter(*item_equal);
2058 const Item_field *substitute_field;
2059 while ((substitute_field = iter.next()) != NULL) {
2060 ///////////////////////////////////////////////////////////
2061 // Prefer to replace join_item with ref. to selected parent.
2062 //
2063 const uint substitute_table_no = get_table_no(substitute_field);
2064 if (substitute_table_no == parent_no) {
2065 DBUG_PRINT("info",
2066 (" Replacing key_refs[%d] %s.%s with %s.%s (parent)",
2067 key_part_no, get_referred_table_access_name(join_item),
2068 get_referred_field_name(join_item),
2069 get_referred_table_access_name(substitute_field),
2070 get_referred_field_name(substitute_field)));
2071
2072 referred_table_no = substitute_table_no;
2073 key_refs[key_part_no] = join_item = substitute_field;
2074 break;
2075 } else if (ancestors.contain(substitute_table_no)) {
2076 DBUG_ASSERT(substitute_table_no <= parent_no);
2077
2078 //////////////////////////////////////////////////////////////////////
2079 // Second best is to replace join_item with closest grandparent ref.
2080 // In this case we will continue to search for the common parent
2081 // match: Updates key_refs[] if:
2082 // 1): Replace incorrect refs of tables not being an 'ancestor'.
2083 // 2): Found a better substitute closer to selected parent
2084 //
2085 if (!ancestors.contain(referred_table_no) || // 1
2086 referred_table_no < substitute_table_no) // 2)
2087 {
2088 DBUG_PRINT(
2089 "info",
2090 (" Replacing key_refs[%d] %s.%s with %s.%s (grandparent)",
2091 key_part_no, get_referred_table_access_name(join_item),
2092 get_referred_field_name(join_item),
2093 get_referred_table_access_name(substitute_field),
2094 get_referred_field_name(substitute_field)));
2095
2096 referred_table_no = substitute_table_no;
2097 key_refs[key_part_no] = join_item = substitute_field;
2098 }
2099 }
2100 } // while (substitute...
2101
2102 DBUG_ASSERT(referred_table_no == parent_no ||
2103 ancestors.contain(referred_table_no) ||
2104 m_const_scope.contain(
2105 referred_table_no)); // Is a 'const' paramValue
2106 }
2107 } // Item::FIELD_ITEM
2108 }
2109
2110 key_refs[table->get_no_of_key_fields()] = NULL;
2111 } // ndb_pushed_builder_ctx::collect_key_refs()
2112
2113 /**
2114 * For the specified table; build the set of NdbQueryOperands defining
2115 * the (index-) key value for fetching rows from the table.
2116 *
2117 * Key values may consist of a mix of const-, param- and linkedValue(),
2118 * as collected by the utility method ::collect_key_refs().
2119 *
2120 * A linkedValue() should preferably refer a value from the 'm_parent'
2121 * of the table. If the referred field is not available from parent,
2122 * another ancestor may also be used. In the later case, SPJ will
2123 * need to store the referred ancestor value, such that it can be located
2124 * by the correlation-ids through the chain of ancestors.
2125 *
2126 * SPJ API will normally deduct the parent / ancestor topology based
2127 * on the table(s) being referred by the linkedValues(). In case of multiple
2128 * tables being referred, the API will check that the set of ancestors
2129 * depends on (are ancestors of-) each other, such that all referred tables
2130 * are available through the chain of ancestors.
2131 *
2132 * In rare cases we may introduce extra parent dependencies in order to
2133 * establish a common set of ancestors. To maintain the join semantics, this
2134 * is only supported when the added dependencies are to tables in same
2135 * inner join-nest. Restriction applying to the above is checked by
2136 * is_pushable_as_child(). However ::build_key() need to enforce the
2137 * added dependencies by calling NdbQueryOptions::setParent(). (below)
2138 */
build_key(const AQP::Table_access * table,const NdbQueryOperand * op_key[],NdbQueryOptions * key_options)2139 int ndb_pushed_builder_ctx::build_key(const AQP::Table_access *table,
2140 const NdbQueryOperand *op_key[],
2141 NdbQueryOptions *key_options) {
2142 DBUG_TRACE;
2143 DBUG_ASSERT(m_join_scope.contain(table->get_access_no()));
2144
2145 const KEY *const key = &table->get_table()->key_info[table->get_index_no()];
2146 op_key[0] = NULL;
2147
2148 if (table == m_join_root) {
2149 if (ndbcluster_is_lookup_operation(table->get_access_type())) {
2150 for (uint i = 0; i < key->user_defined_key_parts; i++) {
2151 op_key[i] = m_builder->paramValue();
2152 if (unlikely(op_key[i] == NULL)) {
2153 return -1;
2154 }
2155 }
2156 op_key[key->user_defined_key_parts] = NULL;
2157 }
2158 } else {
2159 const uint key_fields = table->get_no_of_key_fields();
2160 DBUG_ASSERT(key_fields > 0 && key_fields <= key->user_defined_key_parts);
2161 uint map[ndb_pushed_join::MAX_LINKED_KEYS + 1];
2162
2163 if (ndbcluster_is_lookup_operation(table->get_access_type())) {
2164 const ha_ndbcluster *handler =
2165 down_cast<ha_ndbcluster *>(table->get_table()->file);
2166 ndbcluster_build_key_map(
2167 handler->m_table, handler->m_index[table->get_index_no()], key, map);
2168 } else {
2169 for (uint ix = 0; ix < key_fields; ix++) {
2170 map[ix] = ix;
2171 }
2172 }
2173
2174 const Item *join_items[ndb_pushed_join::MAX_LINKED_KEYS + 1];
2175 collect_key_refs(table, join_items);
2176
2177 ndb_table_access_map referred_parents;
2178 const KEY_PART_INFO *key_part = key->key_part;
2179 for (uint i = 0; i < key_fields; i++, key_part++) {
2180 const Item *const item = join_items[i];
2181 op_key[map[i]] = NULL;
2182
2183 DBUG_ASSERT(item->const_item() == item->const_for_execution());
2184 if (item->const_item()) {
2185 /**
2186 * Propagate Items constant value to Field containing the value of this
2187 * key_part:
2188 */
2189 Field *const field = key_part->field;
2190 DBUG_ASSERT(!field->is_real_null());
2191 const uchar *const ptr =
2192 (field->real_type() == MYSQL_TYPE_VARCHAR)
2193 ? field->field_ptr() + field->get_length_bytes()
2194 : field->field_ptr();
2195
2196 op_key[map[i]] = m_builder->constValue(ptr, field->data_length());
2197 } else {
2198 DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
2199 const Item_field *const field_item =
2200 static_cast<const Item_field *>(item);
2201 const uint referred_table_no = get_table_no(field_item);
2202 referred_parents.add(referred_table_no);
2203
2204 if (m_join_scope.contain(referred_table_no)) {
2205 // Locate the parent operation for this 'join_items[]'.
2206 // May refer any of the preceding parent tables
2207 const NdbQueryOperationDef *const parent_op =
2208 m_tables[referred_table_no].m_op;
2209 DBUG_ASSERT(parent_op != NULL);
2210
2211 // TODO use field_index ??
2212 op_key[map[i]] =
2213 m_builder->linkedValue(parent_op, field_item->field_name);
2214 } else {
2215 DBUG_ASSERT(m_const_scope.contain(referred_table_no));
2216 // Outside scope of join plan, Handle as parameter as its value
2217 // will be known when we are ready to execute this query.
2218 if (unlikely(m_fld_refs >= ndb_pushed_join::MAX_REFERRED_FIELDS)) {
2219 DBUG_PRINT("info", ("Too many Field refs ( >= MAX_REFERRED_FIELDS) "
2220 "encountered"));
2221 return -1; // TODO, handle gracefull -> continue?
2222 }
2223 m_referred_fields[m_fld_refs++] = field_item->field;
2224 op_key[map[i]] = m_builder->paramValue();
2225 }
2226 }
2227
2228 if (unlikely(op_key[map[i]] == NULL)) {
2229 return -1;
2230 }
2231 }
2232 op_key[key_fields] = NULL;
2233
2234 // Might have to explicit set the designated parent.
2235 const uint tab_no = table->get_access_no();
2236 const uint parent_no = m_tables[tab_no].m_parent;
2237 if (!referred_parents.contain(parent_no)) {
2238 // Add the parent as a new dependency
2239 DBUG_ASSERT(m_tables[parent_no].m_op != NULL);
2240 key_options->setParent(m_tables[parent_no].m_op);
2241 }
2242 }
2243 return 0;
2244 } // ndb_pushed_builder_ctx::build_key()
2245
2246 /**
2247 * Call SPJ API to build a NdbQuery
2248 */
build_query()2249 int ndb_pushed_builder_ctx::build_query() {
2250 DBUG_TRACE;
2251
2252 DBUG_PRINT("enter",
2253 ("Table %d as root is pushable", m_join_root->get_access_no()));
2254 DBUG_EXECUTE("info", m_join_root->dbug_print(););
2255
2256 const uint root_no = m_join_root->get_access_no();
2257 DBUG_ASSERT(m_join_scope.contain(root_no));
2258
2259 if (m_builder == NULL) {
2260 m_builder = NdbQueryBuilder::create();
2261 if (unlikely(m_builder == NULL)) {
2262 return HA_ERR_OUT_OF_MEM;
2263 }
2264 }
2265
2266 for (uint tab_no = root_no; tab_no < m_plan.get_access_count(); tab_no++) {
2267 if (!m_join_scope.contain(tab_no)) continue;
2268
2269 const AQP::Table_access *const table = m_plan.get_table_access(tab_no);
2270 const AQP::enum_access_type access_type = table->get_access_type();
2271 ha_ndbcluster *handler =
2272 down_cast<ha_ndbcluster *>(table->get_table()->file);
2273
2274 NdbQueryOptions options;
2275 const NdbQueryOperand *op_key[ndb_pushed_join::MAX_KEY_PART + 1];
2276 if (table->get_index_no() >= 0) {
2277 const int error = build_key(table, op_key, &options);
2278 if (unlikely(error)) return error;
2279 }
2280
2281 if (table != m_join_root) {
2282 DBUG_ASSERT(m_tables[tab_no].m_parent != MAX_TABLES);
2283 const uint parent_no = m_tables[tab_no].m_parent;
2284
2285 if (m_tables[tab_no].isInnerJoined(m_tables[parent_no])) {
2286 // 'tab_no' is inner joined with its parent
2287 options.setMatchType(NdbQueryOptions::MatchNonNull);
2288 }
2289
2290 if (table->is_sj_firstmatch()) {
2291 /**
2292 * Is a Firstmatch'ed semijoin_nest. In order to let SPJ API
2293 * do firstMatch elimination of duplicated rows, we need to ensure:
2294 * 1) The entire semijoined-nest has been pushed down.
2295 * 2) There are no unpushed conditions in the above sj-nest.
2296 *
2297 * ... else we might end up returning a firstMatched'ed row,
2298 * which later turns out to be a non-match due to eiter 1) or 2).
2299 */
2300 const int last_sj_inner = table->get_last_sj_inner();
2301 const ndb_table_access_map semijoin(m_tables[last_sj_inner].m_sj_nest);
2302 if (m_join_scope.contain(semijoin) &&
2303 !m_has_pending_cond.is_overlapping(semijoin)) {
2304 options.setMatchType(NdbQueryOptions::MatchFirst);
2305 }
2306 }
2307
2308 if (table->is_antijoin()) {
2309 DBUG_ASSERT(m_tables[tab_no].isOuterJoined(m_tables[parent_no]));
2310 const ndb_table_access_map antijoin_scope(
2311 get_tables_in_range(tab_no, m_tables[tab_no].m_last_inner));
2312
2313 /**
2314 * From SPJ point of view, antijoin is a normal outer join. So once
2315 * we have accounted for the special antijoin_null_cond added to such
2316 * queries, no special handling is required for antijoin's wrt.
2317 * query correctness.
2318 *
2319 * However, as an added optimization, the SPJ API may eliminate the
2320 * upper-table rows not matching the 'Not exists' requirement, if:
2321 * 1) The entire (anti-)outer-joined-nest has been pushed down
2322 * 2) There are no unpushed conditions in the above join-nest.
2323 * -> or: 'antijoin-nest is completely evaluated by SPJ'
2324 *
2325 * Note that this is a pure optimization: Any returned rows supposed
2326 * to 'Not exist' will simply be eliminated by the mysql server.
2327 * -> We do join-pushdown of such antijoins even if the check below
2328 * does not allow us to setMatchType('MatchNullOnly')
2329 */
2330 if (m_join_scope.contain(antijoin_scope) &&
2331 !m_has_pending_cond.is_overlapping(antijoin_scope)) {
2332 const uint first_upper = m_tables[tab_no].m_first_upper;
2333 ndb_table_access_map upper_nest(full_inner_nest(first_upper, tab_no));
2334 upper_nest.intersect(m_join_scope);
2335
2336 if (upper_nest.contain(parent_no)) {
2337 /**
2338 * Antijoin is relative to the *upper_nest*. Thus we can only
2339 * eliminate found matches if they are relative the upper_nest.
2340 * Example: '(t1 oj (t2)) where not exists (t3 where t3.x = t1.y)'
2341 *
2342 * This nest structure is such that the upper of 'antijoin t3' is
2343 * t1. Thus we can only do match elimination of such a query when it
2344 * is built with 't3.parent == t1'.
2345 */
2346 options.setMatchType(NdbQueryOptions::MatchNullOnly);
2347 } else {
2348 /**
2349 * Else, subquery condition do not refer upper_nest.
2350 * Example: '(t1 oj (t2)) where not exists (t3 where t3.x = t2.y)'
2351 * Due to the nest structure, we still have t3.upper = t1.
2352 * However, the where condition dependencies will result in:
2353 * '3.parent == t2'. Specifying antijoin for this query may
2354 * eliminate matching rows from t2, while t1 rows will still
2355 * exists (with t2 NULL-extended).
2356 * However, we can still specify the less restrictive firstMatch
2357 * for such queries.
2358 */
2359 options.setMatchType(NdbQueryOptions::MatchFirst);
2360 }
2361 }
2362 }
2363
2364 /**
2365 * Inform SPJ API about the join nest dependencies. Needed in those
2366 * cases where the are no linkedValues determining which inner_
2367 * and upper_nest a table is a member of. SPJ API need this info
2368 * in order to correctly generate NULL extended outer join results.
2369 *
2370 * Example: t1 outer join (t2 inner join t3), where t3s join condition
2371 * does not refer t2. Thus, t3 will likely become an outer joined
2372 * child of t1 in the QueryTree. From the parent-child POW, t2,t3
2373 * will look like two separate outer joined tables, like:
2374 * 't1, outer join (t2), outer join (t3)'.
2375 *
2376 * Such queries need to set the join nest dependencies, such that
2377 * the NdbQuery interface is able to correcly generate NULL extended
2378 * rows.
2379 *
2380 * Below we add these nest dependencies even when not strictly required.
2381 * The API will just ignore such redundant nest dependencies.
2382 */
2383 if (m_tables[tab_no].isOuterJoined(m_tables[parent_no])) {
2384 ndb_table_access_map inner_nest(m_tables[tab_no].m_inner_nest);
2385 inner_nest.intersect(m_join_scope);
2386 if (!inner_nest.is_clear_all()) {
2387 // Table not first in its join_nest, set firstInner which it
2388 // depends on
2389 const uint real_first_inner =
2390 inner_nest.first_table(m_tables[tab_no].m_first_inner);
2391 options.setFirstInnerJoin(m_tables[real_first_inner].m_op);
2392
2393 } else if (m_tables[tab_no].m_first_upper >= 0) {
2394 const uint first_upper = m_tables[tab_no].m_first_upper;
2395 ndb_table_access_map upper_nest(full_inner_nest(first_upper, tab_no));
2396 upper_nest.intersect(m_join_scope);
2397 if (!upper_nest.is_clear_all()) {
2398 // There is an upper nest which we outer join with
2399 const uint real_first_upper =
2400 upper_nest.first_table(m_tables[tab_no].m_first_upper);
2401 options.setUpperJoin(m_tables[real_first_upper].m_op);
2402 }
2403 }
2404 }
2405 } // if '!m_join_root'
2406
2407 const NdbQueryOperationDef *query_op = NULL;
2408 if (ndbcluster_is_lookup_operation(access_type)) {
2409 // Primary key access assumed
2410 if (access_type == AQP::AT_PRIMARY_KEY ||
2411 access_type == AQP::AT_MULTI_PRIMARY_KEY) {
2412 DBUG_PRINT("info", ("Operation is 'primary-key-lookup'"));
2413 query_op = m_builder->readTuple(handler->m_table, op_key, &options);
2414 } else {
2415 DBUG_ASSERT(access_type == AQP::AT_UNIQUE_KEY);
2416 DBUG_PRINT("info", ("Operation is 'unique-index-lookup'"));
2417 const NdbDictionary::Index *const index =
2418 handler->m_index[table->get_index_no()].unique_index;
2419 DBUG_ASSERT(index);
2420 query_op =
2421 m_builder->readTuple(index, handler->m_table, op_key, &options);
2422 }
2423 } // ndbcluster_is_lookup_operation()
2424
2425 /**
2426 * AT_MULTI_MIXED may have 'ranges' which are pure single key lookups also.
2427 * In our current implementation these are converted into range access in
2428 * the pushed MRR implementation. However, the future plan is to build both
2429 * RANGE and KEY pushable joins for these.
2430 */
2431 else if (access_type == AQP::AT_ORDERED_INDEX_SCAN ||
2432 access_type == AQP::AT_MULTI_MIXED) {
2433 DBUG_ASSERT(table->get_index_no() >= 0);
2434 DBUG_ASSERT(handler->m_index[table->get_index_no()].index != NULL);
2435
2436 DBUG_PRINT("info", ("Operation is 'equal-range-lookup'"));
2437 DBUG_PRINT(
2438 "info",
2439 ("Creating scanIndex on index id:%d, name:%s", table->get_index_no(),
2440 handler->m_index[table->get_index_no()].index->getName()));
2441
2442 const NdbQueryIndexBound bounds(op_key);
2443 query_op =
2444 m_builder->scanIndex(handler->m_index[table->get_index_no()].index,
2445 handler->m_table, &bounds, &options);
2446 } else if (access_type == AQP::AT_TABLE_SCAN) {
2447 DBUG_PRINT("info", ("Operation is 'table scan'"));
2448 query_op = m_builder->scanTable(handler->m_table, &options);
2449 } else {
2450 DBUG_ASSERT(false);
2451 }
2452
2453 if (unlikely(!query_op)) return -1;
2454
2455 m_tables[tab_no].m_op = query_op;
2456 } // for (join_cnt= m_join_root->get_access_no();
2457 // join_cnt<plan.get_access_count(); join_cnt++)
2458
2459 return 0;
2460 } // ndb_pushed_builder_ctx::build_query()
2461
2462 /**
2463 * Fill in ix_map[] to map from KEY_PART_INFO[] order into
2464 * primary key / unique key order of key fields.
2465 */
ndbcluster_build_key_map(const NDBTAB * table,const NDB_INDEX_DATA & index,const KEY * key_def,uint ix_map[])2466 void ndbcluster_build_key_map(const NDBTAB *table, const NDB_INDEX_DATA &index,
2467 const KEY *key_def, uint ix_map[]) {
2468 uint ix;
2469
2470 if (index.unique_index_attrid_map) // UNIQUE_ORDERED_INDEX or UNIQUE_INDEX
2471 {
2472 for (ix = 0; ix < key_def->user_defined_key_parts; ix++) {
2473 ix_map[ix] = index.unique_index_attrid_map[ix];
2474 }
2475 } else // Primary key does not have a 'unique_index_attrid_map'
2476 {
2477 KEY_PART_INFO *key_part;
2478 uint key_pos = 0;
2479 int columnnr = 0;
2480 assert(index.type == PRIMARY_KEY_ORDERED_INDEX ||
2481 index.type == PRIMARY_KEY_INDEX);
2482
2483 for (ix = 0, key_part = key_def->key_part;
2484 ix < key_def->user_defined_key_parts; ix++, key_part++) {
2485 // As NdbColumnImpl::m_keyInfoPos isn't available through
2486 // NDB API we have to calculate it ourself, else we could:
2487 // ix_map[ix]= table->getColumn(key_part->fieldnr-1)->m_impl.m_keyInfoPos;
2488
2489 if (key_part->fieldnr < columnnr) {
2490 // PK columns are not in same order as the columns are defined in the
2491 // table, Restart PK search from first column:
2492 key_pos = 0;
2493 columnnr = 0;
2494 }
2495
2496 while (columnnr < key_part->fieldnr - 1) {
2497 if (table->getColumn(columnnr++)->getPrimaryKey()) key_pos++;
2498 }
2499
2500 assert(table->getColumn(columnnr)->getPrimaryKey());
2501 ix_map[ix] = key_pos;
2502
2503 columnnr++;
2504 key_pos++;
2505 }
2506 }
2507 } // ndbcluster_build_key_map
2508