1 /*
2    Copyright (c) 2017, 2020, MariaDB
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
16 
17 /*
18   This file contains functions to support the splitting technique.
19   This optimization technique can be applied to equi-joins involving
20   materialized tables such as materialized views, materialized derived tables
21   and materialized CTEs. The technique also could be applied to materialized
22   semi-joins though the code below does not support this usage yet.
23 
24   Here are the main ideas behind this technique that we'll call SM optimization
25   (SplitMaterialization).
26 
27   Consider the query
28     SELECT t1.a, t.min
29       FROM t1, (SELECT t2.a, MIN(t2.b) as min FROM t2 GROUP BY t2.a) t
30     WHERE t1.a = t.a and t1.b < const
31 
32   Re-write the query into
33     SELECT t1.a, t.min
34       FROM t1, LATERAL (SELECT t2.a, MIN(t2.b) as min
35                         FROM t2 WHERE t2.a = t1.a GROUP BY t2.a) t
36     WHERE t1.b < const
37 
38   The execution of the original query (Q1) does the following:
39     1. Executes the query in the specification of the derived table
40        and puts the result set into a temporary table with an index
41        on the first column.
42     2. Joins t1 with the temporary table using the its index.
43 
44   The execution of the transformed query (Q1R) follows these steps:
45     1. For each row of t1 where t1.b < const a temporary table
46        containing all rows of of t2 with t2.a = t1.a is created
47     2. If there are any rows in the temporary table aggregation
48        is performed for them
49     3. The result of the aggregation is joined with t1.
50 
51   The second execution can win if:
52     a) There is an efficient way to select rows of t2 for which t2.a = t1.a
53        (For example if there is an index on t2.a)
54     and
55     b) The number of temporary tables created for partitions
56        is much smaller that the total number of partitions
57 
58   It should be noted that for the transformed query aggregation
59   for a partition may be performed several times.
60 
61   As we can see the optimization basically splits table t2 into
62   partitions and performs aggregation over each of them
63   independently.
64 
65   If we have only one equi-join condition then we either push it as
66   for Q1R or we don't. In a general case we may have much more options.
67   Consider the query (Q3)
68     SELECT
69       FROM t1,t2 (SELECT t3.a, t3.b, MIN(t3.c) as min
70                   FROM t3 GROUP BY a,b) t
71     WHERE t.a = t1.a AND t.b = t2.b
72           AND t1.c < c1 and t2.c < c2
73           AND P(t1,t2);
74   (P(t1,t2) designates  some additional conditions over columns of t1,t2).
75 
76   Assuming that there indexes on t3(a,b) and t3(b) here we have several
77   reasonable options to push equi-join conditions into the derived.
78   All these options should be taken into account when the optimizer
79   evaluates different join orders. When the join order (t1,t,t2) is
80   evaluated there is only one way of splitting : to push the condition
81   t.a = t1.a into t. With the join order (t2,t,t1) only the condition
82   t.b = t2.b can be pushed. When the join orders (t1,t2,t) and (t2,t1,t)
83   are evaluated then the optimizer should consider pushing t.a = t1.a,
84   t.b = t2.b and (t.a = t1.a AND t.b = t2.b) to choose the best condition
85   for splitting. Apparently here last condition is the best one because
86   it provides the miximum possible number of partitions.
87 
88   If we dropped the index on t3(a,b) and created the index on t3(a) instead
89   then we would have two options for splitting: to push t.a = t1.a or to
90   push t.b = t2.b. If the selectivity of the index t3(a) is better than
91   the selectivity of t3(b) then the first option is preferred.
92 
93   Although the condition (t.a = t1.a AND t.b = t2.b) provides a better
94   splitting than the condition t.a = t1.a the latter will be used for
95   splitting if the execution plan with the join order (t1,t,t2) turns out
96   to be the cheapest one. It's quite possible when the join condition
97   P(t1,t2) has a bad selectivity.
98 
99   Whenever the optimizer evaluates the cost of using a splitting it
100   compares it with the cost of materialization without splitting.
101 
102   If we just drop the index on t3(a,b) the chances that the splitting
103   will be used becomes much lower but they still exists providing that
104   the fanout of the partial join of t1 and t2 is small enough.
105 */
106 
107 /*
108   Splitting can be applied to a materialized table specified by the query
109   with post-join operations that require partitioning of the result set produced
110   by the join expression used in the FROM clause the query such as GROUP BY
111   operation and window function operation. In any of these cases the post-join
112   operation can be executed independently for any partition only over the rows
113   of this partition. Also if the set of all partitions is divided into disjoint
114   subsets the operation can applied to each subset independently. In this case
115   all rows are first partitioned into the groups each of which contains all the
116   rows from the partitions belonging the same subset and then each group
117   is subpartitioned into groups in the the post join operation.
118 
119   The set of all rows belonging to the union of several partitions is called
120   here superpartition. If a grouping operation is defined by the list
121   e_1,...,e_n then any set S = {e_i1,...,e_ik} can be used to devide all rows
122   into superpartions such that for any two rows r1, r2  the following holds:
123   e_ij(r1) = e_ij(r2) for each e_ij from S. We use the splitting technique
124   only if S consists of references to colums  of the joined tables.
125   For example if the GROUP BY list looks like this a, g(b), c we can consider
126   applying the splitting technique to the superpartitions defined by {a,c},
127   {a}, {c} (a and c here may be the references to the columns from different
128   tables).
129 */
130 
131  /*
132    The following describes when and how the optimizer decides whether it
133    makes sense to employ the splitting technique.
134 
135    1. For each instance of a materialized table (derived/view/CTE) it is
136       checked that it is potentially splittable. Now it is done right after the
137       execution plan for the select specifying this table has been chosen.
138 
139    2. Any potentially splittable materialized table T is subject to two-phase
140       optimization. It means that the optimizer first builds the best execution
141       plan for join that specifies T. Then the control is passed back to the
142       optimization process of the embedding select Q. After the execution plan
143       for Q has been chosen the optimizer finishes the optimization of the join
144       specifying T.
145 
146    3. When the optimizer builds the container with the KEYUSE structures
147       for the join of embedding select it detects the equi-join conditions
148       PC that potentially could be pushed into a potentially splittable
149       materialized table T. The collected information about such conditions
150       is stored together with other facts on potential splittings for table T.
151 
152    4. When the optimizer starts looking for the best execution plan for the
153       embedding select Q for each potentially splittable materialized table T
154       it creates special KEYUSE structures for pushable equi-join conditions
155       PC. These structures are used to add new elements to the container
156       of KEYUSE structures built for T. The specifics of these elements is
157       that they can be ebabled and disabled during the process of choosing
158       the best plan for Q.
159 
160    5. When the optimizer extends a partial join order with a potentially
161       splittable materialized table T (in function best_access_path) it
162       first evaluates a new execution plan for the modified specification
163       of T that adds all equi-join conditions that can be pushed with
164       current join prefix to the WHERE conditions of the original
165       specification of T. If the cost of the new plan is better than the
166       the cost of the original materialized table then the optimizer
167       prefers to use splitting for the current join prefix. As the cost
168       of the plan depends only on the pushed conditions it makes sense
169       to cache this plan for other prefixes.
170 
171    6. The optimizer takes into account the cost of splitting / materialization
172       of a potentially splittable materialized table T as a startup cost
173       to access table T.
174 
175    7. When the optimizer finally chooses the best execution plan for
176       the embedding select Q and this plan prefers using splitting
177       for table T with pushed equi-join conditions PC then the execution
178       plan for the underlying join with these conditions is chosen for T.
179 */
180 
181 /*
182   The implementation of the splitting technique below allows to apply
183   the technique only to a materialized derived table / view / CTE whose
184   specification is either a select with GROUP BY or a non-grouping select
185   with window functions that share the same PARTITION BY list.
186 */
187 
188 #include "mariadb.h"
189 #include "sql_select.h"
190 
191 /* Info on a splitting field */
192 struct SplM_field_info
193 {
194   /* Splitting field in the materialized table T */
195   Field *mat_field;
196   /* The item from the select list of the specification of T */
197   Item *producing_item;
198   /* The corresponding splitting field from the specification of T */
199   Field *underlying_field;
200 };
201 
202 
203 /* Info on the splitting execution plan saved in SplM_opt_info::cache */
204 struct SplM_plan_info
205 {
206   /* The cached splitting execution plan P */
207   POSITION *best_positions;
208   /* The cost of the above plan */
209   double cost;
210   /* Selectivity of splitting used in P */
211   double split_sel;
212   /* For fast search of KEYUSE_EXT elements used for splitting in P */
213   struct KEYUSE_EXT *keyuse_ext_start;
214   /* The tables that contains the fields used for splitting in P */
215   TABLE *table;
216   /* The number of the key from 'table' used for splitting in P */
217   uint key;
218   /* Number of the components of 'key' used for splitting in P */
219   uint parts;
220 };
221 
222 
223 /*
224   The structure contains the information that is used by the optimizer
225   for potentially splittable materialization of T  that is a materialized
226   derived_table / view / CTE
227 */
228 class SplM_opt_info : public Sql_alloc
229 {
230 public:
231   /* The join for the select specifying T */
232   JOIN *join;
233   /* The map of tables from 'join' whose columns can be used for partitioning */
234   table_map tables_usable_for_splitting;
235   /* Info about the fields of the joined tables usable for splitting */
236   SplM_field_info *spl_fields;
237   /* The number of elements in the above list */
238   uint spl_field_cnt;
239   /* The list of equalities injected into WHERE for split optimization */
240   List<Item> inj_cond_list;
241   /* Contains the structures to generate all KEYUSEs for pushable equalities */
242   List<KEY_FIELD> added_key_fields;
243   /* The cache of evaluated execution plans for 'join' with pushed equalities */
244   List<SplM_plan_info> plan_cache;
245   /* Cost of best execution plan for join when nothing is pushed */
246   double unsplit_cost;
247   /* Cardinality of T when nothing is pushed */
248   double unsplit_card;
249   /* Lastly evaluated execution plan for 'join' with pushed equalities */
250   SplM_plan_info *last_plan;
251 
252   SplM_plan_info *find_plan(TABLE *table, uint key, uint parts);
253 };
254 
255 
set_spl_opt_info(SplM_opt_info * spl_info)256 void TABLE::set_spl_opt_info(SplM_opt_info *spl_info)
257 {
258   if (spl_info)
259     spl_info->join->spl_opt_info= spl_info;
260   spl_opt_info= spl_info;
261 }
262 
263 
deny_splitting()264 void TABLE::deny_splitting()
265 {
266   DBUG_ASSERT(spl_opt_info != NULL);
267   spl_opt_info->join->spl_opt_info= NULL;
268   spl_opt_info= NULL;
269 }
270 
271 
get_materialization_cost()272 double TABLE::get_materialization_cost()
273 {
274   DBUG_ASSERT(spl_opt_info != NULL);
275   return spl_opt_info->unsplit_cost;
276 }
277 
278 
279 /* This structure is auxiliary and used only in the function that follows it */
280 struct SplM_field_ext_info: public SplM_field_info
281 {
282   uint item_no;
283   bool is_usable_for_ref_access;
284 };
285 
286 
287 /**
288   @brief
289     Check whether this join is one for potentially splittable materialized table
290 
291   @details
292     The function checks whether this join is for select that specifies
293     a potentially splittable materialized table T. If so, the collected
294     info on potential splittability of T is attached to the field spl_opt_info
295     of the TABLE structure for T.
296 
297     The function returns a positive answer if the following holds:
298     1. the optimizer switch 'split_materialized' is set 'on'
299     2. the select owning this join specifies a materialized derived/view/cte T
300     3. this is the only select in the specification of T
301     4. condition pushdown is not prohibited into T
302     5. T is not recursive
303     6. not all of this join are constant or optimized away
304     7. T is either
305        7.1. a grouping table with GROUP BY list P
306        or
307        7.2. a non-grouping table with window functions over the same non-empty
308             partition specified by the PARTITION BY list P
309     8. P contains some references on the columns of the joined tables C
310        occurred also in the select list of this join
311     9. There are defined some keys usable for ref access of fields from C
312        with available statistics.
313     10. The select doesn't use WITH ROLLUP (This limitation can probably be
314        lifted)
315 
316   @retval
317     true   if the answer is positive
318     false  otherwise
319 */
320 
check_for_splittable_materialized()321 bool JOIN::check_for_splittable_materialized()
322 {
323   ORDER *partition_list= 0;
324   st_select_lex_unit *unit= select_lex->master_unit();
325   TABLE_LIST *derived= unit->derived;
326   if (!(optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED)) ||  // !(1)
327       !(derived && derived->is_materialized_derived()) ||             // !(2)
328       (unit->first_select()->next_select()) ||                        // !(3)
329       (derived->prohibit_cond_pushdown) ||                            // !(4)
330       (derived->is_recursive_with_table()) ||                         // !(5)
331       (table_count == 0 || const_tables == top_join_tab_count) ||     // !(6)
332       rollup.state != ROLLUP::STATE_NONE)                             // (10)
333     return false;
334   if (group_list)                                                     // (7.1)
335   {
336     if (!select_lex->have_window_funcs())
337       partition_list= group_list;
338   }
339   else if (select_lex->have_window_funcs() &&
340            select_lex->window_specs.elements == 1)                    // (7.2)
341   {
342     partition_list=
343       select_lex->window_specs.head()->partition_list->first;
344   }
345   if (!partition_list)
346     return false;
347 
348   ORDER *ord;
349   Dynamic_array<SplM_field_ext_info> candidates;
350 
351   /*
352     Select from partition_list all candidates for splitting.
353     A candidate must be
354     - field item or refer to such (8.1)
355     - item mentioned in the select list (8.2)
356     Put info about such candidates into the array candidates
357   */
358   table_map usable_tables= 0;  // tables that contains the candidate
359   for (ord= partition_list; ord; ord= ord->next)
360   {
361     Item *ord_item= *ord->item;
362     if (ord_item->real_item()->type() != Item::FIELD_ITEM)   // !(8.1)
363       continue;
364 
365     Field *ord_field= ((Item_field *) (ord_item->real_item()))->field;
366 
367     /* Ignore fields from  of inner tables of outer joins */
368     TABLE_LIST *tbl= ord_field->table->pos_in_table_list;
369     if (tbl->is_inner_table_of_outer_join())
370       continue;
371 
372     List_iterator<Item> li(fields_list);
373     Item *item;
374     uint item_no= 0;
375     while ((item= li++))
376     {
377       if ((*ord->item)->eq(item, 0))       // (8.2)
378       {
379 	SplM_field_ext_info new_elem;
380         new_elem.producing_item= item;
381         new_elem.item_no= item_no;
382         new_elem.mat_field= derived->table->field[item_no];
383         new_elem.underlying_field= ord_field;
384         new_elem.is_usable_for_ref_access= false;
385         candidates.push(new_elem);
386         usable_tables|= ord_field->table->map;
387         break;
388       }
389       item_no++;
390     }
391   }
392   if (candidates.elements() == 0)  // no candidates satisfying (8.1) && (8.2)
393     return false;
394 
395   /*
396     For each table from this join find the keys that can be used for ref access
397     of the fields mentioned in the 'array candidates'
398   */
399 
400   SplM_field_ext_info *cand;
401   SplM_field_ext_info *cand_start= &candidates.at(0);
402   SplM_field_ext_info *cand_end= cand_start + candidates.elements();
403 
404   for (JOIN_TAB *tab= join_tab;
405        tab < join_tab + top_join_tab_count; tab++)
406   {
407     TABLE *table= tab->table;
408     if (!(table->map & usable_tables))
409       continue;
410 
411     table->keys_usable_for_splitting.clear_all();
412     uint i;
413     for (i= 0; i < table->s->keys; i++)
414     {
415       if (!table->keys_in_use_for_query.is_set(i))
416         continue;
417       KEY *key_info= table->key_info + i;
418       uint key_parts= table->actual_n_key_parts(key_info);
419       uint usable_kp_cnt= 0;
420       for ( ; usable_kp_cnt < key_parts; usable_kp_cnt++)
421       {
422         if (key_info->actual_rec_per_key(usable_kp_cnt) == 0)
423           break;
424         int fldnr= key_info->key_part[usable_kp_cnt].fieldnr;
425 
426         for (cand= cand_start; cand < cand_end; cand++)
427         {
428           if (cand->underlying_field->table == table &&
429               cand->underlying_field->field_index + 1 == fldnr)
430 	  {
431             cand->is_usable_for_ref_access= true;
432             break;
433           }
434         }
435         if (cand == cand_end)
436           break;
437       }
438       if (usable_kp_cnt)
439         table->keys_usable_for_splitting.set_bit(i);
440     }
441   }
442 
443   /* Count the candidate fields that can be accessed by ref */
444   uint spl_field_cnt= (uint)candidates.elements();
445   for (cand= cand_start; cand < cand_end; cand++)
446   {
447     if (!cand->is_usable_for_ref_access)
448       spl_field_cnt--;
449   }
450 
451   if (!spl_field_cnt)  // No candidate field can be accessed by ref => !(9)
452     return false;
453 
454   /*
455     Create a structure of the type SplM_opt_info and fill it with
456     the collected info on potential splittability of T
457   */
458   SplM_opt_info *spl_opt_info= new (thd->mem_root) SplM_opt_info();
459   SplM_field_info *spl_field=
460     (SplM_field_info *) (thd->calloc(sizeof(SplM_field_info) *
461                                             spl_field_cnt));
462 
463   if (!(spl_opt_info && spl_field)) // consider T as not good for splitting
464     return false;
465 
466   spl_opt_info->join= this;
467   spl_opt_info->tables_usable_for_splitting= 0;
468   spl_opt_info->spl_field_cnt= spl_field_cnt;
469   spl_opt_info->spl_fields= spl_field;
470   for (cand= cand_start; cand < cand_end; cand++)
471   {
472     if (!cand->is_usable_for_ref_access)
473       continue;
474     spl_field->producing_item= cand->producing_item;
475     spl_field->underlying_field= cand->underlying_field;
476     spl_field->mat_field= cand->mat_field;
477     spl_opt_info->tables_usable_for_splitting|=
478     cand->underlying_field->table->map;
479     spl_field++;
480   }
481 
482   /* Attach this info to the table T */
483   derived->table->set_spl_opt_info(spl_opt_info);
484 
485   /*
486     If this is specification of a materialized derived table T that is
487     potentially splittable and is used in the from list of the right operand
488     of an IN predicand transformed to a semi-join then the embedding semi-join
489     nest is not allowed to be materialized.
490   */
491   if (derived && derived->is_materialized_derived() &&
492       derived->embedding && derived->embedding->sj_subq_pred)
493     derived->embedding->sj_subq_pred->types_allow_materialization= FALSE;
494   return true;
495 }
496 
497 
498 /**
499   @brief
500     Collect info on KEY_FIELD usable for splitting
501 
502   @param
503     key_field     KEY_FIELD to collect info on
504 
505   @details
506     The function assumes that this table is potentially splittable.
507     The function checks whether the KEY_FIELD structure key_field built for
508     this  table was created for a splitting field f. If so, the function does
509     the following using info from key_field:
510     1. Builds an equality of the form f = key_field->val that could be
511        pushed into this table.
512     2. Creates a new KEY_FIELD structure for this equality and stores
513        a reference to this structure in this->spl_opt_info.
514 */
515 
add_splitting_info_for_key_field(KEY_FIELD * key_field)516 void TABLE::add_splitting_info_for_key_field(KEY_FIELD *key_field)
517 {
518   DBUG_ASSERT(spl_opt_info != NULL);
519   JOIN *join= spl_opt_info->join;
520   Field *field= key_field->field;
521   SplM_field_info *spl_field= spl_opt_info->spl_fields;
522   uint i= spl_opt_info->spl_field_cnt;
523   for ( ; i; i--, spl_field++)
524   {
525     if (spl_field->mat_field == field)
526       break;
527   }
528   if (!i)         // field is not usable for splitting
529     return;
530 
531   /*
532     Any equality condition that can be potentially pushed into the
533     materialized derived table is constructed now though later it may turn out
534     that it is not needed, because it is not used for splitting.
535     The reason for this is that the failure to construct it when it has to be
536     injected causes denial for further processing of the query.
537     Formally this equality is needed in the KEY_FIELD structure constructed
538     here that will be used to generate additional keyuses usable for splitting.
539     However key_field.cond could be used for this purpose (see implementations
540     of virtual function can_optimize_keypart_ref()).
541 
542     The condition is built in such a form that it can be added to the WHERE
543     condition of the select that specifies this table.
544   */
545   THD *thd= in_use;
546   Item *left_item= spl_field->producing_item->build_clone(thd);
547   Item *right_item= key_field->val->build_clone(thd);
548   Item_func_eq *eq_item= 0;
549   if (left_item && right_item)
550   {
551     right_item->walk(&Item::set_fields_as_dependent_processor,
552                      false, join->select_lex);
553     right_item->update_used_tables();
554     eq_item= new (thd->mem_root) Item_func_eq(thd, left_item, right_item);
555   }
556   if (!eq_item)
557     return;
558   KEY_FIELD *added_key_field=
559     (KEY_FIELD *) thd->alloc(sizeof(KEY_FIELD));
560   if (!added_key_field ||
561       spl_opt_info->added_key_fields.push_back(added_key_field,thd->mem_root))
562     return;
563   added_key_field->field= spl_field->underlying_field;
564   added_key_field->cond= eq_item;
565   added_key_field->val= key_field->val;
566   added_key_field->level= 0;
567   added_key_field->optimize= KEY_OPTIMIZE_EQ;
568   added_key_field->eq_func= true;
569 
570   Item *real= key_field->val->real_item();
571   if ((real->type() == Item::FIELD_ITEM) &&
572         ((Item_field*)real)->field->maybe_null())
573     added_key_field->null_rejecting= true;
574   else
575     added_key_field->null_rejecting= false;
576 
577   added_key_field->cond_guard= NULL;
578   added_key_field->sj_pred_no= UINT_MAX;
579   return;
580 }
581 
582 
583 static bool
add_ext_keyuse_for_splitting(Dynamic_array<KEYUSE_EXT> * ext_keyuses,KEY_FIELD * added_key_field,uint key,uint part)584 add_ext_keyuse_for_splitting(Dynamic_array<KEYUSE_EXT> *ext_keyuses,
585                              KEY_FIELD *added_key_field, uint key, uint part)
586 {
587   KEYUSE_EXT keyuse_ext;
588   Field *field= added_key_field->field;
589 
590   JOIN_TAB *tab=field->table->reginfo.join_tab;
591   key_map possible_keys=field->get_possible_keys();
592   possible_keys.intersect(field->table->keys_usable_for_splitting);
593   tab->keys.merge(possible_keys);
594 
595   Item_func_eq *eq_item= (Item_func_eq *) (added_key_field->cond);
596   keyuse_ext.table= field->table;
597   keyuse_ext.val= eq_item->arguments()[1];
598   keyuse_ext.key= key;
599   keyuse_ext.keypart=part;
600   keyuse_ext.keypart_map= (key_part_map) 1 << part;
601   keyuse_ext.used_tables= keyuse_ext.val->used_tables();
602   keyuse_ext.optimize= added_key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL;
603   keyuse_ext.ref_table_rows= 0;
604   keyuse_ext.null_rejecting= added_key_field->null_rejecting;
605   keyuse_ext.cond_guard= added_key_field->cond_guard;
606   keyuse_ext.sj_pred_no= added_key_field->sj_pred_no;
607   keyuse_ext.validity_ref= 0;
608   keyuse_ext.needed_in_prefix= added_key_field->val->used_tables();
609   keyuse_ext.validity_var= false;
610   return ext_keyuses->push(keyuse_ext);
611 }
612 
613 
614 static int
sort_ext_keyuse(KEYUSE_EXT * a,KEYUSE_EXT * b)615 sort_ext_keyuse(KEYUSE_EXT *a, KEYUSE_EXT *b)
616 {
617   if (a->table->tablenr != b->table->tablenr)
618     return (int) (a->table->tablenr - b->table->tablenr);
619   if (a->key != b->key)
620     return (int) (a->key - b->key);
621   return (int) (a->keypart - b->keypart);
622 }
623 
624 
625 static void
sort_ext_keyuses(Dynamic_array<KEYUSE_EXT> * keyuses)626 sort_ext_keyuses(Dynamic_array<KEYUSE_EXT> *keyuses)
627 {
628   KEYUSE_EXT *first_keyuse= &keyuses->at(0);
629   my_qsort(first_keyuse, keyuses->elements(), sizeof(KEYUSE_EXT),
630            (qsort_cmp) sort_ext_keyuse);
631 }
632 
633 
634 /**
635   @brief
636     Add info on keyuses usable for splitting into an array
637 */
638 
639 static bool
add_ext_keyuses_for_splitting_field(Dynamic_array<KEYUSE_EXT> * ext_keyuses,KEY_FIELD * added_key_field)640 add_ext_keyuses_for_splitting_field(Dynamic_array<KEYUSE_EXT> *ext_keyuses,
641                                     KEY_FIELD *added_key_field)
642 {
643   Field *field= added_key_field->field;
644   TABLE *table= field->table;
645   for (uint key= 0; key < table->s->keys; key++)
646   {
647     if (!(table->keys_usable_for_splitting.is_set(key)))
648       continue;
649     KEY *key_info= table->key_info + key;
650     uint key_parts= table->actual_n_key_parts(key_info);
651     KEY_PART_INFO *key_part_info= key_info->key_part;
652     for (uint part=0; part <  key_parts; part++, key_part_info++)
653     {
654       if (!field->eq(key_part_info->field))
655         continue;
656       if (add_ext_keyuse_for_splitting(ext_keyuses, added_key_field, key, part))
657         return true;
658     }
659   }
660   return false;
661 }
662 
663 
664 /*
665   @brief
666     Cost of the post join operation used in specification of splittable table
667 */
668 
669 static
spl_postjoin_oper_cost(THD * thd,double join_record_count,uint rec_len)670 double spl_postjoin_oper_cost(THD *thd, double join_record_count, uint rec_len)
671 {
672   double cost;
673   cost=  get_tmp_table_write_cost(thd, join_record_count,rec_len) *
674          join_record_count;   // cost to fill tmp table
675   cost+= get_tmp_table_lookup_cost(thd, join_record_count,rec_len) *
676          join_record_count;   // cost to perform post join operation used here
677   cost+= get_tmp_table_lookup_cost(thd, join_record_count, rec_len) +
678          (join_record_count == 0 ? 0 :
679           join_record_count * log2 (join_record_count)) *
680          SORT_INDEX_CMP_COST;             // cost to perform  sorting
681   return cost;
682 }
683 
684 /**
685   @brief
686     Add KEYUSE structures that can be usable for splitting
687 
688   @details
689     This function is called only for joins created for potentially
690     splittable materialized tables. The function does the following:
691     1. Creates the dynamic array ext_keyuses_for_splitting of KEYUSE_EXT
692        structures and fills is with info about all keyuses that
693        could be used for splitting.
694     2. Sort the array ext_keyuses_for_splitting for fast access by key
695        on certain columns.
696     3. Collects and stores cost and cardinality info on the best execution
697        plan that does not use splitting and save this plan together with
698        corresponding array of keyuses.
699     4. Expand this array with KEYUSE elements built from the info stored
700        in ext_keyuses_for_splitting that could be produced by pushed
701        equalities employed for splitting.
702     5. Prepare the extended array of keyuses to be used in the function
703        best_access_plan()
704 */
705 
add_keyuses_for_splitting()706 void JOIN::add_keyuses_for_splitting()
707 {
708   uint i;
709   uint idx;
710   KEYUSE_EXT *keyuse_ext;
711   KEYUSE_EXT keyuse_ext_end;
712   double oper_cost;
713   uint rec_len;
714   uint added_keyuse_count;
715   TABLE *table= select_lex->master_unit()->derived->table;
716   List_iterator_fast<KEY_FIELD> li(spl_opt_info->added_key_fields);
717   KEY_FIELD *added_key_field;
718   if (!spl_opt_info->added_key_fields.elements)
719     goto err;
720   if (!(ext_keyuses_for_splitting= new Dynamic_array<KEYUSE_EXT>))
721     goto err;
722   while ((added_key_field= li++))
723   {
724     (void) add_ext_keyuses_for_splitting_field(ext_keyuses_for_splitting,
725                                                added_key_field);
726   }
727   added_keyuse_count= (uint)ext_keyuses_for_splitting->elements();
728   if (!added_keyuse_count)
729     goto err;
730   sort_ext_keyuses(ext_keyuses_for_splitting);
731   bzero((char*) &keyuse_ext_end, sizeof(keyuse_ext_end));
732   if (ext_keyuses_for_splitting->push(keyuse_ext_end))
733     goto err;
734 
735   spl_opt_info->unsplit_card= join_record_count;
736 
737   rec_len= table->s->rec_buff_length;
738 
739   oper_cost= spl_postjoin_oper_cost(thd, join_record_count, rec_len);
740 
741   spl_opt_info->unsplit_cost= best_positions[table_count-1].read_time +
742                               oper_cost;
743 
744   if (!(save_qep= new Join_plan_state(table_count + 1)))
745     goto err;
746 
747   save_query_plan(save_qep);
748 
749   if (!keyuse.buffer &&
750        my_init_dynamic_array(&keyuse, sizeof(KEYUSE), 20, 64,
751                              MYF(MY_THREAD_SPECIFIC)))
752     goto err;
753 
754   if (allocate_dynamic(&keyuse,
755                        save_qep->keyuse.elements +
756                        added_keyuse_count))
757     goto err;
758 
759   idx= keyuse.elements= save_qep->keyuse.elements;
760   if (keyuse.elements)
761     memcpy(keyuse.buffer,
762            save_qep->keyuse.buffer,
763            (size_t) keyuse.elements * keyuse.size_of_element);
764 
765   keyuse_ext= &ext_keyuses_for_splitting->at(0);
766   for (i=0; i < added_keyuse_count; i++, keyuse_ext++, idx++)
767   {
768     set_dynamic(&keyuse, (KEYUSE *) keyuse_ext, idx);
769     KEYUSE *added_keyuse= ((KEYUSE *) (keyuse.buffer)) + idx;
770     added_keyuse->validity_ref= &keyuse_ext->validity_var;
771   }
772 
773   if (sort_and_filter_keyuse(thd, &keyuse, true))
774     goto err;
775   optimize_keyuse(this, &keyuse);
776 
777   for (uint i= 0; i < table_count; i++)
778   {
779     JOIN_TAB *tab= join_tab + i;
780     map2table[tab->table->tablenr]= tab;
781   }
782 
783   return;
784 
785 err:
786   if (save_qep)
787     restore_query_plan(save_qep);
788   table->deny_splitting();
789   return;
790 }
791 
792 
793 /**
794   @brief
795     Add KEYUSE structures that can be usable for splitting of this joined table
796 */
797 
add_keyuses_for_splitting()798 void JOIN_TAB::add_keyuses_for_splitting()
799 {
800   DBUG_ASSERT(table->spl_opt_info != NULL);
801   SplM_opt_info *spl_opt_info= table->spl_opt_info;
802   spl_opt_info->join->add_keyuses_for_splitting();
803 }
804 
805 
806 /*
807   @brief
808      Find info on the splitting plan by the splitting key
809 */
810 
find_plan(TABLE * table,uint key,uint parts)811 SplM_plan_info *SplM_opt_info::find_plan(TABLE *table, uint key, uint parts)
812 {
813   List_iterator_fast<SplM_plan_info> li(plan_cache);
814   SplM_plan_info *spl_plan;
815   while ((spl_plan= li++))
816   {
817     if (spl_plan->table == table &&
818         spl_plan->key == key &&
819         spl_plan->parts == parts)
820       break;
821   }
822   return spl_plan;
823 }
824 
825 
826 /*
827   @breaf
828     Enable/Disable a keyuses that can be used for splitting
829  */
830 
831 static
reset_validity_vars_for_keyuses(KEYUSE_EXT * key_keyuse_ext_start,TABLE * table,uint key,table_map remaining_tables,bool validity_val)832 void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
833                                      TABLE *table, uint key,
834                                      table_map remaining_tables,
835                                      bool validity_val)
836 {
837   KEYUSE_EXT *keyuse_ext= key_keyuse_ext_start;
838   do
839   {
840     if (!(keyuse_ext->needed_in_prefix & remaining_tables))
841     {
842       /*
843         The enabling/disabling flags are set just in KEYUSE_EXT structures.
844         Yet keyuses that are used by best_access_path() have pointers
845         to these flags.
846       */
847       keyuse_ext->validity_var= validity_val;
848     }
849     keyuse_ext++;
850   }
851   while (keyuse_ext->key == key && keyuse_ext->table == table);
852 }
853 
854 
855 /**
856   @brief
857     Choose the best splitting to extend the evaluated partial join
858 
859   @param
860     record_count      estimated cardinality of the extended partial join
861     remaining_tables  tables not joined yet
862 
863   @details
864     This function is called during the search for the best execution
865     plan of the join that contains this table T. The function is called
866     every time when the optimizer tries to extend a partial join by
867     joining it with table T. Depending on what tables are already in the
868     partial join different equalities usable for splitting can be pushed
869     into T. The function evaluates different variants and chooses the
870     best one. Then the function finds the plan for the materializing join
871     with the chosen equality conditions pushed into it. If the cost of the
872     plan turns out to be less than the cost of the best plan without
873     splitting the function set it as the true plan of materialization
874     of the table T.
875     The function caches the found plans for materialization of table T
876     together if the info what key was used for splitting. Next time when
877     the optimizer prefers to use the same key the plan is taken from
878     the cache of plans
879 
880   @retval
881     Pointer to the info on the found plan that employs the pushed equalities
882     if the plan has been chosen, NULL - otherwise.
883 */
884 
choose_best_splitting(double record_count,table_map remaining_tables)885 SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
886                                                  table_map remaining_tables)
887 {
888   SplM_opt_info *spl_opt_info= table->spl_opt_info;
889   DBUG_ASSERT(spl_opt_info != NULL);
890   JOIN *join= spl_opt_info->join;
891   THD *thd= join->thd;
892   table_map tables_usable_for_splitting=
893               spl_opt_info->tables_usable_for_splitting;
894   KEYUSE_EXT *keyuse_ext= &join->ext_keyuses_for_splitting->at(0);
895   KEYUSE_EXT *UNINIT_VAR(best_key_keyuse_ext_start);
896   TABLE *best_table= 0;
897   double best_rec_per_key= DBL_MAX;
898   SplM_plan_info *spl_plan= 0;
899   uint best_key= 0;
900   uint best_key_parts= 0;
901 
902   /*
903     Check whether there are keys that can be used to join T employing splitting
904     and if so, select the best out of such keys
905   */
906   for (uint tablenr= 0; tablenr < join->table_count; tablenr++)
907   {
908     if (!((1ULL << tablenr) & tables_usable_for_splitting))
909       continue;
910     JOIN_TAB *tab= join->map2table[tablenr];
911     TABLE *table= tab->table;
912     if (keyuse_ext->table != table)
913       continue;
914     do
915     {
916       uint key= keyuse_ext->key;
917       KEYUSE_EXT *key_keyuse_ext_start= keyuse_ext;
918       key_part_map found_parts= 0;
919       do
920       {
921         if (keyuse_ext->needed_in_prefix & remaining_tables)
922 	{
923           keyuse_ext++;
924           continue;
925         }
926         if (!(keyuse_ext->keypart_map & found_parts))
927 	{
928           if ((!found_parts && !keyuse_ext->keypart) ||
929               (found_parts && ((keyuse_ext->keypart_map >> 1) & found_parts)))
930             found_parts|= keyuse_ext->keypart_map;
931           else
932 	  {
933             do
934 	    {
935               keyuse_ext++;
936             }
937             while (keyuse_ext->key == key && keyuse_ext->table == table);
938             break;
939           }
940         }
941         KEY *key_info= table->key_info + key;
942         double rec_per_key=
943                  key_info->actual_rec_per_key(keyuse_ext->keypart);
944         if (rec_per_key < best_rec_per_key)
945 	{
946           best_table= keyuse_ext->table;
947           best_key= keyuse_ext->key;
948 	  best_key_parts= keyuse_ext->keypart + 1;
949           best_rec_per_key= rec_per_key;
950           best_key_keyuse_ext_start= key_keyuse_ext_start;
951         }
952         keyuse_ext++;
953       }
954       while (keyuse_ext->key == key && keyuse_ext->table == table);
955     }
956     while (keyuse_ext->table == table);
957   }
958   spl_opt_info->last_plan= 0;
959   if (best_table)
960   {
961     /*
962       The key for splitting was chosen, look for the plan for this key
963       in the cache
964     */
965     spl_plan= spl_opt_info->find_plan(best_table, best_key, best_key_parts);
966     if (!spl_plan)
967     {
968       /*
969         The plan for the chosen key has not been found in the cache.
970         Build a new plan and save info on it in the cache
971       */
972       table_map all_table_map= (((table_map) 1) << join->table_count) - 1;
973       reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
974                                       best_key, remaining_tables, true);
975       choose_plan(join, all_table_map & ~join->const_table_map);
976 
977       /*
978         Check that the chosen plan is really a splitting plan.
979         If not or if there is not enough memory to save the plan in the cache
980         then just return with no splitting plan.
981       */
982       POSITION *first_non_const_pos= join->best_positions + join->const_tables;
983       TABLE *table= first_non_const_pos->table->table;
984       key_map spl_keys= table->keys_usable_for_splitting;
985       if (!(first_non_const_pos->key &&
986             spl_keys.is_set(first_non_const_pos->key->key)) ||
987           !(spl_plan= (SplM_plan_info *) thd->alloc(sizeof(SplM_plan_info))) ||
988 	  !(spl_plan->best_positions=
989 	     (POSITION *) thd->alloc(sizeof(POSITION) * join->table_count)) ||
990 	  spl_opt_info->plan_cache.push_back(spl_plan))
991       {
992         reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
993                                         best_key, remaining_tables, false);
994         return 0;
995       }
996 
997       spl_plan->keyuse_ext_start= best_key_keyuse_ext_start;
998       spl_plan->table= best_table;
999       spl_plan->key= best_key;
1000       spl_plan->parts= best_key_parts;
1001       spl_plan->split_sel= best_rec_per_key /
1002                            (spl_opt_info->unsplit_card ?
1003                             spl_opt_info->unsplit_card : 1);
1004 
1005       uint rec_len= table->s->rec_buff_length;
1006 
1007       double split_card= spl_opt_info->unsplit_card * spl_plan->split_sel;
1008       double oper_cost= split_card *
1009                         spl_postjoin_oper_cost(thd, split_card, rec_len);
1010       spl_plan->cost= join->best_positions[join->table_count-1].read_time +
1011                       + oper_cost;
1012 
1013       memcpy((char *) spl_plan->best_positions,
1014              (char *) join->best_positions,
1015              sizeof(POSITION) * join->table_count);
1016       reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
1017                                       best_key, remaining_tables, false);
1018     }
1019     if (spl_plan)
1020     {
1021       if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost)
1022       {
1023         /*
1024           The best plan that employs splitting is cheaper than
1025           the plan without splitting
1026 	*/
1027         spl_opt_info->last_plan= spl_plan;
1028       }
1029     }
1030   }
1031 
1032   /* Set the cost of the preferred materialization for this partial join */
1033   records= (ha_rows)spl_opt_info->unsplit_card;
1034   spl_plan= spl_opt_info->last_plan;
1035   if (spl_plan)
1036   {
1037     startup_cost= record_count * spl_plan->cost;
1038     records= (ha_rows) (records * spl_plan->split_sel);
1039   }
1040   else
1041     startup_cost= spl_opt_info->unsplit_cost;
1042   return spl_plan;
1043 }
1044 
1045 
1046 /**
1047   @brief
1048     Inject equalities for splitting used by the materialization join
1049 
1050   @param
1051     excluded_tables  used to filter out the equalities that cannot
1052                       be pushed.
1053 
1054   @details
1055     This function injects equalities pushed into a derived table T for which
1056     the split optimization has been chosen by the optimizer. The function
1057     is called by JOIN::inject_splitting_cond_for_all_tables_with_split_op().
1058     All equalities usable for splitting T whose right parts do not depend on
1059     any of the 'excluded_tables' can be pushed into the where clause of the
1060     derived table T.
1061     The function also marks the select that specifies T as
1062     UNCACHEABLE_DEPENDENT_INJECTED.
1063 
1064   @retval
1065     false  on success
1066     true   on failure
1067 */
1068 
inject_best_splitting_cond(table_map excluded_tables)1069 bool JOIN::inject_best_splitting_cond(table_map excluded_tables)
1070 {
1071   Item *inj_cond= 0;
1072   List<Item> *inj_cond_list= &spl_opt_info->inj_cond_list;
1073   List_iterator<KEY_FIELD> li(spl_opt_info->added_key_fields);
1074   KEY_FIELD *added_key_field;
1075   while ((added_key_field= li++))
1076   {
1077     if (excluded_tables & added_key_field->val->used_tables())
1078       continue;
1079     if (inj_cond_list->push_back(added_key_field->cond, thd->mem_root))
1080       return true;
1081   }
1082   DBUG_ASSERT(inj_cond_list->elements);
1083   switch (inj_cond_list->elements) {
1084   case 1:
1085     inj_cond= inj_cond_list->head(); break;
1086   default:
1087     inj_cond= new (thd->mem_root) Item_cond_and(thd, *inj_cond_list);
1088     if (!inj_cond)
1089       return true;
1090   }
1091   if (inj_cond)
1092     inj_cond->fix_fields(thd,0);
1093 
1094   if (inject_cond_into_where(inj_cond->copy_andor_structure(thd)))
1095     return true;
1096 
1097   select_lex->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
1098   st_select_lex_unit *unit= select_lex->master_unit();
1099   unit->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
1100 
1101   return false;
1102 }
1103 
1104 
1105 /**
1106   @brief
1107     Test if equality is injected for split optimization
1108 
1109   @param
1110     eq_item   equality to to test
1111 
1112   @retval
1113     true    eq_item is equality injected for split optimization
1114     false   otherwise
1115 */
1116 
is_eq_cond_injected_for_split_opt(Item_func_eq * eq_item)1117 bool is_eq_cond_injected_for_split_opt(Item_func_eq *eq_item)
1118 {
1119   Item *left_item= eq_item->arguments()[0]->real_item();
1120   if (left_item->type() != Item::FIELD_ITEM)
1121     return false;
1122   Field *field= ((Item_field *) left_item)->field;
1123   if (!field->table->reginfo.join_tab)
1124     return false;
1125   JOIN *join= field->table->reginfo.join_tab->join;
1126   if (!join->spl_opt_info)
1127     return false;
1128   List_iterator_fast<Item> li(join->spl_opt_info->inj_cond_list);
1129   Item *item;
1130   while ((item= li++))
1131   {
1132     if (item == eq_item)
1133         return true;
1134   }
1135   return false;
1136 }
1137 
1138 
1139 /**
1140   @brief
1141     Fix the splitting chosen for a splittable table in the final query plan
1142 
1143   @param
1144     spl_plan   info on the splitting plan chosen for the splittable table T
1145     remaining_tables  the table T is joined just before these tables
1146     is_const_table    the table T is a constant table
1147 
1148   @details
1149     If in the final query plan the optimizer has chosen a splitting plan
1150     then the function sets this plan as the final execution plan to
1151     materialized the table T. Otherwise the plan that does not use
1152     splitting is set for the materialization.
1153 
1154   @retval
1155     false  on success
1156     true   on failure
1157 */
1158 
fix_splitting(SplM_plan_info * spl_plan,table_map remaining_tables,bool is_const_table)1159 bool JOIN_TAB::fix_splitting(SplM_plan_info *spl_plan,
1160                              table_map remaining_tables,
1161                              bool is_const_table)
1162 {
1163   SplM_opt_info *spl_opt_info= table->spl_opt_info;
1164   DBUG_ASSERT(table->spl_opt_info != 0);
1165   JOIN *md_join= spl_opt_info->join;
1166   if (spl_plan && !is_const_table)
1167   {
1168     memcpy((char *) md_join->best_positions,
1169            (char *) spl_plan->best_positions,
1170            sizeof(POSITION) * md_join->table_count);
1171     /*
1172       This is called for a proper work of JOIN::get_best_combination()
1173       called for the join that materializes T
1174     */
1175     reset_validity_vars_for_keyuses(spl_plan->keyuse_ext_start,
1176                                     spl_plan->table,
1177                                     spl_plan->key,
1178                                     remaining_tables,
1179                                     true);
1180   }
1181   else if (md_join->save_qep)
1182   {
1183     md_join->restore_query_plan(md_join->save_qep);
1184   }
1185   return false;
1186 }
1187 
1188 
1189 /**
1190   @brief
1191     Fix the splittings chosen splittable tables in the final query plan
1192 
1193   @details
1194     The function calls JOIN_TAB::fix_splittins for all potentially
1195     splittable tables in this join to set all final materialization
1196     plans chosen for these tables.
1197 
1198   @retval
1199     false  on success
1200     true   on failure
1201 */
1202 
fix_all_splittings_in_plan()1203 bool JOIN::fix_all_splittings_in_plan()
1204 {
1205   table_map prev_tables= 0;
1206   table_map all_tables= (table_map(1) << table_count) - 1;
1207   for (uint tablenr= 0; tablenr < table_count; tablenr++)
1208   {
1209     POSITION *cur_pos= &best_positions[tablenr];
1210     JOIN_TAB *tab= cur_pos->table;
1211     if (tab->table->is_splittable())
1212     {
1213       SplM_plan_info *spl_plan= cur_pos->spl_plan;
1214       if (tab->fix_splitting(spl_plan,
1215                              all_tables & ~prev_tables,
1216                              tablenr < const_tables ))
1217           return true;
1218     }
1219     prev_tables|= tab->table->map;
1220   }
1221   return false;
1222 }
1223 
1224 
1225 /**
1226   @brief
1227     Inject splitting conditions into WHERE of split derived
1228 
1229   @details
1230     The function calls JOIN_TAB::inject_best_splitting_cond() for each
1231     materialized derived table T used in this join for which the split
1232     optimization has been chosen by the optimizer. It is done in order to
1233     inject equalities pushed into the where clause of the specification
1234     of T that would be helpful to employ the splitting technique.
1235 
1236   @retval
1237     false  on success
1238     true   on failure
1239 */
1240 
inject_splitting_cond_for_all_tables_with_split_opt()1241 bool JOIN::inject_splitting_cond_for_all_tables_with_split_opt()
1242 {
1243   table_map prev_tables= 0;
1244   table_map all_tables= (table_map(1) << table_count) - 1;
1245   for (uint tablenr= 0; tablenr < table_count; tablenr++)
1246   {
1247     POSITION *cur_pos= &best_positions[tablenr];
1248     JOIN_TAB *tab= cur_pos->table;
1249     prev_tables|= tab->table->map;
1250     if (!(tab->table->is_splittable() && cur_pos->spl_plan))
1251       continue;
1252     SplM_opt_info *spl_opt_info= tab->table->spl_opt_info;
1253     JOIN *join= spl_opt_info->join;
1254     /*
1255       Currently the equalities referencing columns of SJM tables with
1256       look-up access cannot be pushed into materialized derived.
1257     */
1258     if (join->inject_best_splitting_cond((all_tables & ~prev_tables) |
1259 				          sjm_lookup_tables))
1260         return true;
1261   }
1262   return false;
1263 }
1264