1 #ifndef SQL_PLANNER_INCLUDED
2 #define SQL_PLANNER_INCLUDED
3 
4 /* Copyright (c) 2000, 2021, Oracle and/or its affiliates.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License, version 2.0,
8    as published by the Free Software Foundation.
9 
10    This program is also distributed with certain software (including
11    but not limited to OpenSSL) that is licensed under separate terms,
12    as designated in a particular file or component or in included license
13    documentation.  The authors of MySQL hereby grant you an additional
14    permission to link the program and your derivative works with the
15    separately licensed software that they have included with MySQL.
16 
17    This program is distributed in the hope that it will be useful,
18    but WITHOUT ANY WARRANTY; without even the implied warranty of
19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20    GNU General Public License, version 2.0, for more details.
21 
22    You should have received a copy of the GNU General Public License
23    along with this program; if not, write to the Free Software
24    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
25 
26 /** @file Join planner classes */
27 
28 #include "sql_class.h"
29 #include "sql_select.h"
30 #include "sql_test.h"
31 #include "sql_optimizer.h"
32 
33 class Opt_trace_object;
34 
35 /**
36   This class determines the optimal join order for tables within
37   a basic query block, ie a query specification clause, possibly extended
38   with semi-joined tables from embedded subqueries.
39 
40   This class takes as prerequisite a join class where all dependencies among
41   tables have been sorted out, all possible access paths have been
42   sorted out, and all statistics information has been filled in.
43 
44   The class has a sole public function that will calculate the most
45   optimal plan based on the inputs and the environment, such as prune level
46   and greedy optimizer search depth. For more information, see the
47   function headers for the private functions greedy_search(),
48   best_extension_by_limited_search() and eq_ref_extension_by_limited_search().
49 */
50 
51 class Optimize_table_order
52 {
53 public:
Optimize_table_order(THD * thd_arg,JOIN * join_arg,TABLE_LIST * sjm_nest_arg)54   Optimize_table_order(THD *thd_arg, JOIN *join_arg, TABLE_LIST *sjm_nest_arg)
55   : thd(thd_arg), join(join_arg),
56     search_depth(determine_search_depth(thd->variables.optimizer_search_depth,
57                                         join->tables - join->const_tables)),
58     prune_level(thd->variables.optimizer_prune_level),
59     cur_embedding_map(0), emb_sjm_nest(sjm_nest_arg),
60     excluded_tables((emb_sjm_nest ?
61                      (join->all_table_map & ~emb_sjm_nest->sj_inner_tables) : 0) |
62                     (join->allow_outer_refs ? 0 : OUTER_REF_TABLE_BIT)),
63     has_sj(!(join->select_lex->sj_nests.is_empty() || emb_sjm_nest)),
64     test_all_ref_keys(false), found_plan_with_allowed_sj(false)
65   {}
~Optimize_table_order()66   ~Optimize_table_order()
67   {}
68   /**
69     Entry point to table join order optimization.
70     For further description, see class header and private function headers.
71 
72     @return false if successful, true if error
73   */
74   bool choose_table_order();
75 
76 private:
77   THD *const thd;            // Pointer to current THD
78   JOIN *const join;          // Pointer to the current plan being developed
79   const uint search_depth;   // Maximum search depth to apply in greedy search
80   const uint prune_level;    // pruning heuristics to be applied
81                              // (0 = EXHAUSTIVE, 1 = PRUNE_BY_TIME_OR_ROWS)
82   /**
83     Bitmap of all join nests embedding the last table appended to the current
84     partial join.
85   */
86   nested_join_map cur_embedding_map;
87   /**
88     If non-NULL, we are optimizing a materialized semi-join nest.
89     If NULL, we are optimizing a complete join plan.
90   */
91   const TABLE_LIST *const emb_sjm_nest;
92   /**
93     When calculating a plan for a materialized semi-join nest,
94     best_access_path() needs to know not only the remaining tables within the
95     semi-join nest, but also all tables outside of this nest, because there may
96     be key references between the semi-join nest and the outside tables
97     that should not be considered when materializing the semi-join nest.
98     @c excluded_tables tracks these tables.
99   */
100   const table_map excluded_tables;
101   /*
102     No need to call advance_sj_state() when
103      1) there are no semijoin nests or
104      2) we are optimizing a materialized semijoin nest.
105   */
106   const bool has_sj;
107 
108   /**
109      If true, find_best_ref() must go through all keys, no shortcutting
110      allowed.
111   */
112   bool test_all_ref_keys;
113 
114 
115   /// True if we found a complete plan using only allowed semijoin strategies.
116   bool found_plan_with_allowed_sj;
117 
118   inline Key_use* find_best_ref(JOIN_TAB  *tab,
119                                 const table_map remaining_tables,
120                                 const uint idx,
121                                 const double prefix_rowcount,
122                                 bool *found_condition,
123                                 table_map *ref_depends_map,
124                                 uint *used_key_parts);
125   double calculate_scan_cost(const JOIN_TAB *tab,
126                              const uint idx,
127                              const Key_use *best_ref,
128                              const double prefix_rowcount,
129                              const bool found_condition,
130                              const bool disable_jbuf,
131                              double *rows_after_filtering,
132                              Opt_trace_object *trace_access_scan);
133   void best_access_path(JOIN_TAB *tab,
134                         const table_map remaining_tables,
135                         const uint idx,
136                         bool disable_jbuf,
137                         const double prefix_rowcount,
138                         POSITION *pos);
139   bool semijoin_loosescan_fill_driving_table_position(const JOIN_TAB  *s,
140                                                       table_map remaining_tables,
141                                                       uint      idx,
142                                                       POSITION *loose_scan_pos);
143   bool check_interleaving_with_nj(JOIN_TAB *next_tab);
144   void advance_sj_state(table_map remaining_tables,
145                         const JOIN_TAB *tab, uint idx);
146   void backout_nj_state(const table_map remaining_tables,
147                         const JOIN_TAB *tab);
148   void optimize_straight_join(table_map join_tables);
149   bool greedy_search(table_map remaining_tables);
150   bool best_extension_by_limited_search(table_map remaining_tables,
151                                         uint idx,
152                                         uint current_search_depth);
153   table_map eq_ref_extension_by_limited_search(
154                                         table_map remaining_tables,
155                                         uint idx,
156                                         uint current_search_depth);
157   void consider_plan(uint idx, Opt_trace_object *trace_obj);
158   bool fix_semijoin_strategies();
159   bool semijoin_firstmatch_loosescan_access_paths(
160                 uint first_tab, uint last_tab, table_map remaining_tables,
161                 bool loosescan, bool final,
162                 double *newcount, double *newcost);
163   void semijoin_mat_scan_access_paths(
164                 uint last_inner_tab, uint last_outer_tab,
165                 table_map remaining_tables, TABLE_LIST *sjm_nest, bool final,
166                 double *newcount, double *newcost);
167   void semijoin_mat_lookup_access_paths(
168                 uint last_inner, TABLE_LIST *sjm_nest,
169                 double *newcount, double *newcost);
170   void semijoin_dupsweedout_access_paths(
171                 uint first_tab, uint last_tab,
172                 table_map remaining_tables,
173                 double *newcount, double *newcost);
174 
175   static uint determine_search_depth(uint search_depth, uint table_count);
176 };
177 
178 void get_partial_join_cost(JOIN *join, uint n_tables, double *cost_arg,
179                            double *rowcount_arg);
180 
181 /**
182   Calculate 'Post read filtering' effect of JOIN::conds for table
183   'tab'. Only conditions that are not directly involved in the chosen
184   access method shall be included in the calculation of this 'Post
185   read filtering' effect.
186 
187   The function first identifies fields that are directly used by the
188   access method. This includes columns used by range and ref access types,
189   and predicates on the identified columns (if any) will not be taken into
190   account when the filtering effect is calculated.
191 
192   The function will then calculate the filtering effect of any predicate
193   that applies to 'tab' and is not depending on the columns used by the
194   access method. The source of information with highest accuracy is
195   always preferred and is as follows:
196     1) Row estimates from the range optimizer
197     2) Row estimates from index statistics (records per key)
198     3) Guesstimates
199 
200   Thus, after identifying columns that are used by the access method,
201   the function will look for rows estimates made by the range optimizer.
202   If found, the estimates from the range optimizer are calculated into
203   the filtering effect.
204 
205   The function then goes through JOIN::conds to get estimates from any
206   remaining predicate that applies to 'tab' and does not depend on any
207   tables that come later in the join sequence. Predicates that depend on
208   columns that are either used by the access method or used in the row
209   estimate from the range optimizer will not be considered in this phase.
210 
211   @param tab          The table condition filtering effect is calculated
212                       for
213   @param keyuse       Describes the 'ref' access method (if any) that is
214                       chosen
215   @param used_tables  Tables earlier in the join sequence than 'tab'
216   @param fanout       The number of rows read by the chosen access
217                       method for each row combination of previous tables
218   @param is_join_buffering  Whether or not condition filtering is about
219                       to be calculated for an access method using join
220                       buffering.
221   @return  the 'post read filtering' effect (between 0 and 1) of
222            JOIN::conds
223 */
224 float calculate_condition_filter(const JOIN_TAB *const tab,
225                                  const Key_use *const keyuse,
226                                  table_map used_tables,
227                                  double fanout,
228                                  bool is_join_buffering);
229 #endif /* SQL_PLANNER_INCLUDED */
230