1 /*
2    Copyright (c) 2014, 2021, Oracle and/or its affiliates.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software Foundation,
22    51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
23 
24 // First include (the generated) my_config.h, to get correct platform defines.
25 #include "my_config.h"
26 #include "opt_statistics.h"
27 #include "key.h"                                // rec_per_key_t, KEY
28 #include "table.h"                              // TABLE
29 
30 using std::max;
31 
32 /**
33   This code for computing a guestimate for records per key is based on
34   code in Optimize_table_order::find_best_ref().
35 
36   Assume that the first key part matches 1% of the file and that the
37   whole key matches 10 (duplicates) or 1 (unique) records. For small
38   tables, ensure there are at least ten different key values.  Assume
39   also that more key matches proportionally more records. This gives
40   the formula:
41 
42     records = a - (x-1)/(c-1)*(a-b)
43 
44   where
45 
46     b = records matched by whole key
47     a = records matched by first key part (1% of all records?)
48     c = number of key parts in key
49     x = used key parts (1 <= x <= c)
50 
51   @todo Change Optimize_table_order::find_best_ref() to use this function.
52 */
53 
guess_rec_per_key(const TABLE * const table,const KEY * const key,uint used_keyparts)54 rec_per_key_t guess_rec_per_key(const TABLE *const table, const KEY *const key,
55                                 uint used_keyparts)
56 {
57   assert(used_keyparts >= 1);
58   assert(used_keyparts <= key->user_defined_key_parts);
59   assert(!key->has_records_per_key(used_keyparts - 1));
60 
61   const ha_rows table_rows= table->file->stats.records;
62 
63   /*
64     Make an estimates for how many records the whole key will match.
65     If there exists index statistics for the whole key we use this.
66     If not, we assume the whole key matches ten records for a non-unique
67     index and 1 record for a unique index.
68   */
69   rec_per_key_t rec_per_key_all;
70   if (key->has_records_per_key(key->user_defined_key_parts - 1))
71     rec_per_key_all= key->records_per_key(key->user_defined_key_parts - 1);
72   else
73   {
74     if (key->actual_flags & HA_NOSAME)
75       rec_per_key_all= 1.0f;                     // Unique index
76     else
77     {
78       rec_per_key_all= 10.0f;                    // Non-unique index
79 
80       /*
81         Assume the index contains at least ten unique values. Need to
82         adjust the records per key estimate for small tables. For an
83         empty table we assume records per key is 1.
84       */
85       set_if_smaller(rec_per_key_all, max(rec_per_key_t(table_rows)/10, 1.0f));
86     }
87   }
88 
89   rec_per_key_t rec_per_key;
90 
91   // rec_per_key estimate for first key part (1% of records)
92   const rec_per_key_t rec_per_key_first= table_rows * 0.01f;
93 
94   if (rec_per_key_first < rec_per_key_all)
95   {
96     rec_per_key= rec_per_key_all;
97   }
98   else
99   {
100     if (key->user_defined_key_parts > 1)
101     {
102       // See formula above
103       rec_per_key= rec_per_key_first -
104                    (rec_per_key_t(used_keyparts - 1) /
105                     (key->user_defined_key_parts - 1)) *
106                    (rec_per_key_first - rec_per_key_all);
107     }
108     else
109     {
110       // Single column index
111       if (key->actual_flags & HA_NOSAME)
112         rec_per_key= 1.0f;                      // Unique index
113       else
114         rec_per_key= rec_per_key_first;         // Non-unique index
115     }
116 
117     assert(rec_per_key >= rec_per_key_all);
118   }
119 
120   return rec_per_key;
121 }
122