1 /* Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 /**
24   @file sql/histograms/histogram.cc
25   Histogram base class (implementation).
26 */
27 
28 #include "sql/histograms/histogram.h"  // Histogram, Histogram_comparator
29 
30 #include <sys/types.h>
31 #include <algorithm>
32 #include <map>
33 #include <memory>  // std::unique_ptr
34 #include <new>
35 #include <random>
36 #include <string>
37 #include <vector>
38 
39 #include "field_types.h"  // enum_field_types
40 #include "lex_string.h"
41 #include "m_ctype.h"
42 #include "my_alloc.h"
43 #include "my_bitmap.h"
44 #include "my_dbug.h"
45 #include "my_inttypes.h"
46 #include "my_sys.h"  // my_micro_time, get_charset
47 #include "my_systime.h"
48 #include "my_time.h"
49 #include "mysql/service_mysql_alloc.h"
50 #include "mysql_time.h"
51 #include "mysqld_error.h"
52 #include "scope_guard.h"  // create_scope_guard
53 #include "sql/auth/auth_common.h"
54 #include "sql/dd/cache/dictionary_client.h"
55 #include "sql/dd/dd.h"
56 #include "sql/dd/string_type.h"
57 #include "sql/dd/types/column.h"
58 #include "sql/dd/types/column_statistics.h"
59 #include "sql/dd/types/table.h"  // dd::Table
60 #include "sql/debug_sync.h"
61 #include "sql/field.h"  // Field
62 #include "sql/handler.h"
63 #include "sql/histograms/equi_height.h"  // Equi_height<T>
64 #include "sql/histograms/singleton.h"    // Singleton<T>
65 #include "sql/histograms/value_map.h"    // Value_map
66 #include "sql/item.h"
67 #include "sql/json_dom.h"  // Json_*
68 #include "sql/key.h"
69 #include "sql/mdl.h"  // MDL_request
70 #include "sql/my_decimal.h"
71 #include "sql/psi_memory_key.h"  // key_memory_histograms
72 #include "sql/sql_base.h"        // open_and_lock_tables,
73 #include "sql/sql_bitmap.h"
74 // close_thread_tables
75 #include "sql/sql_class.h"  // make_lex_string_root
76 #include "sql/sql_const.h"
77 #include "sql/strfunc.h"  // find_type2, find_set
78 #include "sql/system_variables.h"
79 #include "sql/table.h"
80 #include "sql/thd_raii.h"
81 #include "sql/transaction.h"  // trans_commit_stmt, trans_rollback_stmt
82 #include "sql/tztime.h"       // my_tz_UTC
83 #include "sql_string.h"       // String
84 #include "template_utils.h"
85 
86 struct TYPELIB;
87 
88 namespace histograms {
89 
90 /*
  This type represents an instrumented map of value maps, indexed by field
  number.
93 */
94 using value_map_collection = std::map<
95     uint16, std::unique_ptr<histograms::Value_map_base>, std::less<uint16>,
96     Histogram_key_allocator<
97         std::pair<const uint16, std::unique_ptr<histograms::Value_map_base>>>>;
98 
operator ()(size_t s) const99 void *Histogram_psi_key_alloc::operator()(size_t s) const {
100   return my_malloc(key_memory_histograms, s, MYF(MY_WME | ME_FATALERROR));
101 }
102 
103 /**
104   Convert from enum_field_types to Value_map_type.
105 
106   @param field_type the field type
107   @param is_unsigned whether the field type is unsigned or not. This is only
108                      considered if the field type is LONGLONG
109 
110   @return A Value_map_type. May be INVALID if the Value_map does not support
111           the field type.
112 */
field_type_to_value_map_type(const enum_field_types field_type,const bool is_unsigned)113 static Value_map_type field_type_to_value_map_type(
114     const enum_field_types field_type, const bool is_unsigned) {
115   switch (field_type) {
116     case MYSQL_TYPE_DECIMAL:
117     case MYSQL_TYPE_NEWDECIMAL:
118       return Value_map_type::DECIMAL;
119     case MYSQL_TYPE_TINY:
120     case MYSQL_TYPE_SHORT:
121     case MYSQL_TYPE_LONG:
122     case MYSQL_TYPE_INT24:
123     case MYSQL_TYPE_YEAR:
124     case MYSQL_TYPE_BIT:
125       return Value_map_type::INT;
126     case MYSQL_TYPE_ENUM:
127       return Value_map_type::ENUM;
128     case MYSQL_TYPE_SET:
129       return Value_map_type::SET;
130     case MYSQL_TYPE_LONGLONG:
131       return is_unsigned ? Value_map_type::UINT : Value_map_type::INT;
132     case MYSQL_TYPE_FLOAT:
133     case MYSQL_TYPE_DOUBLE:
134       return Value_map_type::DOUBLE;
135     case MYSQL_TYPE_TIME:
136     case MYSQL_TYPE_TIME2:
137       return Value_map_type::TIME;
138     case MYSQL_TYPE_DATE:
139     case MYSQL_TYPE_NEWDATE:
140       return Value_map_type::DATE;
141     case MYSQL_TYPE_DATETIME:
142     case MYSQL_TYPE_TIMESTAMP:
143     case MYSQL_TYPE_TIMESTAMP2:
144     case MYSQL_TYPE_DATETIME2:
145       return Value_map_type::DATETIME;
146     case MYSQL_TYPE_TINY_BLOB:
147     case MYSQL_TYPE_MEDIUM_BLOB:
148     case MYSQL_TYPE_LONG_BLOB:
149     case MYSQL_TYPE_BLOB:
150     case MYSQL_TYPE_VAR_STRING:
151     case MYSQL_TYPE_STRING:
152     case MYSQL_TYPE_VARCHAR:
153       return Value_map_type::STRING;
154     case MYSQL_TYPE_JSON:
155     case MYSQL_TYPE_GEOMETRY:
156     case MYSQL_TYPE_NULL:
157     default:
158       return Value_map_type::INVALID;
159   }
160 
161   // All cases should be handled, so this should not be hit.
162   /* purecov: begin inspected */
163   DBUG_ASSERT(false);
164   return Value_map_type::INVALID;
165   /* purecov: end */
166 }
167 
168 /**
169   Get the Value_map_type from a Field object.
170 
171   This effectively looks at the real_type() of a Field, and converts this to
172   a Value_map_type
173 
174   @param field The field to convert from
175 
176   @return A Value_map_type. May be INVALID if the Value_map does not support
177           the field type.
178 */
field_type_to_value_map_type(const Field * field)179 static Value_map_type field_type_to_value_map_type(const Field *field) {
180   bool is_unsigned = false;
181   if (field->real_type() == MYSQL_TYPE_LONGLONG) {
182     /*
183       For most integer types, the Value_map_type will be INT (int64). This type
184       will not cover the entire value range for the SQL data type UNSIGNED
185       BIGINT, so we need to distinguish between SIGNED BIGINT and UNSIGNED
186       BIGINT so that we can switch the Value_map_type to UINT (uint64).
187     */
188     is_unsigned = field->is_unsigned();
189   }
190 
191   return field_type_to_value_map_type(field->real_type(), is_unsigned);
192 }
193 
194 /**
195   Lock a column statistic MDL key for writing (exclusive lock).
196 
197   @param thd thread handle
198   @param mdl_key the MDL key to lock
199 
200   @return true on error, false on success
201 */
lock_for_write(THD * thd,const MDL_key & mdl_key)202 static bool lock_for_write(THD *thd, const MDL_key &mdl_key) {
203   DBUG_EXECUTE_IF("histogram_fail_during_lock_for_write", { return true; });
204 
205   MDL_request mdl_request;
206   MDL_REQUEST_INIT_BY_KEY(&mdl_request, &mdl_key, MDL_EXCLUSIVE,
207                           MDL_TRANSACTION);
208 
209   // If locking fails, an error has already been flagged.
210   return thd->mdl_context.acquire_lock(&mdl_request,
211                                        thd->variables.lock_wait_timeout);
212 }
213 
Histogram(MEM_ROOT * mem_root,const std::string & db_name,const std::string & tbl_name,const std::string & col_name,enum_histogram_type type,Value_map_type data_type)214 Histogram::Histogram(MEM_ROOT *mem_root, const std::string &db_name,
215                      const std::string &tbl_name, const std::string &col_name,
216                      enum_histogram_type type, Value_map_type data_type)
217     : m_null_values_fraction(INVALID_NULL_VALUES_FRACTION),
218       m_charset(nullptr),
219       m_num_buckets_specified(0),
220       m_mem_root(mem_root),
221       m_hist_type(type),
222       m_data_type(data_type) {
223   lex_string_strmake(m_mem_root, &m_database_name, db_name.c_str(),
224                      db_name.length());
225 
226   lex_string_strmake(m_mem_root, &m_table_name, tbl_name.c_str(),
227                      tbl_name.length());
228 
229   lex_string_strmake(m_mem_root, &m_column_name, col_name.c_str(),
230                      col_name.length());
231 }
232 
Histogram(MEM_ROOT * mem_root,const Histogram & other)233 Histogram::Histogram(MEM_ROOT *mem_root, const Histogram &other)
234     : m_sampling_rate(other.m_sampling_rate),
235       m_null_values_fraction(other.m_null_values_fraction),
236       m_charset(other.m_charset),
237       m_num_buckets_specified(other.m_num_buckets_specified),
238       m_mem_root(mem_root),
239       m_hist_type(other.m_hist_type),
240       m_data_type(other.m_data_type) {
241   lex_string_strmake(m_mem_root, &m_database_name, other.m_database_name.str,
242                      other.m_database_name.length);
243 
244   lex_string_strmake(m_mem_root, &m_table_name, other.m_table_name.str,
245                      other.m_table_name.length);
246 
247   lex_string_strmake(m_mem_root, &m_column_name, other.m_column_name.str,
248                      other.m_column_name.length);
249 }
250 
histogram_to_json(Json_object * json_object) const251 bool Histogram::histogram_to_json(Json_object *json_object) const {
252   // Get the current time in GMT timezone with microsecond accuray.
253   timeval time_value;
254   my_micro_time_to_timeval(my_micro_time(), &time_value);
255 
256   MYSQL_TIME current_time;
257   my_tz_UTC->gmt_sec_to_TIME(&current_time, time_value);
258 
259   // last-updated
260   const Json_datetime last_updated(current_time, MYSQL_TYPE_DATETIME);
261   if (json_object->add_clone(last_updated_str(), &last_updated))
262     return true; /* purecov: inspected */
263 
264   // histogram-type
265   const Json_string histogram_type(histogram_type_to_str());
266   if (json_object->add_clone(histogram_type_str(), &histogram_type))
267     return true; /* purecov: inspected */
268 
269   // Sampling rate
270   DBUG_ASSERT(get_sampling_rate() >= 0.0);
271   DBUG_ASSERT(get_sampling_rate() <= 1.0);
272   const Json_double sampling_rate(get_sampling_rate());
273   if (json_object->add_clone(sampling_rate_str(), &sampling_rate))
274     return true; /* purecov: inspected */
275 
276   // The number of buckets specified in the ANALYZE TABLE command
277   const Json_int num_buckets_specified(get_num_buckets_specified());
278   if (json_object->add_clone(numer_of_buckets_specified_str(),
279                              &num_buckets_specified))
280     return true; /* purecov: inspected */
281 
282   // Fraction of NULL values.
283   DBUG_ASSERT(get_null_values_fraction() >= 0.0);
284   DBUG_ASSERT(get_null_values_fraction() <= 1.0);
285   const Json_double null_values(get_null_values_fraction());
286   if (json_object->add_clone(null_values_str(), &null_values))
287     return true; /* purecov: inspected */
288 
289   // charset-id
290   const Json_uint charset_id(get_character_set()->number);
291   if (json_object->add_clone(collation_id_str(), &charset_id))
292     return true; /* purecov: inspected */
293   return false;
294 }
295 
get_null_values_fraction() const296 double Histogram::get_null_values_fraction() const {
297   if (m_null_values_fraction != INVALID_NULL_VALUES_FRACTION) {
298     DBUG_ASSERT(m_null_values_fraction >= 0.0);
299     DBUG_ASSERT(m_null_values_fraction <= 1.0);
300   }
301 
302   return m_null_values_fraction;
303 }
304 
305 template <class T>
build_histogram(MEM_ROOT * mem_root,const Value_map<T> & value_map,size_t num_buckets,const std::string & db_name,const std::string & tbl_name,const std::string & col_name)306 Histogram *build_histogram(MEM_ROOT *mem_root, const Value_map<T> &value_map,
307                            size_t num_buckets, const std::string &db_name,
308                            const std::string &tbl_name,
309                            const std::string &col_name) {
310   Histogram *histogram = nullptr;
311 
312   /*
313     If the number of buckets specified is greater or equal to the number
314     of distinct values, we create a Singleton histogram. Otherwise we create
315     an equi-height histogram.
316   */
317   if (num_buckets >= value_map.size()) {
318     Singleton<T> *singleton = new (mem_root) Singleton<T>(
319         mem_root, db_name, tbl_name, col_name, value_map.get_data_type());
320 
321     if (singleton == nullptr) return nullptr;
322 
323     if (singleton->build_histogram(value_map, num_buckets))
324       return nullptr; /* purecov: inspected */
325 
326     histogram = singleton;
327   } else {
328     Equi_height<T> *equi_height = new (mem_root) Equi_height<T>(
329         mem_root, db_name, tbl_name, col_name, value_map.get_data_type());
330 
331     if (equi_height == nullptr) return nullptr;
332 
333     if (equi_height->build_histogram(value_map, num_buckets))
334       return nullptr; /* purecov: inspected */
335 
336     histogram = equi_height;
337   }
338 
339   // We should not have a nullptr at this point.
340   DBUG_ASSERT(histogram != nullptr);
341 
342   // Verify that the original number of buckets specified is set.
343   DBUG_ASSERT(histogram->get_num_buckets_specified() == num_buckets);
344 
345   // Verify that we haven't created more buckets than requested.
346   DBUG_ASSERT(histogram->get_num_buckets() <= num_buckets);
347 
348   // Ensure that the character set is set.
349   DBUG_ASSERT(histogram->get_character_set() != nullptr);
350 
351   // Check that the fraction of NULL values has been set properly.
352   DBUG_ASSERT(histogram->get_null_values_fraction() >= 0.0);
353   DBUG_ASSERT(histogram->get_null_values_fraction() <= 1.0);
354 
355   return histogram;
356 }
357 
json_to_histogram(MEM_ROOT * mem_root,const std::string & schema_name,const std::string & table_name,const std::string & column_name,const Json_object & json_object)358 Histogram *Histogram::json_to_histogram(MEM_ROOT *mem_root,
359                                         const std::string &schema_name,
360                                         const std::string &table_name,
361                                         const std::string &column_name,
362                                         const Json_object &json_object) {
363   // Histogram type (equi-height or singleton).
364   const Json_dom *histogram_type_dom =
365       json_object.get(Histogram::histogram_type_str());
366   if (histogram_type_dom == nullptr ||
367       histogram_type_dom->json_type() != enum_json_type::J_STRING) {
368     return nullptr; /* purecov: deadcode */
369   }
370 
371   // Histogram data type
372   const Json_dom *data_type_dom = json_object.get(Histogram::data_type_str());
373   if (data_type_dom == nullptr ||
374       data_type_dom->json_type() != enum_json_type::J_STRING) {
375     return nullptr; /* purecov: deadcode */
376   }
377 
378   const Json_string *histogram_type =
379       down_cast<const Json_string *>(histogram_type_dom);
380   const Json_string *data_type = down_cast<const Json_string *>(data_type_dom);
381 
382   Histogram *histogram = nullptr;
383   if (histogram_type->value() == Histogram::equi_height_str()) {
384     // Equi-height histogram
385     if (data_type->value() == "double") {
386       histogram = new (mem_root)
387           Equi_height<double>(mem_root, schema_name, table_name, column_name,
388                               Value_map_type::DOUBLE);
389     } else if (data_type->value() == "int") {
390       histogram = new (mem_root) Equi_height<longlong>(
391           mem_root, schema_name, table_name, column_name, Value_map_type::INT);
392     } else if (data_type->value() == "enum") {
393       histogram = new (mem_root) Equi_height<longlong>(
394           mem_root, schema_name, table_name, column_name, Value_map_type::ENUM);
395     } else if (data_type->value() == "set") {
396       histogram = new (mem_root) Equi_height<longlong>(
397           mem_root, schema_name, table_name, column_name, Value_map_type::SET);
398     } else if (data_type->value() == "uint") {
399       histogram = new (mem_root) Equi_height<ulonglong>(
400           mem_root, schema_name, table_name, column_name, Value_map_type::UINT);
401     } else if (data_type->value() == "string") {
402       histogram = new (mem_root)
403           Equi_height<String>(mem_root, schema_name, table_name, column_name,
404                               Value_map_type::STRING);
405     } else if (data_type->value() == "date") {
406       histogram = new (mem_root) Equi_height<MYSQL_TIME>(
407           mem_root, schema_name, table_name, column_name, Value_map_type::DATE);
408     } else if (data_type->value() == "time") {
409       histogram = new (mem_root) Equi_height<MYSQL_TIME>(
410           mem_root, schema_name, table_name, column_name, Value_map_type::TIME);
411     } else if (data_type->value() == "datetime") {
412       histogram = new (mem_root)
413           Equi_height<MYSQL_TIME>(mem_root, schema_name, table_name,
414                                   column_name, Value_map_type::DATETIME);
415     } else if (data_type->value() == "decimal") {
416       histogram = new (mem_root)
417           Equi_height<my_decimal>(mem_root, schema_name, table_name,
418                                   column_name, Value_map_type::DECIMAL);
419     } else {
420       return nullptr; /* purecov: deadcode */
421     }
422   } else if (histogram_type->value() == Histogram::singleton_str()) {
423     // Singleton histogram
424     if (data_type->value() == "double") {
425       histogram =
426           new (mem_root) Singleton<double>(mem_root, schema_name, table_name,
427                                            column_name, Value_map_type::DOUBLE);
428     } else if (data_type->value() == "int") {
429       histogram = new (mem_root) Singleton<longlong>(
430           mem_root, schema_name, table_name, column_name, Value_map_type::INT);
431     } else if (data_type->value() == "enum") {
432       histogram = new (mem_root) Singleton<longlong>(
433           mem_root, schema_name, table_name, column_name, Value_map_type::ENUM);
434     } else if (data_type->value() == "set") {
435       histogram = new (mem_root) Singleton<longlong>(
436           mem_root, schema_name, table_name, column_name, Value_map_type::SET);
437     } else if (data_type->value() == "uint") {
438       histogram = new (mem_root) Singleton<ulonglong>(
439           mem_root, schema_name, table_name, column_name, Value_map_type::UINT);
440     } else if (data_type->value() == "string") {
441       histogram =
442           new (mem_root) Singleton<String>(mem_root, schema_name, table_name,
443                                            column_name, Value_map_type::STRING);
444     } else if (data_type->value() == "datetime") {
445       histogram = new (mem_root)
446           Singleton<MYSQL_TIME>(mem_root, schema_name, table_name, column_name,
447                                 Value_map_type::DATETIME);
448     } else if (data_type->value() == "date") {
449       histogram = new (mem_root) Singleton<MYSQL_TIME>(
450           mem_root, schema_name, table_name, column_name, Value_map_type::DATE);
451     } else if (data_type->value() == "time") {
452       histogram = new (mem_root) Singleton<MYSQL_TIME>(
453           mem_root, schema_name, table_name, column_name, Value_map_type::TIME);
454     } else if (data_type->value() == "decimal") {
455       histogram = new (mem_root)
456           Singleton<my_decimal>(mem_root, schema_name, table_name, column_name,
457                                 Value_map_type::DECIMAL);
458     } else {
459       return nullptr; /* purecov: deadcode */
460     }
461   } else {
462     // Unsupported histogram type.
463     return nullptr; /* purecov: deadcode */
464   }
465 
466   if (histogram != nullptr && histogram->json_to_histogram(json_object))
467     return nullptr; /* purecov: deadcode */
468   return histogram;
469 }
470 
471 /*
472   All subclasses should also call this function in order to populate fields that
473   are shared among all histogram types (character set, null values fraction).
474 */
json_to_histogram(const Json_object & json_object)475 bool Histogram::json_to_histogram(const Json_object &json_object) {
476   // The sampling rate that was used to create the histogram.
477   const Json_dom *sampling_rate_dom = json_object.get(sampling_rate_str());
478   if (sampling_rate_dom == nullptr ||
479       sampling_rate_dom->json_type() != enum_json_type::J_DOUBLE) {
480     return true; /* purecov: deadcode */
481   }
482   const Json_double *sampling_rate =
483       down_cast<const Json_double *>(sampling_rate_dom);
484   m_sampling_rate = sampling_rate->value();
485 
486   // The number of buckets originally specified by the user.
487   const Json_dom *num_buckets_specified_dom =
488       json_object.get(numer_of_buckets_specified_str());
489   if (num_buckets_specified_dom == nullptr ||
490       num_buckets_specified_dom->json_type() != enum_json_type::J_INT) {
491     return true; /* purecov: deadcode */
492   }
493   const Json_int *num_buckets_specified =
494       down_cast<const Json_int *>(num_buckets_specified_dom);
495   m_num_buckets_specified = num_buckets_specified->value();
496 
497   // Fraction of SQL null-values in the original data set.
498   const Json_dom *null_values_dom = json_object.get(null_values_str());
499   if (null_values_dom == nullptr ||
500       null_values_dom->json_type() != enum_json_type::J_DOUBLE) {
501     return true; /* purecov: deadcode */
502   }
503   const Json_double *null_values =
504       down_cast<const Json_double *>(null_values_dom);
505   m_null_values_fraction = null_values->value();
506 
507   // Character set ID
508   const Json_dom *charset_id_dom = json_object.get(collation_id_str());
509   if (charset_id_dom == nullptr ||
510       charset_id_dom->json_type() != enum_json_type::J_UINT) {
511     return true; /* purecov: deadcode */
512   }
513   const Json_uint *charset_id = down_cast<const Json_uint *>(charset_id_dom);
514 
515   // Get the charset (my_sys.h)
516   m_charset = get_charset(static_cast<uint>(charset_id->value()), MYF(0));
517 
518   return false;
519 }
520 
521 static std::map<const Value_map_type, const std::string> value_map_type_to_str =
522     {{Value_map_type::DATETIME, "datetime"}, {Value_map_type::DATE, "date"},
523      {Value_map_type::TIME, "time"},         {Value_map_type::INT, "int"},
524      {Value_map_type::UINT, "uint"},         {Value_map_type::DOUBLE, "double"},
525      {Value_map_type::DECIMAL, "decimal"},   {Value_map_type::STRING, "string"},
526      {Value_map_type::ENUM, "enum"},         {Value_map_type::SET, "set"}};
527 
histogram_data_type_to_json(Json_object * json_object) const528 bool Histogram::histogram_data_type_to_json(Json_object *json_object) const {
529   std::string foo = value_map_type_to_str[get_data_type()];
530   const Json_string json_value(foo);
531   return json_object->add_clone(data_type_str(), &json_value);
532 }
533 
534 template <>
extract_json_dom_value(const Json_dom * json_dom,double * out)535 bool Histogram::extract_json_dom_value(const Json_dom *json_dom, double *out) {
536   if (json_dom->json_type() != enum_json_type::J_DOUBLE)
537     return true; /* purecov: deadcode */
538   *out = down_cast<const Json_double *>(json_dom)->value();
539   return false;
540 }
541 
542 template <>
extract_json_dom_value(const Json_dom * json_dom,String * out)543 bool Histogram::extract_json_dom_value(const Json_dom *json_dom, String *out) {
544   DBUG_ASSERT(get_character_set() != nullptr);
545   if (json_dom->json_type() != enum_json_type::J_OPAQUE)
546     return true; /* purecov: deadcode */
547   const Json_opaque *json_opaque = down_cast<const Json_opaque *>(json_dom);
548 
549   String value(json_opaque->value(), json_opaque->size(), get_character_set());
550 
551   /*
552     Make a copy of the data, since the JSON opaque will free it before we need
553     it.
554   */
555   char *value_dup_data = value.dup(get_mem_root());
556   if (value_dup_data == nullptr) {
557     DBUG_ASSERT(false); /* purecov: deadcode */
558     return true;        // OOM
559   }
560 
561   out->set(value_dup_data, value.length(), value.charset());
562   return false;
563 }
564 
565 template <>
extract_json_dom_value(const Json_dom * json_dom,ulonglong * out)566 bool Histogram::extract_json_dom_value(const Json_dom *json_dom,
567                                        ulonglong *out) {
568   if (json_dom->json_type() != enum_json_type::J_UINT)
569     return true; /* purecov: deadcode */
570   *out = down_cast<const Json_uint *>(json_dom)->value();
571   return false;
572 }
573 
574 template <>
extract_json_dom_value(const Json_dom * json_dom,longlong * out)575 bool Histogram::extract_json_dom_value(const Json_dom *json_dom,
576                                        longlong *out) {
577   if (json_dom->json_type() != enum_json_type::J_INT)
578     return true; /* purecov: deadcode */
579   *out = down_cast<const Json_int *>(json_dom)->value();
580   return false;
581 }
582 
583 template <>
extract_json_dom_value(const Json_dom * json_dom,MYSQL_TIME * out)584 bool Histogram::extract_json_dom_value(const Json_dom *json_dom,
585                                        MYSQL_TIME *out) {
586   if (json_dom->json_type() != enum_json_type::J_DATE &&
587       json_dom->json_type() != enum_json_type::J_TIME &&
588       json_dom->json_type() != enum_json_type::J_DATETIME &&
589       json_dom->json_type() != enum_json_type::J_TIMESTAMP)
590     return true; /* purecov: deadcode */
591   *out = *down_cast<const Json_datetime *>(json_dom)->value();
592   return false;
593 }
594 
595 template <>
extract_json_dom_value(const Json_dom * json_dom,my_decimal * out)596 bool Histogram::extract_json_dom_value(const Json_dom *json_dom,
597                                        my_decimal *out) {
598   if (json_dom->json_type() != enum_json_type::J_DECIMAL)
599     return true; /* purecov: deadcode */
600   *out = *down_cast<const Json_decimal *>(json_dom)->value();
601   return false;
602 }
603 
604 /**
605   Check if a field is covered by a single-part unique index (primary key or
606   unique index). Indexes that are marked as invisible are ignored.
607 
608   @param thd The current session.
609   @param field The field to check.
610 
611   @return true if the field is covered by a single-part unique index. False
612           otherwise.
613 */
covered_by_single_part_index(const THD * thd,const Field * field)614 static bool covered_by_single_part_index(const THD *thd, const Field *field) {
615   Key_map possible_keys;
616   possible_keys.merge(field->table->s->usable_indexes(thd));
617   possible_keys.intersect(field->key_start);
618   DBUG_ASSERT(field->table->s->keys <= possible_keys.length());
619   for (uint i = 0; i < field->table->s->keys; ++i) {
620     if (possible_keys.is_set(i) &&
621         field->table->s->key_info[i].user_defined_key_parts == 1 &&
622         (field->table->s->key_info[i].flags & HA_NOSAME)) {
623       return true;
624     }
625   }
626 
627   return false;
628 }
629 
630 /**
631   Prepare one Value_map for each field we are creating histogram statistics for.
632   We will also estimate how many bytes one row will consume. For example, if we
633   are creating histogram statistics for two INTEGER columns, we estimate that
634   one row will consume (sizeof(longlong) * 2) bytes (16 bytes).
635 
636   @param fields              A vector with all the fields we are creating
637                              histogram statistics for.
638   @param[out] value_maps     A map where the Value_maps will be initialized.
639   @param[out] row_size_bytes An estimation of how many bytes one row will
640                              consume.
641 
642   @return true on error, false otherwise.
643 */
prepare_value_maps(std::vector<Field *,Histogram_key_allocator<Field * >> & fields,value_map_collection & value_maps,size_t * row_size_bytes)644 static bool prepare_value_maps(
645     std::vector<Field *, Histogram_key_allocator<Field *>> &fields,
646     value_map_collection &value_maps, size_t *row_size_bytes) {
647   *row_size_bytes = 0;
648   for (const Field *field : fields) {
649     histograms::Value_map_base *value_map = nullptr;
650 
651     const Value_map_type value_map_type =
652         histograms::field_type_to_value_map_type(field);
653 
654     switch (value_map_type) {
655       case histograms::Value_map_type::STRING: {
656         size_t max_field_length =
657             std::min(static_cast<size_t>(field->field_length),
658                      histograms::HISTOGRAM_MAX_COMPARE_LENGTH);
659         *row_size_bytes += max_field_length * field->charset()->mbmaxlen;
660         value_map =
661             new histograms::Value_map<String>(field->charset(), value_map_type);
662         break;
663       }
664       case histograms::Value_map_type::DOUBLE: {
665         value_map =
666             new histograms::Value_map<double>(field->charset(), value_map_type);
667         break;
668       }
669       case histograms::Value_map_type::INT:
670       case histograms::Value_map_type::ENUM:
671       case histograms::Value_map_type::SET: {
672         value_map = new histograms::Value_map<longlong>(field->charset(),
673                                                         value_map_type);
674         break;
675       }
676       case histograms::Value_map_type::UINT: {
677         value_map = new histograms::Value_map<ulonglong>(field->charset(),
678                                                          value_map_type);
679         break;
680       }
681       case histograms::Value_map_type::DATETIME:
682       case histograms::Value_map_type::DATE:
683       case histograms::Value_map_type::TIME: {
684         value_map = new histograms::Value_map<MYSQL_TIME>(field->charset(),
685                                                           value_map_type);
686         break;
687       }
688       case histograms::Value_map_type::DECIMAL: {
689         value_map = new histograms::Value_map<my_decimal>(field->charset(),
690                                                           value_map_type);
691         break;
692       }
693       case histograms::Value_map_type::INVALID: {
694         DBUG_ASSERT(false); /* purecov: deadcode */
695         return true;
696       }
697     }
698 
699     // Overhead for each element
700     *row_size_bytes += value_map->element_overhead();
701 
702     value_maps.emplace(field->field_index(),
703                        std::unique_ptr<histograms::Value_map_base>(value_map));
704   }
705 
706   return false;
707 }
708 
709 /**
710   Read data from a table into the provided Value_maps. We will read data using
711   sampling with the provided sampling percentage.
712 
713   @param fields            A vector with the fields we are reading data from.
714   @param sample_percentage The sampling percentage we will use for sampling.
715                            Must be between 0.0 and 100.0.
716   @param table             The table we are reading the data from.
717   @param value_maps        The Value_maps we are reading data into.
718 
719   @return true on error, false otherwise.
720 */
fill_value_maps(const std::vector<Field *,Histogram_key_allocator<Field * >> & fields,double sample_percentage,const TABLE * table,value_map_collection & value_maps)721 static bool fill_value_maps(
722     const std::vector<Field *, Histogram_key_allocator<Field *>> &fields,
723     double sample_percentage, const TABLE *table,
724     value_map_collection &value_maps) {
725   DBUG_ASSERT(sample_percentage > 0.0);
726   DBUG_ASSERT(sample_percentage <= 100.0);
727   DBUG_ASSERT(fields.size() == value_maps.size());
728 
729   std::random_device rd;
730   std::uniform_int_distribution<int> dist;
731   int sampling_seed = dist(rd);
732 
733   DBUG_EXECUTE_IF("histogram_force_sampling", {
734     sampling_seed = 1;
735     sample_percentage = 50.0;
736   });
737 
738   void *scan_ctx = nullptr;
739 
740   for (auto &value_map : value_maps)
741     value_map.second->set_sampling_rate(sample_percentage / 100.0);
742 
743   if (table->file->ha_sample_init(scan_ctx, sample_percentage, sampling_seed,
744                                   enum_sampling_method::SYSTEM)) {
745     return true;
746   }
747 
748   auto handler_guard = create_scope_guard([table, scan_ctx]() {
749     table->file->ha_sample_end(scan_ctx); /* purecov: deadcode */
750   });
751 
752   // Read the data from each column into its own Value_map.
753   int res = table->file->ha_sample_next(scan_ctx, table->record[0]);
754 
755   while (res == 0) {
756     for (Field *field : fields) {
757       histograms::Value_map_base *value_map =
758           value_maps.at(field->field_index()).get();
759 
760       switch (histograms::field_type_to_value_map_type(field)) {
761         case histograms::Value_map_type::STRING: {
762           StringBuffer<MAX_FIELD_WIDTH> str_buf(field->charset());
763           field->val_str(&str_buf);
764 
765           if (field->is_null())
766             value_map->add_null_values(1);
767           else if (value_map->add_values(static_cast<String>(str_buf), 1))
768             return true; /* purecov: deadcode */
769           break;
770         }
771         case histograms::Value_map_type::DOUBLE: {
772           double value = field->val_real();
773           if (field->is_null())
774             value_map->add_null_values(1);
775           else if (value_map->add_values(value, 1))
776             return true; /* purecov: deadcode */
777           break;
778         }
779         case histograms::Value_map_type::INT:
780         case histograms::Value_map_type::ENUM:
781         case histograms::Value_map_type::SET: {
782           longlong value = field->val_int();
783           if (field->is_null())
784             value_map->add_null_values(1);
785           else if (value_map->add_values(value, 1))
786             return true; /* purecov: deadcode */
787           break;
788         }
789         case histograms::Value_map_type::UINT: {
790           ulonglong value = static_cast<ulonglong>(field->val_int());
791           if (field->is_null())
792             value_map->add_null_values(1);
793           else if (value_map->add_values(value, 1))
794             return true; /* purecov: deadcode */
795           break;
796         }
797         case histograms::Value_map_type::DATE: {
798           MYSQL_TIME time_value;
799           TIME_from_longlong_date_packed(&time_value,
800                                          field->val_date_temporal());
801           if (field->is_null())
802             value_map->add_null_values(1);
803           else if (value_map->add_values(time_value, 1))
804             return true; /* purecov: deadcode */
805           break;
806         }
807         case histograms::Value_map_type::TIME: {
808           MYSQL_TIME time_value;
809           TIME_from_longlong_time_packed(&time_value,
810                                          field->val_time_temporal());
811           if (field->is_null())
812             value_map->add_null_values(1);
813           else if (value_map->add_values(time_value, 1))
814             return true; /* purecov: deadcode */
815           break;
816         }
817         case histograms::Value_map_type::DATETIME: {
818           MYSQL_TIME time_value;
819           TIME_from_longlong_datetime_packed(&time_value,
820                                              field->val_date_temporal());
821           if (field->is_null())
822             value_map->add_null_values(1);
823           else if (value_map->add_values(time_value, 1))
824             return true; /* purecov: deadcode */
825           break;
826         }
827         case histograms::Value_map_type::DECIMAL: {
828           my_decimal buffer;
829           my_decimal *value;
830           value = field->val_decimal(&buffer);
831 
832           if (field->is_null())
833             value_map->add_null_values(1);
834           else if (value_map->add_values(*value, 1))
835             return true; /* purecov: deadcode */
836           break;
837         }
838         case histograms::Value_map_type::INVALID: {
839           DBUG_ASSERT(false); /* purecov: deadcode */
840           break;
841         }
842       }
843     }
844 
845     res = table->file->ha_sample_next(scan_ctx, table->record[0]);
846 
847     DBUG_EXECUTE_IF(
848         "sample_read_sample_half", static uint count = 1;
849         if (count == std::max(1ULL, table->file->stats.records) / 2) {
850           res = HA_ERR_END_OF_FILE;
851           break;
852         } ++count;);
853   }
854 
855   if (res != HA_ERR_END_OF_FILE) return true; /* purecov: deadcode */
856 
857   // Close the handler
858   handler_guard.commit();
859   if (table->file->ha_sample_end(scan_ctx)) {
860     DBUG_ASSERT(false); /* purecov: deadcode */
861     return true;
862   }
863 
864   return false;
865 }
866 
/**
  Create or update histogram statistics for a set of columns in one table.

  The table is opened and locked, every requested column is validated, the
  table is sampled into one Value_map per accepted column, and a histogram is
  built from each Value_map and stored. The outcome for each column (or for
  the table as a whole, using an empty column name) is recorded in "results".

  @param thd         Thread handler.
  @param table       The table to analyze. Asserted to be the only table in
                     the list.
  @param columns     Names of the columns to build histograms for. Asserted to
                     be non-empty.
  @param num_buckets Number of buckets, passed on to build_histogram().
  @param results     Receives one message per processed column. Asserted to be
                     empty on entry.

  @return true on error, or when no histogram could be built for any of the
          requested columns. Otherwise the result of committing the statement
          and the transaction (false on success).
*/
bool update_histogram(THD *thd, TABLE_LIST *table, const columns_set &columns,
                      int num_buckets, results_map &results) {
  dd::cache::Dictionary_client::Auto_releaser auto_releaser(thd->dd_client());

  // Read only should have been stopped at an earlier stage.
  DBUG_ASSERT(!check_readonly(thd, false));
  DBUG_ASSERT(!thd->tx_read_only);

  DBUG_ASSERT(results.empty());
  DBUG_ASSERT(!columns.empty());

  // Only one table should be specified in ANALYZE TABLE .. UPDATE HISTOGRAM
  DBUG_ASSERT(table->next_local == nullptr);

  if (table->table != nullptr && table->table->s->tmp_table != NO_TMP_TABLE) {
    /*
      Normally, the table we are going to read data from is not initialized at
      this point. But if table->table is not a null-pointer, it has already been
      initialized at an earlier stage. This will happen if the table is a
      temporary table.
    */
    results.emplace("", Message::TEMPORARY_TABLE);
    return true;
  }

  /*
    Create two scope guards; one for disabling autocommit and one that will do a
    rollback and ensure that any open tables are closed before returning.
  */
  Disable_autocommit_guard autocommit_guard(thd);
  auto tables_guard = create_scope_guard([thd]() {
    if (trans_rollback_stmt(thd) || trans_rollback(thd))
      DBUG_ASSERT(false); /* purecov: deadcode */
    close_thread_tables(thd);
  });

  table->reinit_before_use(thd);
  if (open_and_lock_tables(thd, table, 0)) {
    return true;
  }

  // Debug hook: simulate a failure right after the table has been opened.
  DBUG_EXECUTE_IF("histogram_fail_after_open_table", { return true; });

  // Views are rejected with a table-level message.
  if (table->is_view()) {
    results.emplace("", Message::VIEW);
    return true;
  }

  DBUG_ASSERT(table->table != nullptr);
  TABLE *tbl = table->table;

  // Reject the table if its encrypt_type is set to anything other than "n".
  if (tbl->s->encrypt_type.length > 0 &&
      my_strcasecmp(system_charset_info, "n", tbl->s->encrypt_type.str) != 0) {
    results.emplace("", Message::ENCRYPTED_TABLE);
    return true;
  }

  /*
    Check if the provided column names exist, and that they have a supported
    data type. If they do, mark them in the read set.
  */
  bitmap_clear_all(tbl->write_set);
  bitmap_clear_all(tbl->read_set);
  std::vector<Field *, Histogram_key_allocator<Field *>> resolved_fields;

  for (const std::string &column_name : columns) {
    Field *field = find_field_in_table_sef(tbl, column_name.c_str());

    if (field == nullptr) {
      // Field not found in table
      results.emplace(column_name, Message::FIELD_NOT_FOUND);
      continue;
    } else if (histograms::field_type_to_value_map_type(field) ==
               histograms::Value_map_type::INVALID) {
      // Unsupported data type
      results.emplace(column_name, Message::UNSUPPORTED_DATA_TYPE);
      continue;
    }

    /*
      Check if this field is covered by a single-part unique index. If it is, we
      don't want to create histogram statistics for it.
    */
    if (covered_by_single_part_index(thd, field)) {
      results.emplace(column_name,
                      Message::COVERED_BY_SINGLE_PART_UNIQUE_INDEX);
      continue;
    }
    resolved_fields.push_back(field);

    bitmap_set_bit(tbl->read_set, field->field_index());
    if (field->is_gcol()) {
      bitmap_set_bit(tbl->write_set, field->field_index());
      /*
        The base columns needs to be in the write set in case of nested
        generated columns:

        CREATE TABLE t1 (
          col1 INT,
          col2 INT AS (col1 + 1) VIRTUAL,
          col3 INT AS (col2 + 1) VIRTUAL);

        If we are reading data from "col3", we also need to update the data in
        "col2" in order for the generated value to be correct.
      */
      bitmap_union(tbl->write_set, &field->gcol_info->base_columns_map);
      bitmap_union(tbl->read_set, &field->gcol_info->base_columns_map);
    }
  }

  /*
    If we don't have any fields, we just quit here. Return "true" so we don't
    write empty transactions/statements to the binlog.
  */
  if (resolved_fields.empty()) return true;

  /*
    Prepare one Value_map for each field we are creating histogram statistics
    for. Also, estimate how many bytes one row will consume so that we can
    estimate how many rows we can fit into memory permitted by
    histogram_generation_max_mem_size.
  */
  size_t row_size_bytes = 0;
  value_map_collection value_maps;
  if (prepare_value_maps(resolved_fields, value_maps, &row_size_bytes))
    return true; /* purecov: deadcode */

  /*
    Calculate how many rows we can fit into memory permitted by
    histogram_generation_max_mem_size.
  */
  double rows_in_memory = thd->variables.histogram_generation_max_mem_size /
                          static_cast<double>(row_size_bytes);

  /*
    Ensure that we estimate at least one row in the table, so we avoid
    division by zero error.

    NOTE: We ignore errors from "fetch_number_of_rows()" on purpose, since we
    don't consider it fatal not having the correct row estimate.
  */
  table->fetch_number_of_rows();
  ha_rows rows_in_table = std::max(1ULL, tbl->file->stats.records);

  // Sample only as large a share of the table as fits in memory; cap at 100%.
  double sample_percentage = rows_in_memory / rows_in_table * 100.0;
  sample_percentage = std::min(sample_percentage, 100.0);

  // Read data from the table into the Value_maps we have prepared.
  if (fill_value_maps(resolved_fields, sample_percentage, tbl, value_maps))
    return true; /* purecov: deadcode */

  // Create a histogram for each Value_map, and store it to persistent storage.
  for (const Field *field : resolved_fields) {
    /*
      The MEM_ROOT is transferred to the dictionary object when
      histogram->store_histogram is called.
    */
    MEM_ROOT local_mem_root;
    init_alloc_root(key_memory_histograms, &local_mem_root, 256, 0);

    std::string col_name(field->field_name);
    histograms::Histogram *histogram =
        value_maps.at(field->field_index())
            ->build_histogram(
                &local_mem_root, num_buckets,
                std::string(table->db, table->db_length),
                std::string(table->table_name, table->table_name_length),
                col_name);

    if (histogram == nullptr) {
      /* purecov: begin inspected */
      my_error(ER_UNABLE_TO_BUILD_HISTOGRAM, MYF(0), field->field_name,
               table->db, table->table_name);
      return true;
      /* purecov: end */
    } else if (histogram->store_histogram(thd)) {
      // errors have already been reported
      return true; /* purecov: deadcode */
    }

    results.emplace(col_name, Message::HISTOGRAM_CREATED);
  }

  /*
    Success path: commit, close the tables, and only then disarm the guard so
    that it does not roll back what was just committed.
  */
  bool ret = trans_commit_stmt(thd) || trans_commit(thd);
  close_thread_tables(thd);
  tables_guard.commit();
  return ret;
}
1055 
drop_all_histograms(THD * thd,const TABLE_LIST & table,const dd::Table & table_definition,results_map & results)1056 bool drop_all_histograms(THD *thd, const TABLE_LIST &table,
1057                          const dd::Table &table_definition,
1058                          results_map &results) {
1059   columns_set columns;
1060   for (const auto &col : table_definition.columns())
1061     columns.emplace(col->name().c_str());
1062 
1063   return drop_histograms(thd, table, columns, results);
1064 }
1065 
drop_histograms(THD * thd,const TABLE_LIST & table,const columns_set & columns,results_map & results)1066 bool drop_histograms(THD *thd, const TABLE_LIST &table,
1067                      const columns_set &columns, results_map &results) {
1068   dd::cache::Dictionary_client *client = thd->dd_client();
1069   dd::cache::Dictionary_client::Auto_releaser auto_releaser(client);
1070 
1071   for (const std::string &column_name : columns) {
1072     MDL_key mdl_key;
1073     dd::Column_statistics::create_mdl_key(
1074         {table.db, table.db_length},
1075         {table.table_name, table.table_name_length}, column_name.c_str(),
1076         &mdl_key);
1077 
1078     if (lock_for_write(thd, mdl_key))
1079       return true;  // error is already reported.
1080 
1081     dd::String_type dd_name = dd::Column_statistics::create_name(
1082         {table.db, table.db_length},
1083         {table.table_name, table.table_name_length}, column_name.c_str());
1084 
1085     // Do we have an existing histogram for this column?
1086     const dd::Column_statistics *column_statistics = nullptr;
1087     if (client->acquire(dd_name, &column_statistics)) {
1088       // error is already reported.
1089       return true; /* purecov: deadcode */
1090     }
1091 
1092     if (column_statistics == nullptr) {
1093       results.emplace(column_name, Message::NO_HISTOGRAM_FOUND);
1094       continue;
1095     }
1096 
1097     if (client->drop(column_statistics)) {
1098       /* purecov: begin inspected */
1099       my_error(ER_UNABLE_TO_DROP_COLUMN_STATISTICS, MYF(0), column_name.c_str(),
1100                table.db, table.table_name);
1101       return true;
1102       /* purecov: end */
1103     }
1104 
1105     results.emplace(column_name, Message::HISTOGRAM_DELETED);
1106   }
1107 
1108   return false;
1109 }
1110 
store_histogram(THD * thd) const1111 bool Histogram::store_histogram(THD *thd) const {
1112   dd::cache::Dictionary_client *client = thd->dd_client();
1113 
1114   MDL_key mdl_key;
1115   dd::Column_statistics::create_mdl_key(get_database_name().str,
1116                                         get_table_name().str,
1117                                         get_column_name().str, &mdl_key);
1118 
1119   if (lock_for_write(thd, mdl_key)) {
1120     // Error has already been reported
1121     return true; /* purecov: deadcode */
1122   }
1123 
1124   DEBUG_SYNC(thd, "store_histogram_after_write_lock");
1125 
1126   dd::String_type dd_name = dd::Column_statistics::create_name(
1127       get_database_name().str, get_table_name().str, get_column_name().str);
1128 
1129   // Do we have an existing histogram for this column?
1130   dd::Column_statistics *column_stats = nullptr;
1131   if (client->acquire_for_modification(dd_name, &column_stats)) {
1132     // Error has already been reported
1133     return true; /* purecov: deadcode */
1134   }
1135 
1136   if (column_stats != nullptr) {
1137     // Update the existing object.
1138     column_stats->set_histogram(this);
1139     if (client->update(column_stats)) {
1140       /* purecov: begin inspected */
1141       my_error(ER_UNABLE_TO_UPDATE_COLUMN_STATISTICS, MYF(0),
1142                get_column_name().str, get_database_name().str,
1143                get_table_name().str);
1144       return true;
1145       /* purecov: end */
1146     }
1147   } else {
1148     // Create a new object
1149     std::unique_ptr<dd::Column_statistics> column_statistics(
1150         dd::create_object<dd::Column_statistics>());
1151 
1152     column_statistics.get()->set_schema_name(get_database_name().str);
1153     column_statistics.get()->set_table_name(get_table_name().str);
1154     column_statistics.get()->set_column_name(get_column_name().str);
1155     column_statistics.get()->set_name(dd_name);
1156     column_statistics.get()->set_histogram(this);
1157 
1158     if (client->store(column_statistics.get())) {
1159       /* purecov: begin inspected */
1160       my_error(ER_UNABLE_TO_STORE_COLUMN_STATISTICS, MYF(0),
1161                get_column_name().str, get_database_name().str,
1162                get_table_name().str);
1163       return true;
1164       /* purecov: end */
1165     }
1166   }
1167 
1168   return false;
1169 }
1170 
1171 /**
1172   Rename a single histogram from a old schema/table name to a new schema/table
1173   name. It is used for instance by RENAME TABLE, where the contents of the
1174   histograms doesn't change.
1175 
1176   @param thd             Thread handler.
1177   @param old_schema_name The old schema name.
1178   @param old_table_name  The old table name.
1179   @param new_schema_name The new schema name.
1180   @param new_table_name  The new table name.
1181   @param column_name     The column name.
1182   @param results         A map where the result of the operation is stored.
1183 
1184   @return false on success, true on error.
1185 */
rename_histogram(THD * thd,const char * old_schema_name,const char * old_table_name,const char * new_schema_name,const char * new_table_name,const char * column_name,results_map & results)1186 static bool rename_histogram(THD *thd, const char *old_schema_name,
1187                              const char *old_table_name,
1188                              const char *new_schema_name,
1189                              const char *new_table_name,
1190                              const char *column_name, results_map &results) {
1191   dd::cache::Dictionary_client *client = thd->dd_client();
1192   dd::cache::Dictionary_client::Auto_releaser auto_releaser(client);
1193 
1194   // First find the histogram with the old name.
1195   MDL_key mdl_key;
1196   dd::Column_statistics::create_mdl_key(old_schema_name, old_table_name,
1197                                         column_name, &mdl_key);
1198 
1199   if (lock_for_write(thd, mdl_key)) {
1200     // Error has already been reported
1201     return true; /* purecov: deadcode */
1202   }
1203 
1204   dd::String_type dd_name = dd::Column_statistics::create_name(
1205       old_schema_name, old_table_name, column_name);
1206 
1207   dd::Column_statistics *column_statistics = nullptr;
1208   if (client->acquire_for_modification(dd_name, &column_statistics)) {
1209     // Error has already been reported
1210     return true; /* purecov: deadcode */
1211   }
1212 
1213   if (column_statistics == nullptr) {
1214     results.emplace(column_name, Message::NO_HISTOGRAM_FOUND);
1215     return false;
1216   }
1217 
1218   dd::Column_statistics::create_mdl_key(new_schema_name, new_table_name,
1219                                         column_name, &mdl_key);
1220 
1221   if (lock_for_write(thd, mdl_key)) {
1222     // Error has already been reported
1223     return true; /* purecov: deadcode */
1224   }
1225 
1226   column_statistics->set_schema_name(new_schema_name);
1227   column_statistics->set_table_name(new_table_name);
1228   column_statistics->set_column_name(column_name);
1229   column_statistics->set_name(column_statistics->create_name());
1230   if (client->update(column_statistics)) {
1231     /* purecov: begin inspected */
1232     my_error(ER_UNABLE_TO_UPDATE_COLUMN_STATISTICS, MYF(0), column_name,
1233              old_schema_name, old_table_name);
1234     return true;
1235     /* purecov: end */
1236   }
1237 
1238   results.emplace(column_name, Message::HISTOGRAM_DELETED);
1239   return false;
1240 }
1241 
rename_histograms(THD * thd,const char * old_schema_name,const char * old_table_name,const char * new_schema_name,const char * new_table_name,results_map & results)1242 bool rename_histograms(THD *thd, const char *old_schema_name,
1243                        const char *old_table_name, const char *new_schema_name,
1244                        const char *new_table_name, results_map &results) {
1245   dd::cache::Dictionary_client::Auto_releaser releaser(thd->dd_client());
1246 
1247   MDL_request mdl_request;
1248   MDL_REQUEST_INIT(&mdl_request, MDL_key::TABLE, old_schema_name,
1249                    old_table_name, MDL_SHARED_READ_ONLY, MDL_TRANSACTION);
1250 
1251   if (thd->mdl_context.acquire_lock(&mdl_request,
1252                                     thd->variables.lock_wait_timeout)) {
1253     // error has already been reported
1254     return true; /* purecov: deadcode */
1255   }
1256 
1257   /*
1258     We have to look up the new table since it already will be renamed at this
1259     point.
1260   */
1261   const dd::Table *table_def = nullptr;
1262   if (thd->dd_client()->acquire(new_schema_name, new_table_name, &table_def)) {
1263     // error has already been reported
1264     return false; /* purecov: deadcode */
1265   }
1266 
1267   if (table_def == nullptr) {
1268     DBUG_ASSERT(false); /* purecov: deadcode */
1269     return false;
1270   }
1271 
1272   for (const auto &col : table_def->columns()) {
1273     if (rename_histogram(thd, old_schema_name, old_table_name, new_schema_name,
1274                          new_table_name, col->name().c_str(), results))
1275       return true; /* purecov: deadcode */
1276   }
1277 
1278   return false;
1279 }
1280 
find_histogram(THD * thd,const std::string & schema_name,const std::string & table_name,const std::string & column_name,const Histogram ** histogram)1281 bool find_histogram(THD *thd, const std::string &schema_name,
1282                     const std::string &table_name,
1283                     const std::string &column_name,
1284                     const Histogram **histogram) {
1285   DBUG_ASSERT(*histogram == nullptr);
1286 
1287   if (schema_name == "mysql" || table_name == "column_statistics") return false;
1288 
1289   dd::String_type dd_name = dd::Column_statistics::create_name(
1290       schema_name.c_str(), table_name.c_str(), column_name.c_str());
1291 
1292   const dd::Column_statistics *column_statistics = nullptr;
1293   dd::cache::Dictionary_client *client = thd->dd_client();
1294   if (client->acquire<dd::Column_statistics>(dd_name, &column_statistics))
1295     return true; /* purecov: deadcode */
1296 
1297   if (column_statistics == nullptr) return false;
1298 
1299   *histogram = column_statistics->histogram();
1300   return false;
1301 }
1302 
1303 template <class T>
get_less_than_selectivity_dispatcher(const T & value) const1304 double Histogram::get_less_than_selectivity_dispatcher(const T &value) const {
1305   switch (get_histogram_type()) {
1306     case enum_histogram_type::SINGLETON: {
1307       const Singleton<T> *singleton = down_cast<const Singleton<T> *>(this);
1308       return singleton->get_less_than_selectivity(value);
1309     }
1310     case enum_histogram_type::EQUI_HEIGHT: {
1311       const Equi_height<T> *equi_height =
1312           down_cast<const Equi_height<T> *>(this);
1313       return equi_height->get_less_than_selectivity(value);
1314     }
1315   }
1316   /* purecov: begin deadcode */
1317   DBUG_ASSERT(false);
1318   return 0.0;
1319   /* purecov: end deadcode */
1320 }
1321 
1322 template <class T>
get_greater_than_selectivity_dispatcher(const T & value) const1323 double Histogram::get_greater_than_selectivity_dispatcher(
1324     const T &value) const {
1325   switch (get_histogram_type()) {
1326     case enum_histogram_type::SINGLETON: {
1327       const Singleton<T> *singleton = down_cast<const Singleton<T> *>(this);
1328       return singleton->get_greater_than_selectivity(value);
1329     }
1330     case enum_histogram_type::EQUI_HEIGHT: {
1331       const Equi_height<T> *equi_height =
1332           down_cast<const Equi_height<T> *>(this);
1333       return equi_height->get_greater_than_selectivity(value);
1334     }
1335   }
1336   /* purecov: begin deadcode */
1337   DBUG_ASSERT(false);
1338   return 0.0;
1339   /* purecov: end deadcode */
1340 }
1341 
1342 template <class T>
get_equal_to_selectivity_dispatcher(const T & value) const1343 double Histogram::get_equal_to_selectivity_dispatcher(const T &value) const {
1344   switch (get_histogram_type()) {
1345     case enum_histogram_type::SINGLETON: {
1346       const Singleton<T> *singleton = down_cast<const Singleton<T> *>(this);
1347       return singleton->get_equal_to_selectivity(value);
1348     }
1349     case enum_histogram_type::EQUI_HEIGHT: {
1350       const Equi_height<T> *equi_height =
1351           down_cast<const Equi_height<T> *>(this);
1352       return equi_height->get_equal_to_selectivity(value);
1353     }
1354   }
1355   /* purecov: begin deadcode */
1356   DBUG_ASSERT(false);
1357   return 0.0;
1358   /* purecov: end deadcode */
1359 }
1360 
get_temporal(Item * item,Value_map_type preferred_type,MYSQL_TIME * time_value)1361 static bool get_temporal(Item *item, Value_map_type preferred_type,
1362                          MYSQL_TIME *time_value) {
1363   if (item->is_temporal_with_date_and_time()) {
1364     TIME_from_longlong_datetime_packed(time_value, item->val_date_temporal());
1365   } else if (item->is_temporal_with_date()) {
1366     TIME_from_longlong_date_packed(time_value, item->val_date_temporal());
1367   } else if (item->is_temporal_with_time()) {
1368     TIME_from_longlong_time_packed(time_value, item->val_time_temporal());
1369   } else {
1370     switch (preferred_type) {
1371       case Value_map_type::DATE:
1372       case Value_map_type::DATETIME:
1373         if (item->get_date_from_non_temporal(time_value, 0)) return true;
1374         break;
1375       case Value_map_type::TIME:
1376         if (item->get_time_from_non_temporal(time_value)) return true;
1377         break;
1378       default:
1379         /* purecov: begin deadcode */
1380         DBUG_ASSERT(0);
1381         break;
1382         /* purecov: end deadcode */
1383     }
1384   }
1385 
1386   return false;
1387 }
1388 
1389 template <class T>
apply_operator(const enum_operator op,const T & value) const1390 double Histogram::apply_operator(const enum_operator op, const T &value) const {
1391   switch (op) {
1392     case enum_operator::LESS_THAN:
1393       return get_less_than_selectivity_dispatcher(value);
1394     case enum_operator::GREATER_THAN:
1395       return get_greater_than_selectivity_dispatcher(value);
1396     case enum_operator::EQUALS_TO:
1397       return get_equal_to_selectivity_dispatcher(value);
1398     default:
1399       /* purecov: begin deadcode */
1400       DBUG_ASSERT(false);
1401       return 1.0;
1402       /* purecov: end deadcode */
1403   }
1404 }
1405 
get_selectivity_dispatcher(Item * item,const enum_operator op,const TYPELIB * typelib,double * selectivity) const1406 bool Histogram::get_selectivity_dispatcher(Item *item, const enum_operator op,
1407                                            const TYPELIB *typelib,
1408                                            double *selectivity) const {
1409   switch (this->get_data_type()) {
1410     case Value_map_type::INVALID: {
1411       /* purecov: begin deadcode */
1412       DBUG_ASSERT(false);
1413       return true;
1414       /* purecov: end deadcode */
1415     }
1416     case Value_map_type::STRING: {
1417       // Is the character set the same? If not, we cannot use the histogram
1418       if (item->collation.collation->number != get_character_set()->number)
1419         return true;
1420 
1421       StringBuffer<MAX_FIELD_WIDTH> str_buf(item->collation.collation);
1422       const String *str = item->val_str(&str_buf);
1423       if (item->is_null()) return true;
1424 
1425       *selectivity =
1426           apply_operator(op, str->substr(0, HISTOGRAM_MAX_COMPARE_LENGTH));
1427       return false;
1428     }
1429     case Value_map_type::INT: {
1430       const longlong value = item->val_int();
1431       if (item->is_null()) return true;
1432 
1433       *selectivity = apply_operator(op, value);
1434       return false;
1435     }
1436     case Value_map_type::ENUM: {
1437       DBUG_ASSERT(typelib != nullptr);
1438 
1439       longlong value;
1440       if (item->data_type() == MYSQL_TYPE_VARCHAR) {
1441         StringBuffer<MAX_FIELD_WIDTH> str_buf(item->collation.collation);
1442         const String *str = item->val_str(&str_buf);
1443         if (item->is_null()) return true;
1444 
1445         // Remove any trailing whitespace
1446         size_t length = str->charset()->cset->lengthsp(
1447             str->charset(), str->ptr(), str->length());
1448         value = find_type2(typelib, str->ptr(), length, str->charset());
1449       } else {
1450         value = item->val_int();
1451         if (item->is_null()) return true;
1452       }
1453 
1454       if (op == enum_operator::EQUALS_TO) {
1455         *selectivity = get_equal_to_selectivity_dispatcher(value);
1456         return false;
1457       }
1458 
1459       return true; /* purecov: deadcode */
1460     }
1461     case Value_map_type::SET: {
1462       DBUG_ASSERT(typelib != nullptr);
1463 
1464       longlong value;
1465       if (item->data_type() == MYSQL_TYPE_VARCHAR) {
1466         StringBuffer<MAX_FIELD_WIDTH> str_buf(item->collation.collation);
1467         const String *str = item->val_str(&str_buf);
1468         if (item->is_null()) return true;
1469 
1470         bool got_warning;
1471         const char *not_used;
1472         uint not_used2;
1473         ulonglong tmp_value =
1474             find_set(typelib, str->ptr(), str->length(), str->charset(),
1475                      &not_used, &not_used2, &got_warning);
1476 
1477         value = static_cast<ulonglong>(tmp_value);
1478       } else {
1479         value = item->val_int();
1480         if (item->is_null()) return true;
1481       }
1482 
1483       if (op == enum_operator::EQUALS_TO) {
1484         *selectivity = get_equal_to_selectivity_dispatcher(value);
1485         return false;
1486       }
1487 
1488       return true; /* purecov: deadcode */
1489     }
1490     case Value_map_type::UINT: {
1491       const ulonglong value = static_cast<ulonglong>(item->val_int());
1492       if (item->is_null()) return true;
1493 
1494       *selectivity = apply_operator(op, value);
1495       return false;
1496     }
1497     case Value_map_type::DOUBLE: {
1498       const double value = item->val_real();
1499       if (item->is_null()) return true;
1500 
1501       *selectivity = apply_operator(op, value);
1502       return false;
1503     }
1504     case Value_map_type::DECIMAL: {
1505       my_decimal buffer;
1506       const my_decimal *value = item->val_decimal(&buffer);
1507       if (item->is_null()) return true;
1508 
1509       *selectivity = apply_operator(op, *value);
1510       return false;
1511     }
1512     case Value_map_type::DATE:
1513     case Value_map_type::TIME:
1514     case Value_map_type::DATETIME: {
1515       MYSQL_TIME temporal_value;
1516       if (get_temporal(item, get_data_type(), &temporal_value) ||
1517           item->is_null())
1518         return true;
1519 
1520       *selectivity = apply_operator(op, temporal_value);
1521       return false;
1522     }
1523   }
1524 
1525   /* purecov: begin deadcode */
1526   DBUG_ASSERT(false);
1527   return true;
1528   /* purecov: end deadcode */
1529 }
1530 
get_selectivity(Item ** items,size_t item_count,enum_operator op,double * selectivity) const1531 bool Histogram::get_selectivity(Item **items, size_t item_count,
1532                                 enum_operator op, double *selectivity) const {
1533   // Do some sanity checking first
1534   switch (op) {
1535     case enum_operator::EQUALS_TO:
1536     case enum_operator::GREATER_THAN:
1537     case enum_operator::LESS_THAN:
1538     case enum_operator::LESS_THAN_OR_EQUAL:
1539     case enum_operator::GREATER_THAN_OR_EQUAL:
1540     case enum_operator::NOT_EQUALS_TO:
1541       DBUG_ASSERT(item_count == 2);
1542       /*
1543         Verify that one side of the predicate is a column/field, and that the
1544         other side is a constant value.
1545 
1546         Make sure that we have the constant item as the right side argument of
1547         the predicate internally.
1548       */
1549       if (items[0]->const_item() && items[1]->type() == Item::FIELD_ITEM) {
1550         // Flip the operators as well as the operator itself.
1551         switch (op) {
1552           case enum_operator::GREATER_THAN:
1553             op = enum_operator::LESS_THAN;
1554             break;
1555           case enum_operator::LESS_THAN:
1556             op = enum_operator::GREATER_THAN;
1557             break;
1558           case enum_operator::LESS_THAN_OR_EQUAL:
1559             op = enum_operator::GREATER_THAN_OR_EQUAL;
1560             break;
1561           case enum_operator::GREATER_THAN_OR_EQUAL:
1562             op = enum_operator::LESS_THAN_OR_EQUAL;
1563             break;
1564           default:
1565             break;
1566         }
1567         Item *items_flipped[2];
1568         items_flipped[0] = items[1];
1569         items_flipped[1] = items[0];
1570         return get_selectivity(items_flipped, item_count, op, selectivity);
1571       } else if (items[0]->type() != Item::FIELD_ITEM ||
1572                  !items[1]->const_item()) {
1573         return true;
1574       }
1575       break;
1576     case enum_operator::BETWEEN:
1577     case enum_operator::NOT_BETWEEN:
1578       DBUG_ASSERT(item_count == 3);
1579 
1580       if (items[0]->type() != Item::FIELD_ITEM || !items[1]->const_item() ||
1581           !items[2]->const_item()) {
1582         return true;
1583       }
1584       break;
1585     case enum_operator::IN_LIST:
1586     case enum_operator::NOT_IN_LIST:
1587       DBUG_ASSERT(item_count >= 2);
1588 
1589       if (items[0]->type() != Item::FIELD_ITEM)
1590         return true; /* purecov: deadcode */
1591 
1592       // This will only work if all items are const_items
1593       for (size_t i = 1; i < item_count; ++i) {
1594         if (!items[i]->const_item()) return true;
1595       }
1596       break;
1597     case enum_operator::IS_NULL:
1598     case enum_operator::IS_NOT_NULL:
1599       DBUG_ASSERT(item_count == 1);
1600       if (items[0]->type() != Item::FIELD_ITEM) return true;
1601   }
1602 
1603   DBUG_ASSERT(items[0]->type() == Item::FIELD_ITEM);
1604 
1605   const TYPELIB *typelib = nullptr;
1606   const Item_field *item_field = down_cast<const Item_field *>(items[0]);
1607   if (item_field->field->real_type() == MYSQL_TYPE_ENUM ||
1608       item_field->field->real_type() == MYSQL_TYPE_SET) {
1609     const Field_enum *field_enum =
1610         down_cast<const Field_enum *>(item_field->field);
1611     typelib = field_enum->typelib;
1612   }
1613 
1614   switch (op) {
1615     case enum_operator::LESS_THAN:
1616     case enum_operator::EQUALS_TO:
1617     case enum_operator::GREATER_THAN: {
1618       return get_selectivity_dispatcher(items[1], op, typelib, selectivity);
1619     }
1620     case enum_operator::LESS_THAN_OR_EQUAL: {
1621       double less_than_selectivity;
1622       double equals_to_selectivity;
1623       if (get_selectivity_dispatcher(items[1], enum_operator::LESS_THAN,
1624                                      typelib, &less_than_selectivity) ||
1625           get_selectivity_dispatcher(items[1], enum_operator::EQUALS_TO,
1626                                      typelib, &equals_to_selectivity))
1627         return true;
1628 
1629       *selectivity = std::min(less_than_selectivity + equals_to_selectivity,
1630                               get_non_null_values_frequency());
1631       return false;
1632     }
1633     case enum_operator::GREATER_THAN_OR_EQUAL: {
1634       double greater_than_selectivity;
1635       double equals_to_selectivity;
1636       if (get_selectivity_dispatcher(items[1], enum_operator::GREATER_THAN,
1637                                      typelib, &greater_than_selectivity) ||
1638           get_selectivity_dispatcher(items[1], enum_operator::EQUALS_TO,
1639                                      typelib, &equals_to_selectivity))
1640         return true;
1641 
1642       *selectivity = std::min(greater_than_selectivity + equals_to_selectivity,
1643                               get_non_null_values_frequency());
1644       return false;
1645     }
1646     case enum_operator::NOT_EQUALS_TO: {
1647       double equals_to_selectivity;
1648       if (get_selectivity_dispatcher(items[1], enum_operator::EQUALS_TO,
1649                                      typelib, &equals_to_selectivity))
1650         return true;
1651 
1652       *selectivity = std::max(
1653           get_non_null_values_frequency() - equals_to_selectivity, 0.0);
1654       return false;
1655     }
1656     case enum_operator::BETWEEN: {
1657       double less_than_selectivity;
1658       double greater_than_selectivity;
1659       if (get_selectivity_dispatcher(items[1], enum_operator::LESS_THAN,
1660                                      typelib, &less_than_selectivity) ||
1661           get_selectivity_dispatcher(items[2], enum_operator::GREATER_THAN,
1662                                      typelib, &greater_than_selectivity))
1663         return true;
1664 
1665       *selectivity = this->get_non_null_values_frequency() -
1666                      (less_than_selectivity + greater_than_selectivity);
1667 
1668       /*
1669         Make sure that we don't return a value less than 0.0. This might happen
1670         with a query like:
1671           EXPLAIN SELECT a FROM t1 WHERE t1.a BETWEEN 3 AND 0;
1672       */
1673       *selectivity = std::max(0.0, *selectivity);
1674       return false;
1675     }
1676     case enum_operator::NOT_BETWEEN: {
1677       double less_than_selectivity;
1678       double greater_than_selectivity;
1679       if (get_selectivity_dispatcher(items[1], enum_operator::LESS_THAN,
1680                                      typelib, &less_than_selectivity) ||
1681           get_selectivity_dispatcher(items[2], enum_operator::GREATER_THAN,
1682                                      typelib, &greater_than_selectivity))
1683         return true;
1684 
1685       /*
1686         Make sure that we don't return a value greater than 1.0. This might
1687         happen with a query like:
1688           EXPLAIN SELECT a FROM t1 WHERE t1.a NOT BETWEEN 3 AND 0;
1689       */
1690       *selectivity = std::min(less_than_selectivity + greater_than_selectivity,
1691                               get_non_null_values_frequency());
1692       return false;
1693     }
1694     case enum_operator::IN_LIST: {
1695       *selectivity = 0.0;
1696       for (size_t i = 1; i < item_count; ++i) {
1697         double equals_to_selectivity;
1698         if (get_selectivity_dispatcher(items[i], enum_operator::EQUALS_TO,
1699                                        typelib, &equals_to_selectivity))
1700           return true;
1701 
1702         *selectivity += equals_to_selectivity;
1703 
1704         if (*selectivity >= get_non_null_values_frequency()) break;
1705       }
1706 
1707       /*
1708         Long in-lists may easily exceed a selectivity of
1709         get_non_null_values_frequency() in certain cases.
1710       */
1711       *selectivity = std::min(*selectivity, get_non_null_values_frequency());
1712       return false;
1713     }
1714     case enum_operator::NOT_IN_LIST: {
1715       *selectivity = this->get_non_null_values_frequency();
1716       for (size_t i = 1; i < item_count; ++i) {
1717         double equals_to_selectivity;
1718         if (get_selectivity_dispatcher(items[i], enum_operator::EQUALS_TO,
1719                                        typelib, &equals_to_selectivity)) {
1720           if (items[i]->null_value) {
1721             // WHERE col1 NOT IN (..., NULL, ...) will return zero rows.
1722             *selectivity = 0.0;
1723             return false;
1724           }
1725 
1726           return true; /* purecov: deadcode */
1727         }
1728 
1729         *selectivity -= equals_to_selectivity;
1730         if (*selectivity <= 0.0) break;
1731       }
1732 
1733       /*
1734         Long in-lists may easily estimate a selectivity less than 0.0 in certain
1735         cases.
1736       */
1737       *selectivity = std::max(*selectivity, 0.0);
1738       return false;
1739     }
1740     case enum_operator::IS_NULL:
1741       *selectivity = this->get_null_values_fraction();
1742       return false;
1743     case enum_operator::IS_NOT_NULL:
1744       *selectivity = 1.0 - this->get_null_values_fraction();
1745       return false;
1746   }
1747 
1748   /* purecov: begin deadcode */
1749   DBUG_ASSERT(false);
1750   return true;
1751   /* purecov: end deadcode */
1752 }
1753 
// Explicit template instantiations.
//
// build_histogram() is instantiated here once for each Value_map element
// type: double, String, ulonglong, longlong, MYSQL_TIME and my_decimal.
// NOTE(review): presumably the template is defined in this file rather than
// in the header, so other translation units depend on the instantiations
// emitted here -- confirm against histogram.h before adding or removing one.
template Histogram *build_histogram(MEM_ROOT *, const Value_map<double> &,
                                    size_t, const std::string &,
                                    const std::string &, const std::string &);

template Histogram *build_histogram(MEM_ROOT *, const Value_map<String> &,
                                    size_t, const std::string &,
                                    const std::string &, const std::string &);

template Histogram *build_histogram(MEM_ROOT *, const Value_map<ulonglong> &,
                                    size_t, const std::string &,
                                    const std::string &, const std::string &);

template Histogram *build_histogram(MEM_ROOT *, const Value_map<longlong> &,
                                    size_t, const std::string &,
                                    const std::string &, const std::string &);

template Histogram *build_histogram(MEM_ROOT *, const Value_map<MYSQL_TIME> &,
                                    size_t, const std::string &,
                                    const std::string &, const std::string &);

template Histogram *build_histogram(MEM_ROOT *, const Value_map<my_decimal> &,
                                    size_t, const std::string &,
                                    const std::string &, const std::string &);
1778 
1779 }  // namespace histograms
1780