1 /* Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 /**
24 @file sql/histograms/histogram.cc
25 Histogram base class (implementation).
26 */
27
28 #include "sql/histograms/histogram.h" // Histogram, Histogram_comparator
29
30 #include <sys/types.h>
31 #include <algorithm>
32 #include <map>
33 #include <memory> // std::unique_ptr
34 #include <new>
35 #include <random>
36 #include <string>
37 #include <vector>
38
39 #include "field_types.h" // enum_field_types
40 #include "lex_string.h"
41 #include "m_ctype.h"
42 #include "my_alloc.h"
43 #include "my_bitmap.h"
44 #include "my_dbug.h"
45 #include "my_inttypes.h"
46 #include "my_sys.h" // my_micro_time, get_charset
47 #include "my_systime.h"
48 #include "my_time.h"
49 #include "mysql/service_mysql_alloc.h"
50 #include "mysql_time.h"
51 #include "mysqld_error.h"
52 #include "scope_guard.h" // create_scope_guard
53 #include "sql/auth/auth_common.h"
54 #include "sql/dd/cache/dictionary_client.h"
55 #include "sql/dd/dd.h"
56 #include "sql/dd/string_type.h"
57 #include "sql/dd/types/column.h"
58 #include "sql/dd/types/column_statistics.h"
59 #include "sql/dd/types/table.h" // dd::Table
60 #include "sql/debug_sync.h"
61 #include "sql/field.h" // Field
62 #include "sql/handler.h"
63 #include "sql/histograms/equi_height.h" // Equi_height<T>
64 #include "sql/histograms/singleton.h" // Singleton<T>
65 #include "sql/histograms/value_map.h" // Value_map
66 #include "sql/item.h"
67 #include "sql/json_dom.h" // Json_*
68 #include "sql/key.h"
69 #include "sql/mdl.h" // MDL_request
70 #include "sql/my_decimal.h"
71 #include "sql/psi_memory_key.h" // key_memory_histograms
72 #include "sql/sql_base.h" // open_and_lock_tables,
73 #include "sql/sql_bitmap.h"
74 // close_thread_tables
75 #include "sql/sql_class.h" // make_lex_string_root
76 #include "sql/sql_const.h"
77 #include "sql/strfunc.h" // find_type2, find_set
78 #include "sql/system_variables.h"
79 #include "sql/table.h"
80 #include "sql/thd_raii.h"
81 #include "sql/transaction.h" // trans_commit_stmt, trans_rollback_stmt
82 #include "sql/tztime.h" // my_tz_UTC
83 #include "sql_string.h" // String
84 #include "template_utils.h"
85
86 struct TYPELIB;
87
88 namespace histograms {
89
90 /*
91 This type represents a instrumented map of value maps, indexed by field
92 number.
93 */
94 using value_map_collection = std::map<
95 uint16, std::unique_ptr<histograms::Value_map_base>, std::less<uint16>,
96 Histogram_key_allocator<
97 std::pair<const uint16, std::unique_ptr<histograms::Value_map_base>>>>;
98
operator ()(size_t s) const99 void *Histogram_psi_key_alloc::operator()(size_t s) const {
100 return my_malloc(key_memory_histograms, s, MYF(MY_WME | ME_FATALERROR));
101 }
102
103 /**
104 Convert from enum_field_types to Value_map_type.
105
106 @param field_type the field type
107 @param is_unsigned whether the field type is unsigned or not. This is only
108 considered if the field type is LONGLONG
109
110 @return A Value_map_type. May be INVALID if the Value_map does not support
111 the field type.
112 */
field_type_to_value_map_type(const enum_field_types field_type,const bool is_unsigned)113 static Value_map_type field_type_to_value_map_type(
114 const enum_field_types field_type, const bool is_unsigned) {
115 switch (field_type) {
116 case MYSQL_TYPE_DECIMAL:
117 case MYSQL_TYPE_NEWDECIMAL:
118 return Value_map_type::DECIMAL;
119 case MYSQL_TYPE_TINY:
120 case MYSQL_TYPE_SHORT:
121 case MYSQL_TYPE_LONG:
122 case MYSQL_TYPE_INT24:
123 case MYSQL_TYPE_YEAR:
124 case MYSQL_TYPE_BIT:
125 return Value_map_type::INT;
126 case MYSQL_TYPE_ENUM:
127 return Value_map_type::ENUM;
128 case MYSQL_TYPE_SET:
129 return Value_map_type::SET;
130 case MYSQL_TYPE_LONGLONG:
131 return is_unsigned ? Value_map_type::UINT : Value_map_type::INT;
132 case MYSQL_TYPE_FLOAT:
133 case MYSQL_TYPE_DOUBLE:
134 return Value_map_type::DOUBLE;
135 case MYSQL_TYPE_TIME:
136 case MYSQL_TYPE_TIME2:
137 return Value_map_type::TIME;
138 case MYSQL_TYPE_DATE:
139 case MYSQL_TYPE_NEWDATE:
140 return Value_map_type::DATE;
141 case MYSQL_TYPE_DATETIME:
142 case MYSQL_TYPE_TIMESTAMP:
143 case MYSQL_TYPE_TIMESTAMP2:
144 case MYSQL_TYPE_DATETIME2:
145 return Value_map_type::DATETIME;
146 case MYSQL_TYPE_TINY_BLOB:
147 case MYSQL_TYPE_MEDIUM_BLOB:
148 case MYSQL_TYPE_LONG_BLOB:
149 case MYSQL_TYPE_BLOB:
150 case MYSQL_TYPE_VAR_STRING:
151 case MYSQL_TYPE_STRING:
152 case MYSQL_TYPE_VARCHAR:
153 return Value_map_type::STRING;
154 case MYSQL_TYPE_JSON:
155 case MYSQL_TYPE_GEOMETRY:
156 case MYSQL_TYPE_NULL:
157 default:
158 return Value_map_type::INVALID;
159 }
160
161 // All cases should be handled, so this should not be hit.
162 /* purecov: begin inspected */
163 DBUG_ASSERT(false);
164 return Value_map_type::INVALID;
165 /* purecov: end */
166 }
167
168 /**
169 Get the Value_map_type from a Field object.
170
171 This effectively looks at the real_type() of a Field, and converts this to
172 a Value_map_type
173
174 @param field The field to convert from
175
176 @return A Value_map_type. May be INVALID if the Value_map does not support
177 the field type.
178 */
field_type_to_value_map_type(const Field * field)179 static Value_map_type field_type_to_value_map_type(const Field *field) {
180 bool is_unsigned = false;
181 if (field->real_type() == MYSQL_TYPE_LONGLONG) {
182 /*
183 For most integer types, the Value_map_type will be INT (int64). This type
184 will not cover the entire value range for the SQL data type UNSIGNED
185 BIGINT, so we need to distinguish between SIGNED BIGINT and UNSIGNED
186 BIGINT so that we can switch the Value_map_type to UINT (uint64).
187 */
188 is_unsigned = field->is_unsigned();
189 }
190
191 return field_type_to_value_map_type(field->real_type(), is_unsigned);
192 }
193
194 /**
195 Lock a column statistic MDL key for writing (exclusive lock).
196
197 @param thd thread handle
198 @param mdl_key the MDL key to lock
199
200 @return true on error, false on success
201 */
lock_for_write(THD * thd,const MDL_key & mdl_key)202 static bool lock_for_write(THD *thd, const MDL_key &mdl_key) {
203 DBUG_EXECUTE_IF("histogram_fail_during_lock_for_write", { return true; });
204
205 MDL_request mdl_request;
206 MDL_REQUEST_INIT_BY_KEY(&mdl_request, &mdl_key, MDL_EXCLUSIVE,
207 MDL_TRANSACTION);
208
209 // If locking fails, an error has already been flagged.
210 return thd->mdl_context.acquire_lock(&mdl_request,
211 thd->variables.lock_wait_timeout);
212 }
213
Histogram(MEM_ROOT * mem_root,const std::string & db_name,const std::string & tbl_name,const std::string & col_name,enum_histogram_type type,Value_map_type data_type)214 Histogram::Histogram(MEM_ROOT *mem_root, const std::string &db_name,
215 const std::string &tbl_name, const std::string &col_name,
216 enum_histogram_type type, Value_map_type data_type)
217 : m_null_values_fraction(INVALID_NULL_VALUES_FRACTION),
218 m_charset(nullptr),
219 m_num_buckets_specified(0),
220 m_mem_root(mem_root),
221 m_hist_type(type),
222 m_data_type(data_type) {
223 lex_string_strmake(m_mem_root, &m_database_name, db_name.c_str(),
224 db_name.length());
225
226 lex_string_strmake(m_mem_root, &m_table_name, tbl_name.c_str(),
227 tbl_name.length());
228
229 lex_string_strmake(m_mem_root, &m_column_name, col_name.c_str(),
230 col_name.length());
231 }
232
Histogram(MEM_ROOT * mem_root,const Histogram & other)233 Histogram::Histogram(MEM_ROOT *mem_root, const Histogram &other)
234 : m_sampling_rate(other.m_sampling_rate),
235 m_null_values_fraction(other.m_null_values_fraction),
236 m_charset(other.m_charset),
237 m_num_buckets_specified(other.m_num_buckets_specified),
238 m_mem_root(mem_root),
239 m_hist_type(other.m_hist_type),
240 m_data_type(other.m_data_type) {
241 lex_string_strmake(m_mem_root, &m_database_name, other.m_database_name.str,
242 other.m_database_name.length);
243
244 lex_string_strmake(m_mem_root, &m_table_name, other.m_table_name.str,
245 other.m_table_name.length);
246
247 lex_string_strmake(m_mem_root, &m_column_name, other.m_column_name.str,
248 other.m_column_name.length);
249 }
250
histogram_to_json(Json_object * json_object) const251 bool Histogram::histogram_to_json(Json_object *json_object) const {
252 // Get the current time in GMT timezone with microsecond accuray.
253 timeval time_value;
254 my_micro_time_to_timeval(my_micro_time(), &time_value);
255
256 MYSQL_TIME current_time;
257 my_tz_UTC->gmt_sec_to_TIME(¤t_time, time_value);
258
259 // last-updated
260 const Json_datetime last_updated(current_time, MYSQL_TYPE_DATETIME);
261 if (json_object->add_clone(last_updated_str(), &last_updated))
262 return true; /* purecov: inspected */
263
264 // histogram-type
265 const Json_string histogram_type(histogram_type_to_str());
266 if (json_object->add_clone(histogram_type_str(), &histogram_type))
267 return true; /* purecov: inspected */
268
269 // Sampling rate
270 DBUG_ASSERT(get_sampling_rate() >= 0.0);
271 DBUG_ASSERT(get_sampling_rate() <= 1.0);
272 const Json_double sampling_rate(get_sampling_rate());
273 if (json_object->add_clone(sampling_rate_str(), &sampling_rate))
274 return true; /* purecov: inspected */
275
276 // The number of buckets specified in the ANALYZE TABLE command
277 const Json_int num_buckets_specified(get_num_buckets_specified());
278 if (json_object->add_clone(numer_of_buckets_specified_str(),
279 &num_buckets_specified))
280 return true; /* purecov: inspected */
281
282 // Fraction of NULL values.
283 DBUG_ASSERT(get_null_values_fraction() >= 0.0);
284 DBUG_ASSERT(get_null_values_fraction() <= 1.0);
285 const Json_double null_values(get_null_values_fraction());
286 if (json_object->add_clone(null_values_str(), &null_values))
287 return true; /* purecov: inspected */
288
289 // charset-id
290 const Json_uint charset_id(get_character_set()->number);
291 if (json_object->add_clone(collation_id_str(), &charset_id))
292 return true; /* purecov: inspected */
293 return false;
294 }
295
get_null_values_fraction() const296 double Histogram::get_null_values_fraction() const {
297 if (m_null_values_fraction != INVALID_NULL_VALUES_FRACTION) {
298 DBUG_ASSERT(m_null_values_fraction >= 0.0);
299 DBUG_ASSERT(m_null_values_fraction <= 1.0);
300 }
301
302 return m_null_values_fraction;
303 }
304
305 template <class T>
build_histogram(MEM_ROOT * mem_root,const Value_map<T> & value_map,size_t num_buckets,const std::string & db_name,const std::string & tbl_name,const std::string & col_name)306 Histogram *build_histogram(MEM_ROOT *mem_root, const Value_map<T> &value_map,
307 size_t num_buckets, const std::string &db_name,
308 const std::string &tbl_name,
309 const std::string &col_name) {
310 Histogram *histogram = nullptr;
311
312 /*
313 If the number of buckets specified is greater or equal to the number
314 of distinct values, we create a Singleton histogram. Otherwise we create
315 an equi-height histogram.
316 */
317 if (num_buckets >= value_map.size()) {
318 Singleton<T> *singleton = new (mem_root) Singleton<T>(
319 mem_root, db_name, tbl_name, col_name, value_map.get_data_type());
320
321 if (singleton == nullptr) return nullptr;
322
323 if (singleton->build_histogram(value_map, num_buckets))
324 return nullptr; /* purecov: inspected */
325
326 histogram = singleton;
327 } else {
328 Equi_height<T> *equi_height = new (mem_root) Equi_height<T>(
329 mem_root, db_name, tbl_name, col_name, value_map.get_data_type());
330
331 if (equi_height == nullptr) return nullptr;
332
333 if (equi_height->build_histogram(value_map, num_buckets))
334 return nullptr; /* purecov: inspected */
335
336 histogram = equi_height;
337 }
338
339 // We should not have a nullptr at this point.
340 DBUG_ASSERT(histogram != nullptr);
341
342 // Verify that the original number of buckets specified is set.
343 DBUG_ASSERT(histogram->get_num_buckets_specified() == num_buckets);
344
345 // Verify that we haven't created more buckets than requested.
346 DBUG_ASSERT(histogram->get_num_buckets() <= num_buckets);
347
348 // Ensure that the character set is set.
349 DBUG_ASSERT(histogram->get_character_set() != nullptr);
350
351 // Check that the fraction of NULL values has been set properly.
352 DBUG_ASSERT(histogram->get_null_values_fraction() >= 0.0);
353 DBUG_ASSERT(histogram->get_null_values_fraction() <= 1.0);
354
355 return histogram;
356 }
357
json_to_histogram(MEM_ROOT * mem_root,const std::string & schema_name,const std::string & table_name,const std::string & column_name,const Json_object & json_object)358 Histogram *Histogram::json_to_histogram(MEM_ROOT *mem_root,
359 const std::string &schema_name,
360 const std::string &table_name,
361 const std::string &column_name,
362 const Json_object &json_object) {
363 // Histogram type (equi-height or singleton).
364 const Json_dom *histogram_type_dom =
365 json_object.get(Histogram::histogram_type_str());
366 if (histogram_type_dom == nullptr ||
367 histogram_type_dom->json_type() != enum_json_type::J_STRING) {
368 return nullptr; /* purecov: deadcode */
369 }
370
371 // Histogram data type
372 const Json_dom *data_type_dom = json_object.get(Histogram::data_type_str());
373 if (data_type_dom == nullptr ||
374 data_type_dom->json_type() != enum_json_type::J_STRING) {
375 return nullptr; /* purecov: deadcode */
376 }
377
378 const Json_string *histogram_type =
379 down_cast<const Json_string *>(histogram_type_dom);
380 const Json_string *data_type = down_cast<const Json_string *>(data_type_dom);
381
382 Histogram *histogram = nullptr;
383 if (histogram_type->value() == Histogram::equi_height_str()) {
384 // Equi-height histogram
385 if (data_type->value() == "double") {
386 histogram = new (mem_root)
387 Equi_height<double>(mem_root, schema_name, table_name, column_name,
388 Value_map_type::DOUBLE);
389 } else if (data_type->value() == "int") {
390 histogram = new (mem_root) Equi_height<longlong>(
391 mem_root, schema_name, table_name, column_name, Value_map_type::INT);
392 } else if (data_type->value() == "enum") {
393 histogram = new (mem_root) Equi_height<longlong>(
394 mem_root, schema_name, table_name, column_name, Value_map_type::ENUM);
395 } else if (data_type->value() == "set") {
396 histogram = new (mem_root) Equi_height<longlong>(
397 mem_root, schema_name, table_name, column_name, Value_map_type::SET);
398 } else if (data_type->value() == "uint") {
399 histogram = new (mem_root) Equi_height<ulonglong>(
400 mem_root, schema_name, table_name, column_name, Value_map_type::UINT);
401 } else if (data_type->value() == "string") {
402 histogram = new (mem_root)
403 Equi_height<String>(mem_root, schema_name, table_name, column_name,
404 Value_map_type::STRING);
405 } else if (data_type->value() == "date") {
406 histogram = new (mem_root) Equi_height<MYSQL_TIME>(
407 mem_root, schema_name, table_name, column_name, Value_map_type::DATE);
408 } else if (data_type->value() == "time") {
409 histogram = new (mem_root) Equi_height<MYSQL_TIME>(
410 mem_root, schema_name, table_name, column_name, Value_map_type::TIME);
411 } else if (data_type->value() == "datetime") {
412 histogram = new (mem_root)
413 Equi_height<MYSQL_TIME>(mem_root, schema_name, table_name,
414 column_name, Value_map_type::DATETIME);
415 } else if (data_type->value() == "decimal") {
416 histogram = new (mem_root)
417 Equi_height<my_decimal>(mem_root, schema_name, table_name,
418 column_name, Value_map_type::DECIMAL);
419 } else {
420 return nullptr; /* purecov: deadcode */
421 }
422 } else if (histogram_type->value() == Histogram::singleton_str()) {
423 // Singleton histogram
424 if (data_type->value() == "double") {
425 histogram =
426 new (mem_root) Singleton<double>(mem_root, schema_name, table_name,
427 column_name, Value_map_type::DOUBLE);
428 } else if (data_type->value() == "int") {
429 histogram = new (mem_root) Singleton<longlong>(
430 mem_root, schema_name, table_name, column_name, Value_map_type::INT);
431 } else if (data_type->value() == "enum") {
432 histogram = new (mem_root) Singleton<longlong>(
433 mem_root, schema_name, table_name, column_name, Value_map_type::ENUM);
434 } else if (data_type->value() == "set") {
435 histogram = new (mem_root) Singleton<longlong>(
436 mem_root, schema_name, table_name, column_name, Value_map_type::SET);
437 } else if (data_type->value() == "uint") {
438 histogram = new (mem_root) Singleton<ulonglong>(
439 mem_root, schema_name, table_name, column_name, Value_map_type::UINT);
440 } else if (data_type->value() == "string") {
441 histogram =
442 new (mem_root) Singleton<String>(mem_root, schema_name, table_name,
443 column_name, Value_map_type::STRING);
444 } else if (data_type->value() == "datetime") {
445 histogram = new (mem_root)
446 Singleton<MYSQL_TIME>(mem_root, schema_name, table_name, column_name,
447 Value_map_type::DATETIME);
448 } else if (data_type->value() == "date") {
449 histogram = new (mem_root) Singleton<MYSQL_TIME>(
450 mem_root, schema_name, table_name, column_name, Value_map_type::DATE);
451 } else if (data_type->value() == "time") {
452 histogram = new (mem_root) Singleton<MYSQL_TIME>(
453 mem_root, schema_name, table_name, column_name, Value_map_type::TIME);
454 } else if (data_type->value() == "decimal") {
455 histogram = new (mem_root)
456 Singleton<my_decimal>(mem_root, schema_name, table_name, column_name,
457 Value_map_type::DECIMAL);
458 } else {
459 return nullptr; /* purecov: deadcode */
460 }
461 } else {
462 // Unsupported histogram type.
463 return nullptr; /* purecov: deadcode */
464 }
465
466 if (histogram != nullptr && histogram->json_to_histogram(json_object))
467 return nullptr; /* purecov: deadcode */
468 return histogram;
469 }
470
471 /*
472 All subclasses should also call this function in order to populate fields that
473 are shared among all histogram types (character set, null values fraction).
474 */
json_to_histogram(const Json_object & json_object)475 bool Histogram::json_to_histogram(const Json_object &json_object) {
476 // The sampling rate that was used to create the histogram.
477 const Json_dom *sampling_rate_dom = json_object.get(sampling_rate_str());
478 if (sampling_rate_dom == nullptr ||
479 sampling_rate_dom->json_type() != enum_json_type::J_DOUBLE) {
480 return true; /* purecov: deadcode */
481 }
482 const Json_double *sampling_rate =
483 down_cast<const Json_double *>(sampling_rate_dom);
484 m_sampling_rate = sampling_rate->value();
485
486 // The number of buckets originally specified by the user.
487 const Json_dom *num_buckets_specified_dom =
488 json_object.get(numer_of_buckets_specified_str());
489 if (num_buckets_specified_dom == nullptr ||
490 num_buckets_specified_dom->json_type() != enum_json_type::J_INT) {
491 return true; /* purecov: deadcode */
492 }
493 const Json_int *num_buckets_specified =
494 down_cast<const Json_int *>(num_buckets_specified_dom);
495 m_num_buckets_specified = num_buckets_specified->value();
496
497 // Fraction of SQL null-values in the original data set.
498 const Json_dom *null_values_dom = json_object.get(null_values_str());
499 if (null_values_dom == nullptr ||
500 null_values_dom->json_type() != enum_json_type::J_DOUBLE) {
501 return true; /* purecov: deadcode */
502 }
503 const Json_double *null_values =
504 down_cast<const Json_double *>(null_values_dom);
505 m_null_values_fraction = null_values->value();
506
507 // Character set ID
508 const Json_dom *charset_id_dom = json_object.get(collation_id_str());
509 if (charset_id_dom == nullptr ||
510 charset_id_dom->json_type() != enum_json_type::J_UINT) {
511 return true; /* purecov: deadcode */
512 }
513 const Json_uint *charset_id = down_cast<const Json_uint *>(charset_id_dom);
514
515 // Get the charset (my_sys.h)
516 m_charset = get_charset(static_cast<uint>(charset_id->value()), MYF(0));
517
518 return false;
519 }
520
521 static std::map<const Value_map_type, const std::string> value_map_type_to_str =
522 {{Value_map_type::DATETIME, "datetime"}, {Value_map_type::DATE, "date"},
523 {Value_map_type::TIME, "time"}, {Value_map_type::INT, "int"},
524 {Value_map_type::UINT, "uint"}, {Value_map_type::DOUBLE, "double"},
525 {Value_map_type::DECIMAL, "decimal"}, {Value_map_type::STRING, "string"},
526 {Value_map_type::ENUM, "enum"}, {Value_map_type::SET, "set"}};
527
histogram_data_type_to_json(Json_object * json_object) const528 bool Histogram::histogram_data_type_to_json(Json_object *json_object) const {
529 std::string foo = value_map_type_to_str[get_data_type()];
530 const Json_string json_value(foo);
531 return json_object->add_clone(data_type_str(), &json_value);
532 }
533
534 template <>
extract_json_dom_value(const Json_dom * json_dom,double * out)535 bool Histogram::extract_json_dom_value(const Json_dom *json_dom, double *out) {
536 if (json_dom->json_type() != enum_json_type::J_DOUBLE)
537 return true; /* purecov: deadcode */
538 *out = down_cast<const Json_double *>(json_dom)->value();
539 return false;
540 }
541
542 template <>
extract_json_dom_value(const Json_dom * json_dom,String * out)543 bool Histogram::extract_json_dom_value(const Json_dom *json_dom, String *out) {
544 DBUG_ASSERT(get_character_set() != nullptr);
545 if (json_dom->json_type() != enum_json_type::J_OPAQUE)
546 return true; /* purecov: deadcode */
547 const Json_opaque *json_opaque = down_cast<const Json_opaque *>(json_dom);
548
549 String value(json_opaque->value(), json_opaque->size(), get_character_set());
550
551 /*
552 Make a copy of the data, since the JSON opaque will free it before we need
553 it.
554 */
555 char *value_dup_data = value.dup(get_mem_root());
556 if (value_dup_data == nullptr) {
557 DBUG_ASSERT(false); /* purecov: deadcode */
558 return true; // OOM
559 }
560
561 out->set(value_dup_data, value.length(), value.charset());
562 return false;
563 }
564
565 template <>
extract_json_dom_value(const Json_dom * json_dom,ulonglong * out)566 bool Histogram::extract_json_dom_value(const Json_dom *json_dom,
567 ulonglong *out) {
568 if (json_dom->json_type() != enum_json_type::J_UINT)
569 return true; /* purecov: deadcode */
570 *out = down_cast<const Json_uint *>(json_dom)->value();
571 return false;
572 }
573
574 template <>
extract_json_dom_value(const Json_dom * json_dom,longlong * out)575 bool Histogram::extract_json_dom_value(const Json_dom *json_dom,
576 longlong *out) {
577 if (json_dom->json_type() != enum_json_type::J_INT)
578 return true; /* purecov: deadcode */
579 *out = down_cast<const Json_int *>(json_dom)->value();
580 return false;
581 }
582
583 template <>
extract_json_dom_value(const Json_dom * json_dom,MYSQL_TIME * out)584 bool Histogram::extract_json_dom_value(const Json_dom *json_dom,
585 MYSQL_TIME *out) {
586 if (json_dom->json_type() != enum_json_type::J_DATE &&
587 json_dom->json_type() != enum_json_type::J_TIME &&
588 json_dom->json_type() != enum_json_type::J_DATETIME &&
589 json_dom->json_type() != enum_json_type::J_TIMESTAMP)
590 return true; /* purecov: deadcode */
591 *out = *down_cast<const Json_datetime *>(json_dom)->value();
592 return false;
593 }
594
595 template <>
extract_json_dom_value(const Json_dom * json_dom,my_decimal * out)596 bool Histogram::extract_json_dom_value(const Json_dom *json_dom,
597 my_decimal *out) {
598 if (json_dom->json_type() != enum_json_type::J_DECIMAL)
599 return true; /* purecov: deadcode */
600 *out = *down_cast<const Json_decimal *>(json_dom)->value();
601 return false;
602 }
603
604 /**
605 Check if a field is covered by a single-part unique index (primary key or
606 unique index). Indexes that are marked as invisible are ignored.
607
608 @param thd The current session.
609 @param field The field to check.
610
611 @return true if the field is covered by a single-part unique index. False
612 otherwise.
613 */
covered_by_single_part_index(const THD * thd,const Field * field)614 static bool covered_by_single_part_index(const THD *thd, const Field *field) {
615 Key_map possible_keys;
616 possible_keys.merge(field->table->s->usable_indexes(thd));
617 possible_keys.intersect(field->key_start);
618 DBUG_ASSERT(field->table->s->keys <= possible_keys.length());
619 for (uint i = 0; i < field->table->s->keys; ++i) {
620 if (possible_keys.is_set(i) &&
621 field->table->s->key_info[i].user_defined_key_parts == 1 &&
622 (field->table->s->key_info[i].flags & HA_NOSAME)) {
623 return true;
624 }
625 }
626
627 return false;
628 }
629
630 /**
631 Prepare one Value_map for each field we are creating histogram statistics for.
632 We will also estimate how many bytes one row will consume. For example, if we
633 are creating histogram statistics for two INTEGER columns, we estimate that
634 one row will consume (sizeof(longlong) * 2) bytes (16 bytes).
635
636 @param fields A vector with all the fields we are creating
637 histogram statistics for.
638 @param[out] value_maps A map where the Value_maps will be initialized.
639 @param[out] row_size_bytes An estimation of how many bytes one row will
640 consume.
641
642 @return true on error, false otherwise.
643 */
prepare_value_maps(std::vector<Field *,Histogram_key_allocator<Field * >> & fields,value_map_collection & value_maps,size_t * row_size_bytes)644 static bool prepare_value_maps(
645 std::vector<Field *, Histogram_key_allocator<Field *>> &fields,
646 value_map_collection &value_maps, size_t *row_size_bytes) {
647 *row_size_bytes = 0;
648 for (const Field *field : fields) {
649 histograms::Value_map_base *value_map = nullptr;
650
651 const Value_map_type value_map_type =
652 histograms::field_type_to_value_map_type(field);
653
654 switch (value_map_type) {
655 case histograms::Value_map_type::STRING: {
656 size_t max_field_length =
657 std::min(static_cast<size_t>(field->field_length),
658 histograms::HISTOGRAM_MAX_COMPARE_LENGTH);
659 *row_size_bytes += max_field_length * field->charset()->mbmaxlen;
660 value_map =
661 new histograms::Value_map<String>(field->charset(), value_map_type);
662 break;
663 }
664 case histograms::Value_map_type::DOUBLE: {
665 value_map =
666 new histograms::Value_map<double>(field->charset(), value_map_type);
667 break;
668 }
669 case histograms::Value_map_type::INT:
670 case histograms::Value_map_type::ENUM:
671 case histograms::Value_map_type::SET: {
672 value_map = new histograms::Value_map<longlong>(field->charset(),
673 value_map_type);
674 break;
675 }
676 case histograms::Value_map_type::UINT: {
677 value_map = new histograms::Value_map<ulonglong>(field->charset(),
678 value_map_type);
679 break;
680 }
681 case histograms::Value_map_type::DATETIME:
682 case histograms::Value_map_type::DATE:
683 case histograms::Value_map_type::TIME: {
684 value_map = new histograms::Value_map<MYSQL_TIME>(field->charset(),
685 value_map_type);
686 break;
687 }
688 case histograms::Value_map_type::DECIMAL: {
689 value_map = new histograms::Value_map<my_decimal>(field->charset(),
690 value_map_type);
691 break;
692 }
693 case histograms::Value_map_type::INVALID: {
694 DBUG_ASSERT(false); /* purecov: deadcode */
695 return true;
696 }
697 }
698
699 // Overhead for each element
700 *row_size_bytes += value_map->element_overhead();
701
702 value_maps.emplace(field->field_index(),
703 std::unique_ptr<histograms::Value_map_base>(value_map));
704 }
705
706 return false;
707 }
708
709 /**
710 Read data from a table into the provided Value_maps. We will read data using
711 sampling with the provided sampling percentage.
712
713 @param fields A vector with the fields we are reading data from.
714 @param sample_percentage The sampling percentage we will use for sampling.
715 Must be between 0.0 and 100.0.
716 @param table The table we are reading the data from.
717 @param value_maps The Value_maps we are reading data into.
718
719 @return true on error, false otherwise.
720 */
fill_value_maps(const std::vector<Field *,Histogram_key_allocator<Field * >> & fields,double sample_percentage,const TABLE * table,value_map_collection & value_maps)721 static bool fill_value_maps(
722 const std::vector<Field *, Histogram_key_allocator<Field *>> &fields,
723 double sample_percentage, const TABLE *table,
724 value_map_collection &value_maps) {
725 DBUG_ASSERT(sample_percentage > 0.0);
726 DBUG_ASSERT(sample_percentage <= 100.0);
727 DBUG_ASSERT(fields.size() == value_maps.size());
728
729 std::random_device rd;
730 std::uniform_int_distribution<int> dist;
731 int sampling_seed = dist(rd);
732
733 DBUG_EXECUTE_IF("histogram_force_sampling", {
734 sampling_seed = 1;
735 sample_percentage = 50.0;
736 });
737
738 void *scan_ctx = nullptr;
739
740 for (auto &value_map : value_maps)
741 value_map.second->set_sampling_rate(sample_percentage / 100.0);
742
743 if (table->file->ha_sample_init(scan_ctx, sample_percentage, sampling_seed,
744 enum_sampling_method::SYSTEM)) {
745 return true;
746 }
747
748 auto handler_guard = create_scope_guard([table, scan_ctx]() {
749 table->file->ha_sample_end(scan_ctx); /* purecov: deadcode */
750 });
751
752 // Read the data from each column into its own Value_map.
753 int res = table->file->ha_sample_next(scan_ctx, table->record[0]);
754
755 while (res == 0) {
756 for (Field *field : fields) {
757 histograms::Value_map_base *value_map =
758 value_maps.at(field->field_index()).get();
759
760 switch (histograms::field_type_to_value_map_type(field)) {
761 case histograms::Value_map_type::STRING: {
762 StringBuffer<MAX_FIELD_WIDTH> str_buf(field->charset());
763 field->val_str(&str_buf);
764
765 if (field->is_null())
766 value_map->add_null_values(1);
767 else if (value_map->add_values(static_cast<String>(str_buf), 1))
768 return true; /* purecov: deadcode */
769 break;
770 }
771 case histograms::Value_map_type::DOUBLE: {
772 double value = field->val_real();
773 if (field->is_null())
774 value_map->add_null_values(1);
775 else if (value_map->add_values(value, 1))
776 return true; /* purecov: deadcode */
777 break;
778 }
779 case histograms::Value_map_type::INT:
780 case histograms::Value_map_type::ENUM:
781 case histograms::Value_map_type::SET: {
782 longlong value = field->val_int();
783 if (field->is_null())
784 value_map->add_null_values(1);
785 else if (value_map->add_values(value, 1))
786 return true; /* purecov: deadcode */
787 break;
788 }
789 case histograms::Value_map_type::UINT: {
790 ulonglong value = static_cast<ulonglong>(field->val_int());
791 if (field->is_null())
792 value_map->add_null_values(1);
793 else if (value_map->add_values(value, 1))
794 return true; /* purecov: deadcode */
795 break;
796 }
797 case histograms::Value_map_type::DATE: {
798 MYSQL_TIME time_value;
799 TIME_from_longlong_date_packed(&time_value,
800 field->val_date_temporal());
801 if (field->is_null())
802 value_map->add_null_values(1);
803 else if (value_map->add_values(time_value, 1))
804 return true; /* purecov: deadcode */
805 break;
806 }
807 case histograms::Value_map_type::TIME: {
808 MYSQL_TIME time_value;
809 TIME_from_longlong_time_packed(&time_value,
810 field->val_time_temporal());
811 if (field->is_null())
812 value_map->add_null_values(1);
813 else if (value_map->add_values(time_value, 1))
814 return true; /* purecov: deadcode */
815 break;
816 }
817 case histograms::Value_map_type::DATETIME: {
818 MYSQL_TIME time_value;
819 TIME_from_longlong_datetime_packed(&time_value,
820 field->val_date_temporal());
821 if (field->is_null())
822 value_map->add_null_values(1);
823 else if (value_map->add_values(time_value, 1))
824 return true; /* purecov: deadcode */
825 break;
826 }
827 case histograms::Value_map_type::DECIMAL: {
828 my_decimal buffer;
829 my_decimal *value;
830 value = field->val_decimal(&buffer);
831
832 if (field->is_null())
833 value_map->add_null_values(1);
834 else if (value_map->add_values(*value, 1))
835 return true; /* purecov: deadcode */
836 break;
837 }
838 case histograms::Value_map_type::INVALID: {
839 DBUG_ASSERT(false); /* purecov: deadcode */
840 break;
841 }
842 }
843 }
844
845 res = table->file->ha_sample_next(scan_ctx, table->record[0]);
846
847 DBUG_EXECUTE_IF(
848 "sample_read_sample_half", static uint count = 1;
849 if (count == std::max(1ULL, table->file->stats.records) / 2) {
850 res = HA_ERR_END_OF_FILE;
851 break;
852 } ++count;);
853 }
854
855 if (res != HA_ERR_END_OF_FILE) return true; /* purecov: deadcode */
856
857 // Close the handler
858 handler_guard.commit();
859 if (table->file->ha_sample_end(scan_ctx)) {
860 DBUG_ASSERT(false); /* purecov: deadcode */
861 return true;
862 }
863
864 return false;
865 }
866
update_histogram(THD * thd,TABLE_LIST * table,const columns_set & columns,int num_buckets,results_map & results)867 bool update_histogram(THD *thd, TABLE_LIST *table, const columns_set &columns,
868 int num_buckets, results_map &results) {
869 dd::cache::Dictionary_client::Auto_releaser auto_releaser(thd->dd_client());
870
871 // Read only should have been stopped at an earlier stage.
872 DBUG_ASSERT(!check_readonly(thd, false));
873 DBUG_ASSERT(!thd->tx_read_only);
874
875 DBUG_ASSERT(results.empty());
876 DBUG_ASSERT(!columns.empty());
877
878 // Only one table should be specified in ANALYZE TABLE .. UPDATE HISTOGRAM
879 DBUG_ASSERT(table->next_local == nullptr);
880
881 if (table->table != nullptr && table->table->s->tmp_table != NO_TMP_TABLE) {
882 /*
883 Normally, the table we are going to read data from is not initialized at
884 this point. But if table->table is not a null-pointer, it has already been
885 initialized at an earlier stage. This will happen if the table is a
886 temporary table.
887 */
888 results.emplace("", Message::TEMPORARY_TABLE);
889 return true;
890 }
891
892 /*
893 Create two scope guards; one for disabling autocommit and one that will do a
894 rollback and ensure that any open tables are closed before returning.
895 */
896 Disable_autocommit_guard autocommit_guard(thd);
897 auto tables_guard = create_scope_guard([thd]() {
898 if (trans_rollback_stmt(thd) || trans_rollback(thd))
899 DBUG_ASSERT(false); /* purecov: deadcode */
900 close_thread_tables(thd);
901 });
902
903 table->reinit_before_use(thd);
904 if (open_and_lock_tables(thd, table, 0)) {
905 return true;
906 }
907
908 DBUG_EXECUTE_IF("histogram_fail_after_open_table", { return true; });
909
910 if (table->is_view()) {
911 results.emplace("", Message::VIEW);
912 return true;
913 }
914
915 DBUG_ASSERT(table->table != nullptr);
916 TABLE *tbl = table->table;
917
918 if (tbl->s->encrypt_type.length > 0 &&
919 my_strcasecmp(system_charset_info, "n", tbl->s->encrypt_type.str) != 0) {
920 results.emplace("", Message::ENCRYPTED_TABLE);
921 return true;
922 }
923
924 /*
925 Check if the provided column names exist, and that they have a supported
926 data type. If they do, mark them in the read set.
927 */
928 bitmap_clear_all(tbl->write_set);
929 bitmap_clear_all(tbl->read_set);
930 std::vector<Field *, Histogram_key_allocator<Field *>> resolved_fields;
931
932 for (const std::string &column_name : columns) {
933 Field *field = find_field_in_table_sef(tbl, column_name.c_str());
934
935 if (field == nullptr) {
936 // Field not found in table
937 results.emplace(column_name, Message::FIELD_NOT_FOUND);
938 continue;
939 } else if (histograms::field_type_to_value_map_type(field) ==
940 histograms::Value_map_type::INVALID) {
941 // Unsupported data type
942 results.emplace(column_name, Message::UNSUPPORTED_DATA_TYPE);
943 continue;
944 }
945
946 /*
947 Check if this field is covered by a single-part unique index. If it is, we
948 don't want to create histogram statistics for it.
949 */
950 if (covered_by_single_part_index(thd, field)) {
951 results.emplace(column_name,
952 Message::COVERED_BY_SINGLE_PART_UNIQUE_INDEX);
953 continue;
954 }
955 resolved_fields.push_back(field);
956
957 bitmap_set_bit(tbl->read_set, field->field_index());
958 if (field->is_gcol()) {
959 bitmap_set_bit(tbl->write_set, field->field_index());
960 /*
961 The base columns needs to be in the write set in case of nested
962 generated columns:
963
964 CREATE TABLE t1 (
965 col1 INT,
966 col2 INT AS (col1 + 1) VIRTUAL,
967 col3 INT AS (col2 + 1) VIRTUAL);
968
969 If we are reading data from "col3", we also need to update the data in
970 "col2" in order for the generated value to be correct.
971 */
972 bitmap_union(tbl->write_set, &field->gcol_info->base_columns_map);
973 bitmap_union(tbl->read_set, &field->gcol_info->base_columns_map);
974 }
975 }
976
977 /*
978 If we don't have any fields, we just quit here. Return "true" so we don't
979 write empty transactions/statements to the binlog.
980 */
981 if (resolved_fields.empty()) return true;
982
983 /*
984 Prepare one Value_map for each field we are creating histogram statistics
985 for. Also, estimate how many bytes one row will consume so that we can
986 estimate how many rows we can fit into memory permitted by
987 histogram_generation_max_mem_size.
988 */
989 size_t row_size_bytes = 0;
990 value_map_collection value_maps;
991 if (prepare_value_maps(resolved_fields, value_maps, &row_size_bytes))
992 return true; /* purecov: deadcode */
993
994 /*
995 Caclulate how many rows we can fit into memory permitted by
996 histogram_generation_max_mem_size.
997 */
998 double rows_in_memory = thd->variables.histogram_generation_max_mem_size /
999 static_cast<double>(row_size_bytes);
1000
1001 /*
1002 Ensure that we estimate at least one row in the table, so we avoid
1003 division by zero error.
1004
1005 NOTE: We ignore errors from "fetch_number_of_rows()" on purpose, since we
1006 don't consider it fatal not having the correct row estimate.
1007 */
1008 table->fetch_number_of_rows();
1009 ha_rows rows_in_table = std::max(1ULL, tbl->file->stats.records);
1010
1011 double sample_percentage = rows_in_memory / rows_in_table * 100.0;
1012 sample_percentage = std::min(sample_percentage, 100.0);
1013
1014 // Read data from the table into the Value_maps we have prepared.
1015 if (fill_value_maps(resolved_fields, sample_percentage, tbl, value_maps))
1016 return true; /* purecov: deadcode */
1017
1018 // Create a histogram for each Value_map, and store it to persistent storage.
1019 for (const Field *field : resolved_fields) {
1020 /*
1021 The MEM_ROOT is transferred to the dictionary object when
1022 histogram->store_histogram is called.
1023 */
1024 MEM_ROOT local_mem_root;
1025 init_alloc_root(key_memory_histograms, &local_mem_root, 256, 0);
1026
1027 std::string col_name(field->field_name);
1028 histograms::Histogram *histogram =
1029 value_maps.at(field->field_index())
1030 ->build_histogram(
1031 &local_mem_root, num_buckets,
1032 std::string(table->db, table->db_length),
1033 std::string(table->table_name, table->table_name_length),
1034 col_name);
1035
1036 if (histogram == nullptr) {
1037 /* purecov: begin inspected */
1038 my_error(ER_UNABLE_TO_BUILD_HISTOGRAM, MYF(0), field->field_name,
1039 table->db, table->table_name);
1040 return true;
1041 /* purecov: end */
1042 } else if (histogram->store_histogram(thd)) {
1043 // errors have already been reported
1044 return true; /* purecov: deadcode */
1045 }
1046
1047 results.emplace(col_name, Message::HISTOGRAM_CREATED);
1048 }
1049
1050 bool ret = trans_commit_stmt(thd) || trans_commit(thd);
1051 close_thread_tables(thd);
1052 tables_guard.commit();
1053 return ret;
1054 }
1055
drop_all_histograms(THD * thd,const TABLE_LIST & table,const dd::Table & table_definition,results_map & results)1056 bool drop_all_histograms(THD *thd, const TABLE_LIST &table,
1057 const dd::Table &table_definition,
1058 results_map &results) {
1059 columns_set columns;
1060 for (const auto &col : table_definition.columns())
1061 columns.emplace(col->name().c_str());
1062
1063 return drop_histograms(thd, table, columns, results);
1064 }
1065
drop_histograms(THD * thd,const TABLE_LIST & table,const columns_set & columns,results_map & results)1066 bool drop_histograms(THD *thd, const TABLE_LIST &table,
1067 const columns_set &columns, results_map &results) {
1068 dd::cache::Dictionary_client *client = thd->dd_client();
1069 dd::cache::Dictionary_client::Auto_releaser auto_releaser(client);
1070
1071 for (const std::string &column_name : columns) {
1072 MDL_key mdl_key;
1073 dd::Column_statistics::create_mdl_key(
1074 {table.db, table.db_length},
1075 {table.table_name, table.table_name_length}, column_name.c_str(),
1076 &mdl_key);
1077
1078 if (lock_for_write(thd, mdl_key))
1079 return true; // error is already reported.
1080
1081 dd::String_type dd_name = dd::Column_statistics::create_name(
1082 {table.db, table.db_length},
1083 {table.table_name, table.table_name_length}, column_name.c_str());
1084
1085 // Do we have an existing histogram for this column?
1086 const dd::Column_statistics *column_statistics = nullptr;
1087 if (client->acquire(dd_name, &column_statistics)) {
1088 // error is already reported.
1089 return true; /* purecov: deadcode */
1090 }
1091
1092 if (column_statistics == nullptr) {
1093 results.emplace(column_name, Message::NO_HISTOGRAM_FOUND);
1094 continue;
1095 }
1096
1097 if (client->drop(column_statistics)) {
1098 /* purecov: begin inspected */
1099 my_error(ER_UNABLE_TO_DROP_COLUMN_STATISTICS, MYF(0), column_name.c_str(),
1100 table.db, table.table_name);
1101 return true;
1102 /* purecov: end */
1103 }
1104
1105 results.emplace(column_name, Message::HISTOGRAM_DELETED);
1106 }
1107
1108 return false;
1109 }
1110
store_histogram(THD * thd) const1111 bool Histogram::store_histogram(THD *thd) const {
1112 dd::cache::Dictionary_client *client = thd->dd_client();
1113
1114 MDL_key mdl_key;
1115 dd::Column_statistics::create_mdl_key(get_database_name().str,
1116 get_table_name().str,
1117 get_column_name().str, &mdl_key);
1118
1119 if (lock_for_write(thd, mdl_key)) {
1120 // Error has already been reported
1121 return true; /* purecov: deadcode */
1122 }
1123
1124 DEBUG_SYNC(thd, "store_histogram_after_write_lock");
1125
1126 dd::String_type dd_name = dd::Column_statistics::create_name(
1127 get_database_name().str, get_table_name().str, get_column_name().str);
1128
1129 // Do we have an existing histogram for this column?
1130 dd::Column_statistics *column_stats = nullptr;
1131 if (client->acquire_for_modification(dd_name, &column_stats)) {
1132 // Error has already been reported
1133 return true; /* purecov: deadcode */
1134 }
1135
1136 if (column_stats != nullptr) {
1137 // Update the existing object.
1138 column_stats->set_histogram(this);
1139 if (client->update(column_stats)) {
1140 /* purecov: begin inspected */
1141 my_error(ER_UNABLE_TO_UPDATE_COLUMN_STATISTICS, MYF(0),
1142 get_column_name().str, get_database_name().str,
1143 get_table_name().str);
1144 return true;
1145 /* purecov: end */
1146 }
1147 } else {
1148 // Create a new object
1149 std::unique_ptr<dd::Column_statistics> column_statistics(
1150 dd::create_object<dd::Column_statistics>());
1151
1152 column_statistics.get()->set_schema_name(get_database_name().str);
1153 column_statistics.get()->set_table_name(get_table_name().str);
1154 column_statistics.get()->set_column_name(get_column_name().str);
1155 column_statistics.get()->set_name(dd_name);
1156 column_statistics.get()->set_histogram(this);
1157
1158 if (client->store(column_statistics.get())) {
1159 /* purecov: begin inspected */
1160 my_error(ER_UNABLE_TO_STORE_COLUMN_STATISTICS, MYF(0),
1161 get_column_name().str, get_database_name().str,
1162 get_table_name().str);
1163 return true;
1164 /* purecov: end */
1165 }
1166 }
1167
1168 return false;
1169 }
1170
1171 /**
1172 Rename a single histogram from a old schema/table name to a new schema/table
1173 name. It is used for instance by RENAME TABLE, where the contents of the
1174 histograms doesn't change.
1175
1176 @param thd Thread handler.
1177 @param old_schema_name The old schema name.
1178 @param old_table_name The old table name.
1179 @param new_schema_name The new schema name.
1180 @param new_table_name The new table name.
1181 @param column_name The column name.
1182 @param results A map where the result of the operation is stored.
1183
1184 @return false on success, true on error.
1185 */
rename_histogram(THD * thd,const char * old_schema_name,const char * old_table_name,const char * new_schema_name,const char * new_table_name,const char * column_name,results_map & results)1186 static bool rename_histogram(THD *thd, const char *old_schema_name,
1187 const char *old_table_name,
1188 const char *new_schema_name,
1189 const char *new_table_name,
1190 const char *column_name, results_map &results) {
1191 dd::cache::Dictionary_client *client = thd->dd_client();
1192 dd::cache::Dictionary_client::Auto_releaser auto_releaser(client);
1193
1194 // First find the histogram with the old name.
1195 MDL_key mdl_key;
1196 dd::Column_statistics::create_mdl_key(old_schema_name, old_table_name,
1197 column_name, &mdl_key);
1198
1199 if (lock_for_write(thd, mdl_key)) {
1200 // Error has already been reported
1201 return true; /* purecov: deadcode */
1202 }
1203
1204 dd::String_type dd_name = dd::Column_statistics::create_name(
1205 old_schema_name, old_table_name, column_name);
1206
1207 dd::Column_statistics *column_statistics = nullptr;
1208 if (client->acquire_for_modification(dd_name, &column_statistics)) {
1209 // Error has already been reported
1210 return true; /* purecov: deadcode */
1211 }
1212
1213 if (column_statistics == nullptr) {
1214 results.emplace(column_name, Message::NO_HISTOGRAM_FOUND);
1215 return false;
1216 }
1217
1218 dd::Column_statistics::create_mdl_key(new_schema_name, new_table_name,
1219 column_name, &mdl_key);
1220
1221 if (lock_for_write(thd, mdl_key)) {
1222 // Error has already been reported
1223 return true; /* purecov: deadcode */
1224 }
1225
1226 column_statistics->set_schema_name(new_schema_name);
1227 column_statistics->set_table_name(new_table_name);
1228 column_statistics->set_column_name(column_name);
1229 column_statistics->set_name(column_statistics->create_name());
1230 if (client->update(column_statistics)) {
1231 /* purecov: begin inspected */
1232 my_error(ER_UNABLE_TO_UPDATE_COLUMN_STATISTICS, MYF(0), column_name,
1233 old_schema_name, old_table_name);
1234 return true;
1235 /* purecov: end */
1236 }
1237
1238 results.emplace(column_name, Message::HISTOGRAM_DELETED);
1239 return false;
1240 }
1241
rename_histograms(THD * thd,const char * old_schema_name,const char * old_table_name,const char * new_schema_name,const char * new_table_name,results_map & results)1242 bool rename_histograms(THD *thd, const char *old_schema_name,
1243 const char *old_table_name, const char *new_schema_name,
1244 const char *new_table_name, results_map &results) {
1245 dd::cache::Dictionary_client::Auto_releaser releaser(thd->dd_client());
1246
1247 MDL_request mdl_request;
1248 MDL_REQUEST_INIT(&mdl_request, MDL_key::TABLE, old_schema_name,
1249 old_table_name, MDL_SHARED_READ_ONLY, MDL_TRANSACTION);
1250
1251 if (thd->mdl_context.acquire_lock(&mdl_request,
1252 thd->variables.lock_wait_timeout)) {
1253 // error has already been reported
1254 return true; /* purecov: deadcode */
1255 }
1256
1257 /*
1258 We have to look up the new table since it already will be renamed at this
1259 point.
1260 */
1261 const dd::Table *table_def = nullptr;
1262 if (thd->dd_client()->acquire(new_schema_name, new_table_name, &table_def)) {
1263 // error has already been reported
1264 return false; /* purecov: deadcode */
1265 }
1266
1267 if (table_def == nullptr) {
1268 DBUG_ASSERT(false); /* purecov: deadcode */
1269 return false;
1270 }
1271
1272 for (const auto &col : table_def->columns()) {
1273 if (rename_histogram(thd, old_schema_name, old_table_name, new_schema_name,
1274 new_table_name, col->name().c_str(), results))
1275 return true; /* purecov: deadcode */
1276 }
1277
1278 return false;
1279 }
1280
find_histogram(THD * thd,const std::string & schema_name,const std::string & table_name,const std::string & column_name,const Histogram ** histogram)1281 bool find_histogram(THD *thd, const std::string &schema_name,
1282 const std::string &table_name,
1283 const std::string &column_name,
1284 const Histogram **histogram) {
1285 DBUG_ASSERT(*histogram == nullptr);
1286
1287 if (schema_name == "mysql" || table_name == "column_statistics") return false;
1288
1289 dd::String_type dd_name = dd::Column_statistics::create_name(
1290 schema_name.c_str(), table_name.c_str(), column_name.c_str());
1291
1292 const dd::Column_statistics *column_statistics = nullptr;
1293 dd::cache::Dictionary_client *client = thd->dd_client();
1294 if (client->acquire<dd::Column_statistics>(dd_name, &column_statistics))
1295 return true; /* purecov: deadcode */
1296
1297 if (column_statistics == nullptr) return false;
1298
1299 *histogram = column_statistics->histogram();
1300 return false;
1301 }
1302
1303 template <class T>
get_less_than_selectivity_dispatcher(const T & value) const1304 double Histogram::get_less_than_selectivity_dispatcher(const T &value) const {
1305 switch (get_histogram_type()) {
1306 case enum_histogram_type::SINGLETON: {
1307 const Singleton<T> *singleton = down_cast<const Singleton<T> *>(this);
1308 return singleton->get_less_than_selectivity(value);
1309 }
1310 case enum_histogram_type::EQUI_HEIGHT: {
1311 const Equi_height<T> *equi_height =
1312 down_cast<const Equi_height<T> *>(this);
1313 return equi_height->get_less_than_selectivity(value);
1314 }
1315 }
1316 /* purecov: begin deadcode */
1317 DBUG_ASSERT(false);
1318 return 0.0;
1319 /* purecov: end deadcode */
1320 }
1321
1322 template <class T>
get_greater_than_selectivity_dispatcher(const T & value) const1323 double Histogram::get_greater_than_selectivity_dispatcher(
1324 const T &value) const {
1325 switch (get_histogram_type()) {
1326 case enum_histogram_type::SINGLETON: {
1327 const Singleton<T> *singleton = down_cast<const Singleton<T> *>(this);
1328 return singleton->get_greater_than_selectivity(value);
1329 }
1330 case enum_histogram_type::EQUI_HEIGHT: {
1331 const Equi_height<T> *equi_height =
1332 down_cast<const Equi_height<T> *>(this);
1333 return equi_height->get_greater_than_selectivity(value);
1334 }
1335 }
1336 /* purecov: begin deadcode */
1337 DBUG_ASSERT(false);
1338 return 0.0;
1339 /* purecov: end deadcode */
1340 }
1341
1342 template <class T>
get_equal_to_selectivity_dispatcher(const T & value) const1343 double Histogram::get_equal_to_selectivity_dispatcher(const T &value) const {
1344 switch (get_histogram_type()) {
1345 case enum_histogram_type::SINGLETON: {
1346 const Singleton<T> *singleton = down_cast<const Singleton<T> *>(this);
1347 return singleton->get_equal_to_selectivity(value);
1348 }
1349 case enum_histogram_type::EQUI_HEIGHT: {
1350 const Equi_height<T> *equi_height =
1351 down_cast<const Equi_height<T> *>(this);
1352 return equi_height->get_equal_to_selectivity(value);
1353 }
1354 }
1355 /* purecov: begin deadcode */
1356 DBUG_ASSERT(false);
1357 return 0.0;
1358 /* purecov: end deadcode */
1359 }
1360
get_temporal(Item * item,Value_map_type preferred_type,MYSQL_TIME * time_value)1361 static bool get_temporal(Item *item, Value_map_type preferred_type,
1362 MYSQL_TIME *time_value) {
1363 if (item->is_temporal_with_date_and_time()) {
1364 TIME_from_longlong_datetime_packed(time_value, item->val_date_temporal());
1365 } else if (item->is_temporal_with_date()) {
1366 TIME_from_longlong_date_packed(time_value, item->val_date_temporal());
1367 } else if (item->is_temporal_with_time()) {
1368 TIME_from_longlong_time_packed(time_value, item->val_time_temporal());
1369 } else {
1370 switch (preferred_type) {
1371 case Value_map_type::DATE:
1372 case Value_map_type::DATETIME:
1373 if (item->get_date_from_non_temporal(time_value, 0)) return true;
1374 break;
1375 case Value_map_type::TIME:
1376 if (item->get_time_from_non_temporal(time_value)) return true;
1377 break;
1378 default:
1379 /* purecov: begin deadcode */
1380 DBUG_ASSERT(0);
1381 break;
1382 /* purecov: end deadcode */
1383 }
1384 }
1385
1386 return false;
1387 }
1388
1389 template <class T>
apply_operator(const enum_operator op,const T & value) const1390 double Histogram::apply_operator(const enum_operator op, const T &value) const {
1391 switch (op) {
1392 case enum_operator::LESS_THAN:
1393 return get_less_than_selectivity_dispatcher(value);
1394 case enum_operator::GREATER_THAN:
1395 return get_greater_than_selectivity_dispatcher(value);
1396 case enum_operator::EQUALS_TO:
1397 return get_equal_to_selectivity_dispatcher(value);
1398 default:
1399 /* purecov: begin deadcode */
1400 DBUG_ASSERT(false);
1401 return 1.0;
1402 /* purecov: end deadcode */
1403 }
1404 }
1405
get_selectivity_dispatcher(Item * item,const enum_operator op,const TYPELIB * typelib,double * selectivity) const1406 bool Histogram::get_selectivity_dispatcher(Item *item, const enum_operator op,
1407 const TYPELIB *typelib,
1408 double *selectivity) const {
1409 switch (this->get_data_type()) {
1410 case Value_map_type::INVALID: {
1411 /* purecov: begin deadcode */
1412 DBUG_ASSERT(false);
1413 return true;
1414 /* purecov: end deadcode */
1415 }
1416 case Value_map_type::STRING: {
1417 // Is the character set the same? If not, we cannot use the histogram
1418 if (item->collation.collation->number != get_character_set()->number)
1419 return true;
1420
1421 StringBuffer<MAX_FIELD_WIDTH> str_buf(item->collation.collation);
1422 const String *str = item->val_str(&str_buf);
1423 if (item->is_null()) return true;
1424
1425 *selectivity =
1426 apply_operator(op, str->substr(0, HISTOGRAM_MAX_COMPARE_LENGTH));
1427 return false;
1428 }
1429 case Value_map_type::INT: {
1430 const longlong value = item->val_int();
1431 if (item->is_null()) return true;
1432
1433 *selectivity = apply_operator(op, value);
1434 return false;
1435 }
1436 case Value_map_type::ENUM: {
1437 DBUG_ASSERT(typelib != nullptr);
1438
1439 longlong value;
1440 if (item->data_type() == MYSQL_TYPE_VARCHAR) {
1441 StringBuffer<MAX_FIELD_WIDTH> str_buf(item->collation.collation);
1442 const String *str = item->val_str(&str_buf);
1443 if (item->is_null()) return true;
1444
1445 // Remove any trailing whitespace
1446 size_t length = str->charset()->cset->lengthsp(
1447 str->charset(), str->ptr(), str->length());
1448 value = find_type2(typelib, str->ptr(), length, str->charset());
1449 } else {
1450 value = item->val_int();
1451 if (item->is_null()) return true;
1452 }
1453
1454 if (op == enum_operator::EQUALS_TO) {
1455 *selectivity = get_equal_to_selectivity_dispatcher(value);
1456 return false;
1457 }
1458
1459 return true; /* purecov: deadcode */
1460 }
1461 case Value_map_type::SET: {
1462 DBUG_ASSERT(typelib != nullptr);
1463
1464 longlong value;
1465 if (item->data_type() == MYSQL_TYPE_VARCHAR) {
1466 StringBuffer<MAX_FIELD_WIDTH> str_buf(item->collation.collation);
1467 const String *str = item->val_str(&str_buf);
1468 if (item->is_null()) return true;
1469
1470 bool got_warning;
1471 const char *not_used;
1472 uint not_used2;
1473 ulonglong tmp_value =
1474 find_set(typelib, str->ptr(), str->length(), str->charset(),
1475 ¬_used, ¬_used2, &got_warning);
1476
1477 value = static_cast<ulonglong>(tmp_value);
1478 } else {
1479 value = item->val_int();
1480 if (item->is_null()) return true;
1481 }
1482
1483 if (op == enum_operator::EQUALS_TO) {
1484 *selectivity = get_equal_to_selectivity_dispatcher(value);
1485 return false;
1486 }
1487
1488 return true; /* purecov: deadcode */
1489 }
1490 case Value_map_type::UINT: {
1491 const ulonglong value = static_cast<ulonglong>(item->val_int());
1492 if (item->is_null()) return true;
1493
1494 *selectivity = apply_operator(op, value);
1495 return false;
1496 }
1497 case Value_map_type::DOUBLE: {
1498 const double value = item->val_real();
1499 if (item->is_null()) return true;
1500
1501 *selectivity = apply_operator(op, value);
1502 return false;
1503 }
1504 case Value_map_type::DECIMAL: {
1505 my_decimal buffer;
1506 const my_decimal *value = item->val_decimal(&buffer);
1507 if (item->is_null()) return true;
1508
1509 *selectivity = apply_operator(op, *value);
1510 return false;
1511 }
1512 case Value_map_type::DATE:
1513 case Value_map_type::TIME:
1514 case Value_map_type::DATETIME: {
1515 MYSQL_TIME temporal_value;
1516 if (get_temporal(item, get_data_type(), &temporal_value) ||
1517 item->is_null())
1518 return true;
1519
1520 *selectivity = apply_operator(op, temporal_value);
1521 return false;
1522 }
1523 }
1524
1525 /* purecov: begin deadcode */
1526 DBUG_ASSERT(false);
1527 return true;
1528 /* purecov: end deadcode */
1529 }
1530
get_selectivity(Item ** items,size_t item_count,enum_operator op,double * selectivity) const1531 bool Histogram::get_selectivity(Item **items, size_t item_count,
1532 enum_operator op, double *selectivity) const {
1533 // Do some sanity checking first
1534 switch (op) {
1535 case enum_operator::EQUALS_TO:
1536 case enum_operator::GREATER_THAN:
1537 case enum_operator::LESS_THAN:
1538 case enum_operator::LESS_THAN_OR_EQUAL:
1539 case enum_operator::GREATER_THAN_OR_EQUAL:
1540 case enum_operator::NOT_EQUALS_TO:
1541 DBUG_ASSERT(item_count == 2);
1542 /*
1543 Verify that one side of the predicate is a column/field, and that the
1544 other side is a constant value.
1545
1546 Make sure that we have the constant item as the right side argument of
1547 the predicate internally.
1548 */
1549 if (items[0]->const_item() && items[1]->type() == Item::FIELD_ITEM) {
1550 // Flip the operators as well as the operator itself.
1551 switch (op) {
1552 case enum_operator::GREATER_THAN:
1553 op = enum_operator::LESS_THAN;
1554 break;
1555 case enum_operator::LESS_THAN:
1556 op = enum_operator::GREATER_THAN;
1557 break;
1558 case enum_operator::LESS_THAN_OR_EQUAL:
1559 op = enum_operator::GREATER_THAN_OR_EQUAL;
1560 break;
1561 case enum_operator::GREATER_THAN_OR_EQUAL:
1562 op = enum_operator::LESS_THAN_OR_EQUAL;
1563 break;
1564 default:
1565 break;
1566 }
1567 Item *items_flipped[2];
1568 items_flipped[0] = items[1];
1569 items_flipped[1] = items[0];
1570 return get_selectivity(items_flipped, item_count, op, selectivity);
1571 } else if (items[0]->type() != Item::FIELD_ITEM ||
1572 !items[1]->const_item()) {
1573 return true;
1574 }
1575 break;
1576 case enum_operator::BETWEEN:
1577 case enum_operator::NOT_BETWEEN:
1578 DBUG_ASSERT(item_count == 3);
1579
1580 if (items[0]->type() != Item::FIELD_ITEM || !items[1]->const_item() ||
1581 !items[2]->const_item()) {
1582 return true;
1583 }
1584 break;
1585 case enum_operator::IN_LIST:
1586 case enum_operator::NOT_IN_LIST:
1587 DBUG_ASSERT(item_count >= 2);
1588
1589 if (items[0]->type() != Item::FIELD_ITEM)
1590 return true; /* purecov: deadcode */
1591
1592 // This will only work if all items are const_items
1593 for (size_t i = 1; i < item_count; ++i) {
1594 if (!items[i]->const_item()) return true;
1595 }
1596 break;
1597 case enum_operator::IS_NULL:
1598 case enum_operator::IS_NOT_NULL:
1599 DBUG_ASSERT(item_count == 1);
1600 if (items[0]->type() != Item::FIELD_ITEM) return true;
1601 }
1602
1603 DBUG_ASSERT(items[0]->type() == Item::FIELD_ITEM);
1604
1605 const TYPELIB *typelib = nullptr;
1606 const Item_field *item_field = down_cast<const Item_field *>(items[0]);
1607 if (item_field->field->real_type() == MYSQL_TYPE_ENUM ||
1608 item_field->field->real_type() == MYSQL_TYPE_SET) {
1609 const Field_enum *field_enum =
1610 down_cast<const Field_enum *>(item_field->field);
1611 typelib = field_enum->typelib;
1612 }
1613
1614 switch (op) {
1615 case enum_operator::LESS_THAN:
1616 case enum_operator::EQUALS_TO:
1617 case enum_operator::GREATER_THAN: {
1618 return get_selectivity_dispatcher(items[1], op, typelib, selectivity);
1619 }
1620 case enum_operator::LESS_THAN_OR_EQUAL: {
1621 double less_than_selectivity;
1622 double equals_to_selectivity;
1623 if (get_selectivity_dispatcher(items[1], enum_operator::LESS_THAN,
1624 typelib, &less_than_selectivity) ||
1625 get_selectivity_dispatcher(items[1], enum_operator::EQUALS_TO,
1626 typelib, &equals_to_selectivity))
1627 return true;
1628
1629 *selectivity = std::min(less_than_selectivity + equals_to_selectivity,
1630 get_non_null_values_frequency());
1631 return false;
1632 }
1633 case enum_operator::GREATER_THAN_OR_EQUAL: {
1634 double greater_than_selectivity;
1635 double equals_to_selectivity;
1636 if (get_selectivity_dispatcher(items[1], enum_operator::GREATER_THAN,
1637 typelib, &greater_than_selectivity) ||
1638 get_selectivity_dispatcher(items[1], enum_operator::EQUALS_TO,
1639 typelib, &equals_to_selectivity))
1640 return true;
1641
1642 *selectivity = std::min(greater_than_selectivity + equals_to_selectivity,
1643 get_non_null_values_frequency());
1644 return false;
1645 }
1646 case enum_operator::NOT_EQUALS_TO: {
1647 double equals_to_selectivity;
1648 if (get_selectivity_dispatcher(items[1], enum_operator::EQUALS_TO,
1649 typelib, &equals_to_selectivity))
1650 return true;
1651
1652 *selectivity = std::max(
1653 get_non_null_values_frequency() - equals_to_selectivity, 0.0);
1654 return false;
1655 }
1656 case enum_operator::BETWEEN: {
1657 double less_than_selectivity;
1658 double greater_than_selectivity;
1659 if (get_selectivity_dispatcher(items[1], enum_operator::LESS_THAN,
1660 typelib, &less_than_selectivity) ||
1661 get_selectivity_dispatcher(items[2], enum_operator::GREATER_THAN,
1662 typelib, &greater_than_selectivity))
1663 return true;
1664
1665 *selectivity = this->get_non_null_values_frequency() -
1666 (less_than_selectivity + greater_than_selectivity);
1667
1668 /*
1669 Make sure that we don't return a value less than 0.0. This might happen
1670 with a query like:
1671 EXPLAIN SELECT a FROM t1 WHERE t1.a BETWEEN 3 AND 0;
1672 */
1673 *selectivity = std::max(0.0, *selectivity);
1674 return false;
1675 }
1676 case enum_operator::NOT_BETWEEN: {
1677 double less_than_selectivity;
1678 double greater_than_selectivity;
1679 if (get_selectivity_dispatcher(items[1], enum_operator::LESS_THAN,
1680 typelib, &less_than_selectivity) ||
1681 get_selectivity_dispatcher(items[2], enum_operator::GREATER_THAN,
1682 typelib, &greater_than_selectivity))
1683 return true;
1684
1685 /*
1686 Make sure that we don't return a value greater than 1.0. This might
1687 happen with a query like:
1688 EXPLAIN SELECT a FROM t1 WHERE t1.a NOT BETWEEN 3 AND 0;
1689 */
1690 *selectivity = std::min(less_than_selectivity + greater_than_selectivity,
1691 get_non_null_values_frequency());
1692 return false;
1693 }
1694 case enum_operator::IN_LIST: {
1695 *selectivity = 0.0;
1696 for (size_t i = 1; i < item_count; ++i) {
1697 double equals_to_selectivity;
1698 if (get_selectivity_dispatcher(items[i], enum_operator::EQUALS_TO,
1699 typelib, &equals_to_selectivity))
1700 return true;
1701
1702 *selectivity += equals_to_selectivity;
1703
1704 if (*selectivity >= get_non_null_values_frequency()) break;
1705 }
1706
1707 /*
1708 Long in-lists may easily exceed a selectivity of
1709 get_non_null_values_frequency() in certain cases.
1710 */
1711 *selectivity = std::min(*selectivity, get_non_null_values_frequency());
1712 return false;
1713 }
1714 case enum_operator::NOT_IN_LIST: {
1715 *selectivity = this->get_non_null_values_frequency();
1716 for (size_t i = 1; i < item_count; ++i) {
1717 double equals_to_selectivity;
1718 if (get_selectivity_dispatcher(items[i], enum_operator::EQUALS_TO,
1719 typelib, &equals_to_selectivity)) {
1720 if (items[i]->null_value) {
1721 // WHERE col1 NOT IN (..., NULL, ...) will return zero rows.
1722 *selectivity = 0.0;
1723 return false;
1724 }
1725
1726 return true; /* purecov: deadcode */
1727 }
1728
1729 *selectivity -= equals_to_selectivity;
1730 if (*selectivity <= 0.0) break;
1731 }
1732
1733 /*
1734 Long in-lists may easily estimate a selectivity less than 0.0 in certain
1735 cases.
1736 */
1737 *selectivity = std::max(*selectivity, 0.0);
1738 return false;
1739 }
1740 case enum_operator::IS_NULL:
1741 *selectivity = this->get_null_values_fraction();
1742 return false;
1743 case enum_operator::IS_NOT_NULL:
1744 *selectivity = 1.0 - this->get_null_values_fraction();
1745 return false;
1746 }
1747
1748 /* purecov: begin deadcode */
1749 DBUG_ASSERT(false);
1750 return true;
1751 /* purecov: end deadcode */
1752 }
1753
1754 // Explicit template instantiations.
1755 template Histogram *build_histogram(MEM_ROOT *, const Value_map<double> &,
1756 size_t, const std::string &,
1757 const std::string &, const std::string &);
1758
1759 template Histogram *build_histogram(MEM_ROOT *, const Value_map<String> &,
1760 size_t, const std::string &,
1761 const std::string &, const std::string &);
1762
1763 template Histogram *build_histogram(MEM_ROOT *, const Value_map<ulonglong> &,
1764 size_t, const std::string &,
1765 const std::string &, const std::string &);
1766
1767 template Histogram *build_histogram(MEM_ROOT *, const Value_map<longlong> &,
1768 size_t, const std::string &,
1769 const std::string &, const std::string &);
1770
1771 template Histogram *build_histogram(MEM_ROOT *, const Value_map<MYSQL_TIME> &,
1772 size_t, const std::string &,
1773 const std::string &, const std::string &);
1774
1775 template Histogram *build_histogram(MEM_ROOT *, const Value_map<my_decimal> &,
1776 size_t, const std::string &,
1777 const std::string &, const std::string &);
1778
1779 } // namespace histograms
1780