1 /*****************************************************************************
2
3 Copyright (c) 2009, 2021, Oracle and/or its affiliates.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file dict/dict0stats.cc
29 Code used for calculating and manipulating table statistics.
30
31 Created Jan 06, 2010 Vasil Dimov
32 *******************************************************/
33
34 #ifndef UNIV_HOTBACKUP
35
36 #include "univ.i"
37
38 #include "ut0ut.h"
39 #include "ut0rnd.h"
40 #include "dyn0buf.h"
41 #include "row0sel.h"
42 #include "trx0trx.h"
43 #include "pars0pars.h"
44 #include "dict0stats.h"
45 #include "ha_prototypes.h"
46 #include "ut0new.h"
47 #include <mysql_com.h>
48 #include "row0mysql.h"
49
50 #include <algorithm>
51 #include <map>
52 #include <vector>
53
54 /* Sampling algorithm description @{
55
56 The algorithm is controlled by one number - N_SAMPLE_PAGES(index),
57 let it be A, which is the number of leaf pages to analyze for a given index
58 for each n-prefix (if the index is on 3 columns, then 3*A leaf pages will be
59 analyzed).
60
61 Let the total number of leaf pages in the table be T.
62 Level 0 - leaf pages, level H - root.
63
64 Definition: N-prefix-boring record is a record on a non-leaf page that equals
65 the next (to the right, cross page boundaries, skipping the supremum and
66 infimum) record on the same level when looking at the first n-prefix columns.
67 The last (user) record on a level is not boring (it does not match the
68 non-existent user record to the right). We call the records boring because all
69 the records on the page below a boring record are equal to that boring record.
70
71 We avoid diving below boring records when searching for a leaf page to
72 estimate the number of distinct records because we know that such a leaf
73 page will have number of distinct records == 1.
74
75 For each n-prefix: start from the root level and full scan subsequent lower
76 levels until a level that contains at least A*10 distinct records is found.
77 Let's call this level LA.
78 As an optimization the search is canceled if it has reached level 1 (never
79 descend to the level 0 (leaf)) and also if the next level to be scanned
80 would contain more than A pages. The latter is because the user has asked
81 to analyze A leaf pages and it does not make sense to scan much more than
82 A non-leaf pages with the sole purpose of finding a good sample of A leaf
83 pages.
84
85 After finding the appropriate level LA with >A*10 distinct records (or less in
86 the exceptions described above), divide it into groups of equal records and
87 pick A such groups. Then pick the last record from each group. For example,
88 let the level be:
89
90 index: 0,1,2,3,4,5,6,7,8,9,10
91 record: 1,1,1,2,2,7,7,7,7,7,9
92
93 There are 4 groups of distinct records and if A=2 random ones are selected,
94 e.g. 1,1,1 and 7,7,7,7,7, then records with indexes 2 and 9 will be selected.
95
96 After selecting A records as described above, dive below them to find A leaf
97 pages and analyze them, finding the total number of distinct records. The
98 dive to the leaf level is performed by selecting a non-boring record from
99 each page and diving below it.
100
101 This way, a total of A leaf pages are analyzed for the given n-prefix.
102
103 Let the number of different key values found in each leaf page i be Pi (i=1..A).
104 Let N_DIFF_AVG_LEAF be (P1 + P2 + ... + PA) / A.
105 Let the number of different key values on level LA be N_DIFF_LA.
106 Let the total number of records on level LA be TOTAL_LA.
107 Let R be N_DIFF_LA / TOTAL_LA, we assume this ratio is the same on the
108 leaf level.
109 Let the number of leaf pages be N.
110 Then the total number of different key values on the leaf level is:
111 N * R * N_DIFF_AVG_LEAF.
112 See REF01 for the implementation.
113
114 The above describes how to calculate the cardinality of an index.
115 This algorithm is executed for each n-prefix of a multi-column index
116 where n=1..n_uniq.
117 @} */
118
119 /* names of the tables from the persistent statistics storage */
120 #define TABLE_STATS_NAME "mysql/innodb_table_stats"
121 #define TABLE_STATS_NAME_PRINT "mysql.innodb_table_stats"
122 #define INDEX_STATS_NAME "mysql/innodb_index_stats"
123 #define INDEX_STATS_NAME_PRINT "mysql.innodb_index_stats"
124
125 #ifdef UNIV_STATS_DEBUG
126 #define DEBUG_PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__)
127 #else /* UNIV_STATS_DEBUG */
128 #define DEBUG_PRINTF(fmt, ...) /* noop */
129 #endif /* UNIV_STATS_DEBUG */
130
131 /* Gets the number of leaf pages to sample in persistent stats estimation */
132 #define N_SAMPLE_PAGES(index) \
133 static_cast<ib_uint64_t>( \
134 (index)->table->stats_sample_pages != 0 \
135 ? (index)->table->stats_sample_pages \
136 : srv_stats_persistent_sample_pages)
137
138 /* number of distinct records on a given level that are required to stop
139 descending to lower levels and fetch N_SAMPLE_PAGES(index) records
140 from that level */
141 #define N_DIFF_REQUIRED(index) (N_SAMPLE_PAGES(index) * 10)
142
143 /* A dynamic array where we store the boundaries of each distinct group
144 of keys. For example if a btree level is:
145 index: 0,1,2,3,4,5,6,7,8,9,10,11,12
146 data: b,b,b,b,b,b,g,g,j,j,j, x, y
147 then we would store 5,7,10,11,12 in the array. */
148 typedef std::vector<ib_uint64_t, ut_allocator<ib_uint64_t> > boundaries_t;
149
150 /** Allocator type used for index_map_t. */
151 typedef ut_allocator<std::pair<const char* const, dict_index_t*> >
152 index_map_t_allocator;
153
154 /** Auxiliary map used for sorting indexes by name in dict_stats_save(). */
155 typedef std::map<const char*, dict_index_t*, ut_strcmp_functor,
156 index_map_t_allocator> index_map_t;
157
158 /*********************************************************************//**
159 Checks whether an index should be ignored in stats manipulations:
160 * stats fetch
161 * stats recalc
162 * stats save
163 @return true if the index should be ignored in stats manipulations */
164 UNIV_INLINE
165 bool
dict_stats_should_ignore_index(const dict_index_t * index)166 dict_stats_should_ignore_index(
167 /*===========================*/
168 const dict_index_t* index) /*!< in: index */
169 {
170 return((index->type & DICT_FTS)
171 || dict_index_is_corrupted(index)
172 || dict_index_is_spatial(index)
173 || index->to_be_dropped
174 || !index->is_committed());
175 }
176
177 /*********************************************************************//**
178 Checks whether the persistent statistics storage exists and that all
179 tables have the proper structure.
180 @return true if exists and all tables are ok */
static
bool
dict_stats_persistent_storage_check(
/*================================*/
	bool	caller_has_dict_sys_mutex)	/*!< in: true if the caller
						owns dict_sys->mutex */
{
	/* definition for the table TABLE_STATS_NAME; the lengths are in
	bytes (e.g. 192 = 64 chars * 3 — presumably utf8 max bytes per
	char, cf. the explicit 64*3 below; confirm against the DDL of
	mysql.innodb_table_stats) */
	dict_col_meta_t	table_stats_columns[] = {
		{"database_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"table_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 597},

		{"last_update", DATA_FIXBINARY,
			DATA_NOT_NULL, 4},

		{"n_rows", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8},

		{"clustered_index_size", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8},

		{"sum_of_other_index_sizes", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8}
	};
	dict_table_schema_t	table_stats_schema = {
		TABLE_STATS_NAME,
		UT_ARR_SIZE(table_stats_columns),
		table_stats_columns,
		0 /* n_foreign */,
		0 /* n_referenced */
	};

	/* definition for the table INDEX_STATS_NAME */
	dict_col_meta_t	index_stats_columns[] = {
		{"database_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"table_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 597},

		{"index_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"last_update", DATA_FIXBINARY,
			DATA_NOT_NULL, 4},

		{"stat_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 64*3},

		{"stat_value", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8},

		{"sample_size", DATA_INT,
			DATA_UNSIGNED, 8},

		{"stat_description", DATA_VARMYSQL,
			DATA_NOT_NULL, 1024*3}
	};
	dict_table_schema_t	index_stats_schema = {
		INDEX_STATS_NAME,
		UT_ARR_SIZE(index_stats_columns),
		index_stats_columns,
		0 /* n_foreign */,
		0 /* n_referenced */
	};

	char		errstr[512];
	dberr_t		ret;

	/* dict_table_schema_check() requires the dict_sys mutex;
	acquire it here only if the caller does not already own it. */
	if (!caller_has_dict_sys_mutex) {
		mutex_enter(&dict_sys->mutex);
	}

	ut_ad(mutex_own(&dict_sys->mutex));

	/* first check table_stats */
	ret = dict_table_schema_check(&table_stats_schema, errstr,
				      sizeof(errstr));
	if (ret == DB_SUCCESS) {
		/* if it is ok, then check index_stats */
		ret = dict_table_schema_check(&index_stats_schema, errstr,
					      sizeof(errstr));
	}

	if (!caller_has_dict_sys_mutex) {
		mutex_exit(&dict_sys->mutex);
	}

	if (ret != DB_SUCCESS) {
		/* errstr was filled in by dict_table_schema_check() */
		ib::error() << errstr;
		return(false);
	}
	/* else */

	return(true);
}
280
281 /** Executes a given SQL statement using the InnoDB internal SQL parser.
282 This function will free the pinfo object.
283 @param[in,out] pinfo pinfo to pass to que_eval_sql() must already
284 have any literals bound to it
285 @param[in] sql SQL string to execute
286 @param[in,out] trx in case of NULL the function will allocate and
287 free the trx object. If it is not NULL then it will be rolled back
288 only in the case of error, but not freed.
289 @return DB_SUCCESS or error code */
static
dberr_t
dict_stats_exec_sql(
	pars_info_t*	pinfo,
	const char*	sql,
	trx_t*		trx)
{
	dberr_t	err;
	bool	trx_started = false;

	/* The caller must hold the dict operation X-latch and the
	dict_sys mutex for the whole internal SQL execution. */
	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
	ut_ad(mutex_own(&dict_sys->mutex));

	if (!dict_stats_persistent_storage_check(true)) {
		/* que_eval_sql() normally frees pinfo; since we return
		early without calling it, free pinfo here to honor this
		function's contract. */
		pars_info_free(pinfo);
		return(DB_STATS_DO_NOT_EXIST);
	}

	if (trx == NULL) {
		/* No trx supplied by the caller: run inside a private
		background trx, which is committed or rolled back and
		freed before returning. */
		trx = trx_allocate_for_background();
		trx_started = true;

		if (srv_read_only_mode) {
			trx_start_internal_read_only(trx);
		} else {
			trx_start_internal(trx);
		}
	}

	err = que_eval_sql(pinfo, sql, FALSE, trx); /* pinfo is freed here */

	/* Debug hook: force a stats-table error, but only on the
	caller-supplied-trx path (!trx_started). */
	DBUG_EXECUTE_IF("stats_index_error",
		if (!trx_started) {
			err = DB_STATS_DO_NOT_EXIST;
			trx->error_state = DB_STATS_DO_NOT_EXIST;
		});

	if (!trx_started && err == DB_SUCCESS) {
		/* Caller-supplied trx and success: leave commit (or
		further statements) to the caller, per the contract. */
		return(DB_SUCCESS);
	}

	if (err == DB_SUCCESS) {
		trx_commit_for_mysql(trx);
	} else {
		/* On error, roll back. dict_operation_lock_mode is set
		temporarily so the rollback code knows we already hold
		the dict operation X-latch and does not re-acquire it. */
		trx->op_info = "rollback of internal trx on stats tables";
		trx->dict_operation_lock_mode = RW_X_LATCH;
		trx_rollback_to_savepoint(trx, NULL);
		trx->dict_operation_lock_mode = 0;
		trx->op_info = "";
		ut_a(trx->error_state == DB_SUCCESS);
	}

	if (trx_started) {
		trx_free_for_background(trx);
	}

	return(err);
}
348
349 /*********************************************************************//**
350 Duplicate a table object and its indexes.
351 This function creates a dummy dict_table_t object and initializes the
352 following table and index members:
353 dict_table_t::id (copied)
354 dict_table_t::heap (newly created)
355 dict_table_t::name (copied)
356 dict_table_t::corrupted (copied)
357 dict_table_t::indexes<> (newly created)
358 dict_table_t::magic_n
359 for each entry in dict_table_t::indexes, the following are initialized:
360 (indexes that have DICT_FTS set in index->type are skipped)
361 dict_index_t::id (copied)
362 dict_index_t::name (copied)
363 dict_index_t::table_name (points to the copied table name)
364 dict_index_t::table (points to the above semi-initialized object)
365 dict_index_t::type (copied)
366 dict_index_t::to_be_dropped (copied)
367 dict_index_t::online_status (copied)
368 dict_index_t::n_uniq (copied)
369 dict_index_t::fields[] (newly created, only first n_uniq, only fields[i].name)
370 dict_index_t::indexes<> (newly created)
371 dict_index_t::stat_n_diff_key_vals[] (only allocated, left uninitialized)
372 dict_index_t::stat_n_sample_sizes[] (only allocated, left uninitialized)
373 dict_index_t::stat_n_non_null_key_vals[] (only allocated, left uninitialized)
374 dict_index_t::magic_n
375 The returned object should be freed with dict_stats_table_clone_free()
376 when no longer needed.
377 @return incomplete table object */
static
dict_table_t*
dict_stats_table_clone_create(
/*==========================*/
	const dict_table_t*	table)	/*!< in: table whose stats to copy */
{
	size_t		heap_size;
	dict_index_t*	index;

	/* Estimate the size needed for the table and all of its indexes.
	NOTE: this first pass must mirror the allocation pass below
	exactly, otherwise mem_heap_create() gets a wrong hint. */

	heap_size = 0;
	heap_size += sizeof(dict_table_t);
	heap_size += strlen(table->name.m_name) + 1;

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_ibuf(index));

		/* Only the first n_uniq fields (and their stats arrays)
		are cloned; the rest are not needed for stats. */
		ulint	n_uniq = dict_index_get_n_unique(index);

		heap_size += sizeof(dict_index_t);
		heap_size += strlen(index->name) + 1;
		heap_size += n_uniq * sizeof(index->fields[0]);
		for (ulint i = 0; i < n_uniq; i++) {
			heap_size += strlen(index->fields[i].name) + 1;
		}
		heap_size += n_uniq * sizeof(index->stat_n_diff_key_vals[0]);
		heap_size += n_uniq * sizeof(index->stat_n_sample_sizes[0]);
		heap_size += n_uniq * sizeof(index->stat_n_non_null_key_vals[0]);
	}

	/* Allocate the memory and copy the members */

	mem_heap_t*	heap;

	heap = mem_heap_create(heap_size);

	dict_table_t*	t;

	t = (dict_table_t*) mem_heap_alloc(heap, sizeof(*t));

	UNIV_MEM_ASSERT_RW_ABORT(&table->id, sizeof(table->id));
	t->id = table->id;

	/* The clone owns the heap; dict_stats_table_clone_free() frees
	everything via t->heap. */
	t->heap = heap;

	t->name.m_name = mem_heap_strdup(heap, table->name.m_name);

	t->corrupted = table->corrupted;

	/* This private object "t" is not shared with other threads, so
	we do not need the stats_latch (thus we pass false below). The
	dict_table_stats_lock()/unlock() routines will do nothing. */
	dict_table_stats_latch_create(t, false);

	UT_LIST_INIT(t->indexes, &dict_index_t::indexes);

	/* Second pass: clone each non-ignored index. Must skip exactly
	the same indexes as the sizing pass above. */
	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_ibuf(index));

		dict_index_t*	idx;

		idx = (dict_index_t*) mem_heap_alloc(heap, sizeof(*idx));

		UNIV_MEM_ASSERT_RW_ABORT(&index->id, sizeof(index->id));
		idx->id = index->id;

		idx->name = mem_heap_strdup(heap, index->name);

		idx->table_name = t->name.m_name;

		idx->table = t;

		idx->type = index->type;

		/* The clone is private, so it is never "to be dropped"
		and is always a complete, committed index. */
		idx->to_be_dropped = 0;

		idx->online_status = ONLINE_INDEX_COMPLETE;
		idx->set_committed(true);

		idx->n_uniq = index->n_uniq;

		idx->fields = (dict_field_t*) mem_heap_alloc(
			heap, idx->n_uniq * sizeof(idx->fields[0]));

		/* Only fields[i].name is cloned (see the function doc). */
		for (ulint i = 0; i < idx->n_uniq; i++) {
			idx->fields[i].name = mem_heap_strdup(
				heap, index->fields[i].name);
		}

		/* hook idx into t->indexes */
		UT_LIST_ADD_LAST(t->indexes, idx);

		/* Stats arrays are allocated but left uninitialized;
		dict_stats_copy() fills them in. */
		idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_diff_key_vals[0]));

		idx->stat_n_sample_sizes = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_sample_sizes[0]));

		idx->stat_n_non_null_key_vals = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_non_null_key_vals[0]));
		ut_d(idx->magic_n = DICT_INDEX_MAGIC_N);
	}

	ut_d(t->magic_n = DICT_TABLE_MAGIC_N);

	return(t);
}
503
504 /*********************************************************************//**
505 Free the resources occupied by an object returned by
506 dict_stats_table_clone_create(). */
static
void
dict_stats_table_clone_free(
/*========================*/
	dict_table_t*	t)	/*!< in: dummy table object to free */
{
	/* Destroy the latch created by dict_stats_table_clone_create()
	first; the heap free below releases t itself and everything
	allocated from t->heap. */
	dict_table_stats_latch_destroy(t);
	mem_heap_free(t->heap);
}
516
517 /*********************************************************************//**
518 Write all zeros (or 1 where it makes sense) into an index
519 statistics members. The resulting stats correspond to an empty index.
520 The caller must own index's table stats latch in X mode
521 (dict_table_stats_lock(table, RW_X_LATCH)) */
522 static
523 void
dict_stats_empty_index(dict_index_t * index)524 dict_stats_empty_index(
525 /*===================*/
526 dict_index_t* index) /*!< in/out: index */
527 {
528 ut_ad(!(index->type & DICT_FTS));
529 ut_ad(!dict_index_is_ibuf(index));
530
531 ulint n_uniq = index->n_uniq;
532
533 for (ulint i = 0; i < n_uniq; i++) {
534 index->stat_n_diff_key_vals[i] = 0;
535 index->stat_n_sample_sizes[i] = 1;
536 index->stat_n_non_null_key_vals[i] = 0;
537 }
538
539 index->stat_index_size = 1;
540 index->stat_n_leaf_pages = 1;
541 }
542
543 /*********************************************************************//**
544 Write all zeros (or 1 where it makes sense) into a table and its indexes'
545 statistics members. The resulting stats correspond to an empty table. */
static
void
dict_stats_empty_table(
/*===================*/
	dict_table_t*	table)	/*!< in/out: table */
{
	/* Zero the stats members */

	dict_table_stats_lock(table, RW_X_LATCH);

	table->stat_n_rows = 0;
	table->stat_clustered_index_size = 1;
	/* 1 page for each index, not counting the clustered */
	table->stat_sum_of_other_index_sizes
		= UT_LIST_GET_LEN(table->indexes) - 1;
	table->stat_modified_counter = 0;

	dict_index_t*	index;

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		/* FTS indexes have no stats arrays to reset. */
		if (index->type & DICT_FTS) {
			continue;
		}

		ut_ad(!dict_index_is_ibuf(index));

		dict_stats_empty_index(index);
	}

	/* Mark the (empty) stats as initialized so readers do not
	trip the dict_stats_assert_initialized() check. */
	table->stat_initialized = TRUE;

	dict_table_stats_unlock(table, RW_X_LATCH);
}
582
583 /*********************************************************************//**
584 Check whether index's stats are initialized (assert if they are not). */
static
void
dict_stats_assert_initialized_index(
/*================================*/
	const dict_index_t*	index)	/*!< in: index */
{
	/* Each UNIV_MEM_ASSERT_RW_ABORT below aborts (under Valgrind
	instrumentation) if the given memory range is not fully
	initialized, i.e. if any stats member was never written. */
	UNIV_MEM_ASSERT_RW_ABORT(
		index->stat_n_diff_key_vals,
		index->n_uniq * sizeof(index->stat_n_diff_key_vals[0]));

	UNIV_MEM_ASSERT_RW_ABORT(
		index->stat_n_sample_sizes,
		index->n_uniq * sizeof(index->stat_n_sample_sizes[0]));

	UNIV_MEM_ASSERT_RW_ABORT(
		index->stat_n_non_null_key_vals,
		index->n_uniq * sizeof(index->stat_n_non_null_key_vals[0]));

	UNIV_MEM_ASSERT_RW_ABORT(
		&index->stat_index_size,
		sizeof(index->stat_index_size));

	UNIV_MEM_ASSERT_RW_ABORT(
		&index->stat_n_leaf_pages,
		sizeof(index->stat_n_leaf_pages));
}
611
612 /*********************************************************************//**
613 Check whether table's stats are initialized (assert if they are not). */
static
void
dict_stats_assert_initialized(
/*==========================*/
	const dict_table_t*	table)	/*!< in: table */
{
	ut_a(table->stat_initialized);

	/* Verify every table-level stats member has been written
	(UNIV_MEM_ASSERT_RW_ABORT aborts on uninitialized memory under
	Valgrind instrumentation). */
	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_last_recalc,
			   sizeof(table->stats_last_recalc));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_persistent,
			   sizeof(table->stat_persistent));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_auto_recalc,
			   sizeof(table->stats_auto_recalc));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_sample_pages,
			   sizeof(table->stats_sample_pages));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_n_rows,
			   sizeof(table->stat_n_rows));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_clustered_index_size,
			   sizeof(table->stat_clustered_index_size));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_sum_of_other_index_sizes,
			   sizeof(table->stat_sum_of_other_index_sizes));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_modified_counter,
			   sizeof(table->stat_modified_counter));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_bg_flag,
			   sizeof(table->stats_bg_flag));

	/* Then check each index that participates in stats. */
	for (dict_index_t* index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		if (!dict_stats_should_ignore_index(index)) {
			dict_stats_assert_initialized_index(index);
		}
	}
}
658
659 #define INDEX_EQ(i1, i2) \
660 ((i1) != NULL \
661 && (i2) != NULL \
662 && (i1)->id == (i2)->id \
663 && strcmp((i1)->name, (i2)->name) == 0)
664
665 /*********************************************************************//**
666 Copy table and index statistics from one table to another, including index
667 stats. Extra indexes in src are ignored and extra indexes in dst are
668 initialized to correspond to an empty index. */
static
void
dict_stats_copy(
/*============*/
	dict_table_t*		dst,	/*!< in/out: destination table */
	const dict_table_t*	src)	/*!< in: source table */
{
	dst->stats_last_recalc = src->stats_last_recalc;
	dst->stat_n_rows = src->stat_n_rows;
	dst->stat_clustered_index_size = src->stat_clustered_index_size;
	dst->stat_sum_of_other_index_sizes = src->stat_sum_of_other_index_sizes;
	dst->stat_modified_counter = src->stat_modified_counter;

	dict_index_t*	dst_idx;
	dict_index_t*	src_idx;

	/* Walk both index lists in parallel. The comma-expression in the
	increment advances src_idx only while it is non-NULL, so src_idx
	"sticks" at NULL once src's list is exhausted while dst_idx keeps
	advancing. Mismatches are resolved by the INDEX_EQ search below. */
	for (dst_idx = dict_table_get_first_index(dst),
	     src_idx = dict_table_get_first_index(src);
	     dst_idx != NULL;
	     dst_idx = dict_table_get_next_index(dst_idx),
	     (src_idx != NULL
	      && (src_idx = dict_table_get_next_index(src_idx)))) {

		if (dict_stats_should_ignore_index(dst_idx)) {
			/* FTS indexes have no stats arrays, so they
			cannot even be emptied. */
			if (!(dst_idx->type & DICT_FTS)) {
				dict_stats_empty_index(dst_idx);
			}
			continue;
		}

		ut_ad(!dict_index_is_ibuf(dst_idx));

		/* If the parallel walk got out of sync, search src's
		whole list for a matching index (same id and name). */
		if (!INDEX_EQ(src_idx, dst_idx)) {
			for (src_idx = dict_table_get_first_index(src);
			     src_idx != NULL;
			     src_idx = dict_table_get_next_index(src_idx)) {

				if (INDEX_EQ(src_idx, dst_idx)) {
					break;
				}
			}
		}

		/* Extra index in dst with no counterpart in src: init
		it as empty (per this function's contract). */
		if (!INDEX_EQ(src_idx, dst_idx)) {
			dict_stats_empty_index(dst_idx);
			continue;
		}

		ulint	n_copy_el;

		if (dst_idx->n_uniq > src_idx->n_uniq) {
			n_copy_el = src_idx->n_uniq;
			/* Since src is smaller some elements in dst
			will remain untouched by the following memmove(),
			thus we init all of them here. */
			dict_stats_empty_index(dst_idx);
		} else {
			n_copy_el = dst_idx->n_uniq;
		}

		memmove(dst_idx->stat_n_diff_key_vals,
			src_idx->stat_n_diff_key_vals,
			n_copy_el * sizeof(dst_idx->stat_n_diff_key_vals[0]));

		memmove(dst_idx->stat_n_sample_sizes,
			src_idx->stat_n_sample_sizes,
			n_copy_el * sizeof(dst_idx->stat_n_sample_sizes[0]));

		memmove(dst_idx->stat_n_non_null_key_vals,
			src_idx->stat_n_non_null_key_vals,
			n_copy_el * sizeof(dst_idx->stat_n_non_null_key_vals[0]));

		dst_idx->stat_index_size = src_idx->stat_index_size;

		dst_idx->stat_n_leaf_pages = src_idx->stat_n_leaf_pages;
	}

	dst->stat_initialized = TRUE;
}
748
749 /** Duplicate the stats of a table and its indexes.
750 This function creates a dummy dict_table_t object and copies the input
751 table's stats into it. The returned table object is not in the dictionary
752 cache and cannot be accessed by any other threads. In addition to the
753 members copied in dict_stats_table_clone_create() this function initializes
754 the following:
755 dict_table_t::stat_initialized
756 dict_table_t::stat_persistent
757 dict_table_t::stat_n_rows
758 dict_table_t::stat_clustered_index_size
759 dict_table_t::stat_sum_of_other_index_sizes
760 dict_table_t::stat_modified_counter
761 dict_index_t::stat_n_diff_key_vals[]
762 dict_index_t::stat_n_sample_sizes[]
763 dict_index_t::stat_n_non_null_key_vals[]
764 dict_index_t::stat_index_size
765 dict_index_t::stat_n_leaf_pages
766 The returned object should be freed with dict_stats_snapshot_free()
767 when no longer needed.
768 @param[in] table table whose stats to copy
769 @return incomplete table object */
static
dict_table_t*
dict_stats_snapshot_create(
	dict_table_t*	table)
{
	/* Lock order: dict_sys mutex first, then the table stats
	S-latch; released in reverse order below. */
	mutex_enter(&dict_sys->mutex);

	dict_table_stats_lock(table, RW_S_LATCH);

	dict_stats_assert_initialized(table);

	dict_table_t*	t;

	t = dict_stats_table_clone_create(table);

	dict_stats_copy(t, table);

	/* Copy the remaining members not handled by
	dict_stats_table_clone_create()/dict_stats_copy()
	(see the function doc above). */
	t->stat_persistent = table->stat_persistent;
	t->stats_auto_recalc = table->stats_auto_recalc;
	t->stats_sample_pages = table->stats_sample_pages;
	t->stats_bg_flag = table->stats_bg_flag;

	dict_table_stats_unlock(table, RW_S_LATCH);

	mutex_exit(&dict_sys->mutex);

	return(t);
}
798
799 /*********************************************************************//**
800 Free the resources occupied by an object returned by
801 dict_stats_snapshot_create(). */
static
void
dict_stats_snapshot_free(
/*=====================*/
	dict_table_t*	t)	/*!< in: dummy table object to free */
{
	/* A snapshot is just a table clone, so the clone free routine
	releases everything. */
	dict_stats_table_clone_free(t);
}
810
811 /*********************************************************************//**
812 Calculates new estimates for index statistics. This function is
813 relatively quick and is used to calculate transient statistics that
814 are not saved on disk. This was the only way to calculate statistics
815 before the Persistent Statistics feature was introduced. */
static
void
dict_stats_update_transient_for_index(
/*==================================*/
	dict_index_t*	index)	/*!< in/out: index */
{
	if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
	    && (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO
		|| !dict_index_is_clust(index))) {
		/* If we have set a high innodb_force_recovery
		level, do not calculate statistics, as a badly
		corrupted index can cause a crash in it.
		Initialize some bogus index cardinality
		statistics, so that the data can be queried in
		various means, also via secondary indexes. */
		dict_stats_empty_index(index);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
	} else if (ibuf_debug && !dict_index_is_clust(index)) {
		/* Debug builds with insert-buffer debugging: skip
		secondary-index stats and use empty values. */
		dict_stats_empty_index(index);
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
	} else {
		mtr_t	mtr;
		ulint	size;

		mtr_start(&mtr);
		dict_disable_redo_if_temporary(index->table, &mtr);

		mtr_s_lock(dict_index_get_lock(index), &mtr);

		size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);

		if (size != ULINT_UNDEFINED) {
			index->stat_index_size = size;

			/* size is reused for the leaf page count below;
			if the first call failed, size stays
			ULINT_UNDEFINED and the switch handles it. */
			size = btr_get_size(
				index, BTR_N_LEAF_PAGES, &mtr);
		}

		/* Release the mini-transaction before updating the
		remaining stats fields. */
		mtr_commit(&mtr);

		switch (size) {
		case ULINT_UNDEFINED:
			dict_stats_empty_index(index);
			return;
		case 0:
			/* The root node of the tree is a leaf */
			size = 1;
		}

		index->stat_n_leaf_pages = size;

		/* We don't handle the return value since it will be false
		only when some thread is dropping the table and we don't
		have to empty the statistics of the to be dropped index */
		btr_estimate_number_of_different_key_vals(index);
	}
}
873
874 /*********************************************************************//**
875 Calculates new estimates for table and index statistics. This function
876 is relatively quick and is used to calculate transient statistics that
877 are not saved on disk.
878 This was the only way to calculate statistics before the
879 Persistent Statistics feature was introduced. */
void
dict_stats_update_transient(
/*========================*/
	dict_table_t*	table)	/*!< in/out: table */
{
	dict_index_t*	index;
	ulint		sum_of_index_sizes	= 0;

	/* Serialize index analysis for this table; released on every
	return path below. */
	dict_table_analyze_index_lock(table);

	/* Find out the sizes of the indexes and how many different values
	for the key they approximately have */

	index = dict_table_get_first_index(table);

	if (dict_table_is_discarded(table)) {
		/* Nothing to do. */
		dict_stats_empty_table(table);
		dict_table_analyze_index_unlock(table);
		return;
	} else if (index == NULL) {
		/* Table definition is corrupt */

		ib::warn() << "Table " << table->name
			<< " has no indexes. Cannot calculate statistics.";
		dict_stats_empty_table(table);
		dict_table_analyze_index_unlock(table);
		return;
	}

	for (; index != NULL; index = dict_table_get_next_index(index)) {

		ut_ad(!dict_index_is_ibuf(index));

		/* FTS and spatial indexes keep no transient stats. */
		if (index->type & DICT_FTS || dict_index_is_spatial(index)) {
			continue;
		}

		/* Reset first, so an index that is then skipped below
		still ends up with well-defined (empty) stats. */
		dict_stats_empty_index(index);

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		dict_stats_update_transient_for_index(index);

		sum_of_index_sizes += index->stat_index_size;
	}

	dict_table_stats_lock(table, RW_X_LATCH);

	/* The first index is the clustered index; its full-key
	cardinality estimates the row count. */
	index = dict_table_get_first_index(table);

	table->stat_n_rows = index->stat_n_diff_key_vals[
		dict_index_get_n_unique(index) - 1];

	table->stat_clustered_index_size = index->stat_index_size;

	table->stat_sum_of_other_index_sizes = sum_of_index_sizes
		- index->stat_index_size;

	table->stats_last_recalc = ut_time_monotonic();

	table->stat_modified_counter = 0;

	table->stat_initialized = TRUE;

	dict_table_stats_unlock(table, RW_X_LATCH);

	dict_table_analyze_index_unlock(table);

}
952
953 /* @{ Pseudo code about the relation between the following functions
954
955 let N = N_SAMPLE_PAGES(index)
956
957 dict_stats_analyze_index()
958 for each n_prefix
959 search for good enough level:
960 dict_stats_analyze_index_level() // only called if level has <= N pages
961 // full scan of the level in one mtr
962 collect statistics about the given level
963 if we are not satisfied with the level, search next lower level
964 we have found a good enough level here
965 dict_stats_analyze_index_for_n_prefix(that level, stats collected above)
966 // full scan of the level in one mtr
967 dive below some records and analyze the leaf page there:
968 dict_stats_analyze_index_below_cur()
969 @} */
970
/*********************************************************************//**
Find the total number and the number of distinct keys on a given level in
an index. Each of the 1..n_uniq prefixes are looked up and the results are
saved in the array n_diff[0] .. n_diff[n_uniq - 1]. The total number of
records on the level is saved in total_recs.
Also, the index of the last record in each group of equal records is saved
in n_diff_boundaries[0..n_uniq - 1], records indexing starts from the leftmost
record on the level and continues across page boundaries, counting from 0. */
static
void
dict_stats_analyze_index_level(
/*===========================*/
	dict_index_t*	index,		/*!< in: index */
	ulint		level,		/*!< in: level */
	ib_uint64_t*	n_diff,		/*!< out: array for number of
					distinct keys for all prefixes */
	ib_uint64_t*	total_recs,	/*!< out: total number of records */
	ib_uint64_t*	total_pages,	/*!< out: total number of pages */
	boundaries_t*	n_diff_boundaries,/*!< out: boundaries of the groups
					of distinct keys */
	mtr_t*		mtr)	/*!< in/out: mini-transaction */
{
	ulint		n_uniq;
	mem_heap_t*	heap;
	btr_pcur_t	pcur;
	const page_t*	page;
	const rec_t*	rec;
	const rec_t*	prev_rec;
	bool		prev_rec_is_copied;
	byte*		prev_rec_buf = NULL;
	ulint		prev_rec_buf_size = 0;
	ulint*		rec_offsets;
	ulint*		prev_rec_offsets;
	ulint		i;

	DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu)\n", __func__,
		     index->table->name, index->name, level);

	/* The caller must hold the index SX lock for the whole scan,
	registered in the passed-in mini-transaction. */
	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_SX_LOCK));

	n_uniq = dict_index_get_n_unique(index);

	/* elements in the n_diff array are 0..n_uniq-1 (inclusive) */
	memset(n_diff, 0x0, n_uniq * sizeof(n_diff[0]));

	/* Allocate space for the offsets header (the allocation size at
	offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
	so that this will never be less than the size calculated in
	rec_get_offsets_func(). */
	i = (REC_OFFS_HEADER_SIZE + 1 + 1) + index->n_fields;

	heap = mem_heap_create((2 * sizeof *rec_offsets) * i);
	rec_offsets = static_cast<ulint*>(
		mem_heap_alloc(heap, i * sizeof *rec_offsets));
	prev_rec_offsets = static_cast<ulint*>(
		mem_heap_alloc(heap, i * sizeof *prev_rec_offsets));
	rec_offs_set_n_alloc(rec_offsets, i);
	rec_offs_set_n_alloc(prev_rec_offsets, i);

	/* reset the dynamic arrays n_diff_boundaries[0..n_uniq-1] */
	if (n_diff_boundaries != NULL) {
		for (i = 0; i < n_uniq; i++) {
			n_diff_boundaries[i].erase(
				n_diff_boundaries[i].begin(),
				n_diff_boundaries[i].end());
		}
	}

	/* Position pcur on the leftmost record on the leftmost page
	on the desired level. */

	btr_pcur_open_at_index_side(
		true, index, BTR_SEARCH_TREE | BTR_ALREADY_S_LATCHED,
		&pcur, true, level, mtr);
	btr_pcur_move_to_next_on_page(&pcur);

	page = btr_pcur_get_page(&pcur);

	/* The page must not be empty, except when
	it is the root page (and the whole index is empty). */
	ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page));
	ut_ad(btr_pcur_get_rec(&pcur)
	      == page_rec_get_next_const(page_get_infimum_rec(page)));

	/* check that we are indeed on the desired level */
	ut_a(btr_page_get_level(page, mtr) == level);

	/* there should not be any pages on the left */
	ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);

	/* check whether the first record on the leftmost page is marked
	as such, if we are on a non-leaf level */
	ut_a((level == 0)
	     == !(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
			  btr_pcur_get_rec(&pcur), page_is_comp(page))));

	prev_rec = NULL;
	prev_rec_is_copied = false;

	/* no records by default */
	*total_recs = 0;

	*total_pages = 0;

	/* iterate over all user records on this level
	and compare each two adjacent ones, even the last on page
	X and the first on page X+1 */
	for (;
	     btr_pcur_is_on_user_rec(&pcur);
	     btr_pcur_move_to_next_user_rec(&pcur, mtr)) {

		bool	rec_is_last_on_page;

		rec = btr_pcur_get_rec(&pcur);

		/* If rec and prev_rec are on different pages, then prev_rec
		must have been copied, because we hold latch only on the page
		where rec resides. */
		if (prev_rec != NULL
		    && page_align(rec) != page_align(prev_rec)) {

			ut_a(prev_rec_is_copied);
		}

		rec_is_last_on_page =
			page_rec_is_supremum(page_rec_get_next_const(rec));

		/* increment the pages counter at the end of each page */
		if (rec_is_last_on_page) {

			(*total_pages)++;
		}

		/* Skip delete-marked records on the leaf level. If we
		do not skip them, then ANALYZE quickly after DELETE
		could count them or not (purge may have already wiped
		them away) which brings non-determinism. We skip only
		leaf-level delete marks because delete marks on
		non-leaf level do not make sense.
		If srv_stats_include_delete_marked is set, delete-marked
		records are counted like ordinary ones and this branch
		is never taken. */

		if (level == 0 && (srv_stats_include_delete_marked ? 0:
				   rec_get_deleted_flag(
					   rec,
					   page_is_comp(btr_pcur_get_page(&pcur))))) {

			/* Even though this record is skipped, prev_rec
			must survive the upcoming page switch: copy it
			out of the page if it still points into it. */
			if (rec_is_last_on_page
			    && !prev_rec_is_copied
			    && prev_rec != NULL) {
				/* copy prev_rec */

				prev_rec_offsets = rec_get_offsets(
					prev_rec, index, prev_rec_offsets,
					n_uniq, &heap);

				prev_rec = rec_copy_prefix_to_buf(
					prev_rec, index,
					rec_offs_n_fields(prev_rec_offsets),
					&prev_rec_buf, &prev_rec_buf_size);

				prev_rec_is_copied = true;
			}

			continue;
		}
		rec_offsets = rec_get_offsets(
			rec, index, rec_offsets, n_uniq, &heap);

		(*total_recs)++;

		if (prev_rec != NULL) {
			ulint	matched_fields;

			prev_rec_offsets = rec_get_offsets(
				prev_rec, index, prev_rec_offsets,
				n_uniq, &heap);

			/* find how many leading fields rec shares with
			its predecessor; fields beyond matched_fields
			start new groups of distinct prefixes */
			cmp_rec_rec_with_match(rec,
					       prev_rec,
					       rec_offsets,
					       prev_rec_offsets,
					       index,
					       false,
					       false,
					       &matched_fields);

			for (i = matched_fields; i < n_uniq; i++) {

				if (n_diff_boundaries != NULL) {
					/* push the index of the previous
					record, that is - the last one from
					a group of equal keys */

					ib_uint64_t	idx;

					/* the index of the current record
					is total_recs - 1, the index of the
					previous record is total_recs - 2;
					we know that idx is not going to
					become negative here because if we
					are in this branch then there is a
					previous record and thus
					total_recs >= 2 */
					idx = *total_recs - 2;

					n_diff_boundaries[i].push_back(idx);
				}

				/* increment the number of different keys
				for n_prefix=i+1 (e.g. if i=0 then we increment
				for n_prefix=1 which is stored in n_diff[0]) */
				n_diff[i]++;
			}
		} else {
			/* this is the first non-delete marked record */
			for (i = 0; i < n_uniq; i++) {
				n_diff[i] = 1;
			}
		}

		if (rec_is_last_on_page) {
			/* end of a page has been reached */

			/* we need to copy the record instead of assigning
			like prev_rec = rec; because when we traverse the
			records on this level at some point we will jump from
			one page to the next and then rec and prev_rec will
			be on different pages and
			btr_pcur_move_to_next_user_rec() will release the
			latch on the page that prev_rec is on */
			prev_rec = rec_copy_prefix_to_buf(
				rec, index, rec_offs_n_fields(rec_offsets),
				&prev_rec_buf, &prev_rec_buf_size);
			prev_rec_is_copied = true;

		} else {
			/* still on the same page, the next call to
			btr_pcur_move_to_next_user_rec() will not jump
			on the next page, we can simply assign pointers
			instead of copying the records like above */

			prev_rec = rec;
			prev_rec_is_copied = false;
		}
	}

	/* if *total_pages is left untouched then the above loop was not
	entered at all and there is one page in the whole tree which is
	empty or the loop was entered but this is level 0, contains one page
	and all records are delete-marked */
	if (*total_pages == 0) {

		ut_ad(level == 0);
		ut_ad(*total_recs == 0);

		*total_pages = 1;
	}

	/* if there are records on this level and boundaries
	should be saved */
	if (*total_recs > 0 && n_diff_boundaries != NULL) {

		/* remember the index of the last record on the level as the
		last one from the last group of equal keys; this holds for
		all possible prefixes */
		for (i = 0; i < n_uniq; i++) {
			ib_uint64_t	idx;

			idx = *total_recs - 1;

			n_diff_boundaries[i].push_back(idx);
		}
	}

	/* now in n_diff_boundaries[i] there are exactly n_diff[i] integers,
	for i=0..n_uniq-1 */

#ifdef UNIV_STATS_DEBUG
	for (i = 0; i < n_uniq; i++) {

		DEBUG_PRINTF(" %s(): total recs: " UINT64PF
			     ", total pages: " UINT64PF
			     ", n_diff[%lu]: " UINT64PF "\n",
			     __func__, *total_recs,
			     *total_pages,
			     i, n_diff[i]);

#if 0
		if (n_diff_boundaries != NULL) {
			ib_uint64_t	j;

			DEBUG_PRINTF(" %s(): boundaries[%lu]: ",
				     __func__, i);

			for (j = 0; j < n_diff[i]; j++) {
				ib_uint64_t	idx;

				idx = n_diff_boundaries[i][j];

				DEBUG_PRINTF(UINT64PF "=" UINT64PF ", ",
					     j, idx);
			}
			DEBUG_PRINTF("\n");
		}
#endif
	}
#endif /* UNIV_STATS_DEBUG */

	/* Release the latch on the last page, because that is not done by
	btr_pcur_close(). This function works also for non-leaf pages. */
	btr_leaf_page_release(btr_pcur_get_block(&pcur), BTR_SEARCH_LEAF, mtr);

	btr_pcur_close(&pcur);
	ut_free(prev_rec_buf);
	mem_heap_free(heap);
}
1287
/* aux enum for controlling the behavior of dict_stats_scan_page() @{
The COUNT_ALL_* variants are selected based on
srv_stats_include_delete_marked when scanning leaf pages;
QUIT_ON_FIRST_NON_BORING is used while descending through non-leaf
levels. */
enum page_scan_method_t {
	COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED,/* scan all records on
				the given page and count the number of
				distinct ones, also ignore delete marked
				records */
	QUIT_ON_FIRST_NON_BORING,/* quit when the first record that differs
				from its right neighbor is found */
	COUNT_ALL_NON_BORING_INCLUDE_DEL_MARKED/* scan all records on
				the given page and count the number of
				distinct ones, include delete marked
				records */
};
/* @} */
1302
1303 /** Scan a page, reading records from left to right and counting the number
1304 of distinct records (looking only at the first n_prefix
1305 columns) and the number of external pages pointed by records from this page.
1306 If scan_method is QUIT_ON_FIRST_NON_BORING then the function
1307 will return as soon as it finds a record that does not match its neighbor
1308 to the right, which means that in the case of QUIT_ON_FIRST_NON_BORING the
1309 returned n_diff can either be 0 (empty page), 1 (the whole page has all keys
1310 equal) or 2 (the function found a non-boring record and returned).
1311 @param[out] out_rec record, or NULL
1312 @param[out] offsets1 rec_get_offsets() working space (must
1313 be big enough)
1314 @param[out] offsets2 rec_get_offsets() working space (must
1315 be big enough)
1316 @param[in] index index of the page
1317 @param[in] page the page to scan
1318 @param[in] n_prefix look at the first n_prefix columns
1319 @param[in] scan_method scan to the end of the page or not
1320 @param[out] n_diff number of distinct records encountered
1321 @param[out] n_external_pages if this is non-NULL then it will be set
1322 to the number of externally stored pages which were encountered
1323 @return offsets1 or offsets2 (the offsets of *out_rec),
1324 or NULL if the page is empty and does not contain user records. */
1325 UNIV_INLINE
1326 ulint*
dict_stats_scan_page(const rec_t ** out_rec,ulint * offsets1,ulint * offsets2,const dict_index_t * index,const page_t * page,ulint n_prefix,page_scan_method_t scan_method,ib_uint64_t * n_diff,ib_uint64_t * n_external_pages)1327 dict_stats_scan_page(
1328 const rec_t** out_rec,
1329 ulint* offsets1,
1330 ulint* offsets2,
1331 const dict_index_t* index,
1332 const page_t* page,
1333 ulint n_prefix,
1334 page_scan_method_t scan_method,
1335 ib_uint64_t* n_diff,
1336 ib_uint64_t* n_external_pages)
1337 {
1338 ulint* offsets_rec = offsets1;
1339 ulint* offsets_next_rec = offsets2;
1340 const rec_t* rec;
1341 const rec_t* next_rec;
1342 /* A dummy heap, to be passed to rec_get_offsets().
1343 Because offsets1,offsets2 should be big enough,
1344 this memory heap should never be used. */
1345 mem_heap_t* heap = NULL;
1346 const rec_t* (*get_next)(const rec_t*);
1347
1348 if (scan_method == COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED) {
1349 get_next = page_rec_get_next_non_del_marked;
1350 } else {
1351 get_next = page_rec_get_next_const;
1352 }
1353
1354 const bool should_count_external_pages = n_external_pages != NULL;
1355
1356 if (should_count_external_pages) {
1357 *n_external_pages = 0;
1358 }
1359
1360 rec = get_next(page_get_infimum_rec(page));
1361
1362 if (page_rec_is_supremum(rec)) {
1363 /* the page is empty or contains only delete-marked records */
1364 *n_diff = 0;
1365 *out_rec = NULL;
1366 return(NULL);
1367 }
1368
1369 offsets_rec = rec_get_offsets(rec, index, offsets_rec,
1370 ULINT_UNDEFINED, &heap);
1371
1372 if (should_count_external_pages) {
1373 *n_external_pages += btr_rec_get_externally_stored_len(
1374 rec, offsets_rec);
1375 }
1376
1377 next_rec = get_next(rec);
1378
1379 *n_diff = 1;
1380
1381 while (!page_rec_is_supremum(next_rec)) {
1382
1383 ulint matched_fields;
1384
1385 offsets_next_rec = rec_get_offsets(next_rec, index,
1386 offsets_next_rec,
1387 ULINT_UNDEFINED,
1388 &heap);
1389
1390 /* check whether rec != next_rec when looking at
1391 the first n_prefix fields */
1392 cmp_rec_rec_with_match(rec, next_rec,
1393 offsets_rec, offsets_next_rec,
1394 index, false, false, &matched_fields);
1395
1396 if (matched_fields < n_prefix) {
1397 /* rec != next_rec, => rec is non-boring */
1398
1399 (*n_diff)++;
1400
1401 if (scan_method == QUIT_ON_FIRST_NON_BORING) {
1402 break;
1403 }
1404 }
1405
1406 rec = next_rec;
1407 {
1408 /* Assign offsets_rec = offsets_next_rec
1409 so that offsets_rec matches with rec which
1410 was just assigned rec = next_rec above.
1411 Also need to point offsets_next_rec to the
1412 place where offsets_rec was pointing before
1413 because we have just 2 placeholders where
1414 data is actually stored:
1415 offsets1 and offsets2 and we
1416 are using them in circular fashion
1417 (offsets[_next]_rec are just pointers to
1418 those placeholders). */
1419 ulint* offsets_tmp;
1420 offsets_tmp = offsets_rec;
1421 offsets_rec = offsets_next_rec;
1422 offsets_next_rec = offsets_tmp;
1423 }
1424
1425 if (should_count_external_pages) {
1426 *n_external_pages += btr_rec_get_externally_stored_len(
1427 rec, offsets_rec);
1428 }
1429
1430 next_rec = get_next(next_rec);
1431 }
1432
1433 /* offsets1,offsets2 should have been big enough */
1434 ut_a(heap == NULL);
1435 *out_rec = rec;
1436 return(offsets_rec);
1437 }
1438
/** Dive below the current position of a cursor and calculate the number of
distinct records on the leaf page, when looking at the first n_prefix
columns. Also calculate the number of external pages pointed by records
on the leaf page. The results are returned via the out parameters; this
function itself returns nothing.
@param[in]	cur	cursor positioned on a node pointer record
@param[in]	n_prefix	look at the first n_prefix columns
when comparing records
@param[out]	n_diff	number of distinct records on the leaf page reached
@param[out]	n_external_pages	number of external pages referenced
from that leaf page (0 if the descent stops early) */
static
void
dict_stats_analyze_index_below_cur(
	const btr_cur_t*	cur,
	ulint			n_prefix,
	ib_uint64_t*		n_diff,
	ib_uint64_t*		n_external_pages)
{
	dict_index_t*	index;
	buf_block_t*	block;
	const page_t*	page;
	mem_heap_t*	heap;
	const rec_t*	rec;
	ulint*		offsets1;
	ulint*		offsets2;
	ulint*		offsets_rec;
	ulint		size;
	mtr_t		mtr;

	index = btr_cur_get_index(cur);

	/* Allocate offsets for the record and the node pointer, for
	node pointer records. In a secondary index, the node pointer
	record will consist of all index fields followed by a child
	page number.
	Allocate space for the offsets header (the allocation size at
	offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
	so that this will never be less than the size calculated in
	rec_get_offsets_func(). */
	size = (1 + REC_OFFS_HEADER_SIZE) + 1 + dict_index_get_n_fields(index);

	heap = mem_heap_create(size * (sizeof *offsets1 + sizeof *offsets2));

	offsets1 = static_cast<ulint*>(mem_heap_alloc(
			heap, size * sizeof *offsets1));

	offsets2 = static_cast<ulint*>(mem_heap_alloc(
			heap, size * sizeof *offsets2));

	rec_offs_set_n_alloc(offsets1, size);
	rec_offs_set_n_alloc(offsets2, size);

	rec = btr_cur_get_rec(cur);

	offsets_rec = rec_get_offsets(rec, index, offsets1,
				      ULINT_UNDEFINED, &heap);

	/* start the descent at the child page referenced by the node
	pointer record under the cursor */
	page_id_t		page_id(dict_index_get_space(index),
					btr_node_ptr_get_child_page_no(
						rec, offsets_rec));
	const page_size_t	page_size(dict_table_page_size(index->table));

	/* assume no external pages by default - in case we quit from this
	function without analyzing any leaf pages */
	*n_external_pages = 0;

	mtr_start(&mtr);

	/* descend to the leaf level on the B-tree */
	for (;;) {

		block = buf_page_get_gen(page_id, page_size, RW_S_LATCH,
					 NULL /* no guessed block */,
					 BUF_GET, __FILE__, __LINE__, &mtr);

		page = buf_block_get_frame(block);

		if (btr_page_get_level(page, mtr) == 0) {
			/* leaf level */
			break;
		}
		/* else */

		/* search for the first non-boring record on the page */
		offsets_rec = dict_stats_scan_page(
			&rec, offsets1, offsets2, index, page, n_prefix,
			QUIT_ON_FIRST_NON_BORING, n_diff, NULL);

		/* pages on level > 0 are not allowed to be empty */
		ut_a(offsets_rec != NULL);
		/* if page is not empty (offsets_rec != NULL) then n_diff must
		be > 0, otherwise there is a bug in dict_stats_scan_page() */
		ut_a(*n_diff > 0);

		if (*n_diff == 1) {
			mtr_commit(&mtr);

			/* page has all keys equal and the end of the page
			was reached by dict_stats_scan_page(), no need to
			descend to the leaf level */
			mem_heap_free(heap);
			/* can't get an estimate for n_external_pages here
			because we do not dive to the leaf level, assume no
			external pages (*n_external_pages was assigned to 0
			above). */
			return;
		}
		/* else */

		/* when we instruct dict_stats_scan_page() to quit on the
		first non-boring record it finds, then the returned n_diff
		can either be 0 (empty page), 1 (page has all keys equal) or
		2 (non-boring record was found) */
		ut_a(*n_diff == 2);

		/* we have a non-boring record in rec, descend below it */

		page_id.set_page_no(
			btr_node_ptr_get_child_page_no(rec, offsets_rec));
	}

	/* make sure we got a leaf page as a result from the above loop */
	ut_ad(btr_page_get_level(page, &mtr) == 0);

	/* scan the leaf page and find the number of distinct keys,
	when looking only at the first n_prefix columns; also estimate
	the number of externally stored pages pointed by records on this
	page */

	offsets_rec = dict_stats_scan_page(
		&rec, offsets1, offsets2, index, page, n_prefix,
		srv_stats_include_delete_marked ?
		COUNT_ALL_NON_BORING_INCLUDE_DEL_MARKED:
		COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED, n_diff,
		n_external_pages);

#if 0
	DEBUG_PRINTF(" %s(): n_diff below page_no=%lu: " UINT64PF "\n",
		     __func__, page_no, n_diff);
#endif

	mtr_commit(&mtr);
	mem_heap_free(heap);
}
1583
/** Input data that is used to calculate dict_index_t::stat_n_diff_key_vals[]
for each n-columns prefix (n from 1 to n_uniq). One instance describes the
sampling performed for one prefix length. */
struct n_diff_data_t {
	/** Index of the level on which the descent through the btree
	stopped. level 0 is the leaf level. This is >= 1 because we
	avoid scanning the leaf level because it may contain too many
	pages and doing so is useless when combined with the random dives -
	if we are to scan the leaf level, this means a full scan and we can
	simply do that instead of fiddling with picking random records higher
	in the tree and to dive below them. At the start of the analyzing
	we may decide to do full scan of the leaf level, but then this
	structure is not used in that code path. */
	ulint		level;

	/** Number of records on the level where the descent through the
	btree stopped. When we scan the btree from the root, we stop at some
	mid level, choose some records from it and dive below them towards a
	leaf page to analyze. */
	ib_uint64_t	n_recs_on_level;

	/** Number of different key values that were found on the mid level. */
	ib_uint64_t	n_diff_on_level;

	/** Number of leaf pages that are analyzed. This is also the same as
	the number of records that we pick from the mid level and dive below
	them. */
	ib_uint64_t	n_leaf_pages_to_analyze;

	/** Cumulative sum of the number of different key values that were
	found on all analyzed pages. */
	ib_uint64_t	n_diff_all_analyzed_pages;

	/** Cumulative sum of the number of external pages (stored outside of
	the btree but in the same file segment). */
	ib_uint64_t	n_external_pages_sum;
};
1620
/** Estimate the number of different key values in an index when looking at
the first n_prefix columns. For a given level in an index select
n_diff_data->n_leaf_pages_to_analyze records from that level and dive below
them to the corresponding leaf pages, then scan those leaf pages and save the
sampling results in n_diff_data->n_diff_all_analyzed_pages.
@param[in]	index	index
@param[in]	n_prefix	look at first 'n_prefix' columns when
comparing records
@param[in]	boundaries	a vector that contains
n_diff_data->n_diff_on_level integers each of which represents the index (on
level 'level', counting from left/smallest to right/biggest from 0) of the
last record from each group of distinct keys
@param[in,out]	n_diff_data	n_diff_all_analyzed_pages and
n_external_pages_sum in this structure will be set by this function. The
members level, n_diff_on_level and n_leaf_pages_to_analyze must be set by the
caller in advance - they are used by some calculations inside this function
@param[in,out]	mtr	mini-transaction */
static
void
dict_stats_analyze_index_for_n_prefix(
	dict_index_t*		index,
	ulint			n_prefix,
	const boundaries_t*	boundaries,
	n_diff_data_t*		n_diff_data,
	mtr_t*			mtr)
{
	btr_pcur_t	pcur;
	const page_t*	page;
	ib_uint64_t	rec_idx;
	ib_uint64_t	i;

#if 0
	DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu, n_prefix=%lu,"
		     " n_diff_on_level=" UINT64PF ")\n",
		     __func__, index->table->name, index->name, level,
		     n_prefix, n_diff_data->n_diff_on_level);
#endif

	/* the caller must hold the index SX lock, registered in mtr */
	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_SX_LOCK));

	/* Position pcur on the leftmost record on the leftmost page
	on the desired level. */

	btr_pcur_open_at_index_side(
		true, index, BTR_SEARCH_TREE | BTR_ALREADY_S_LATCHED,
		&pcur, true, n_diff_data->level, mtr);
	btr_pcur_move_to_next_on_page(&pcur);

	page = btr_pcur_get_page(&pcur);

	const rec_t*	first_rec = btr_pcur_get_rec(&pcur);

	/* We shouldn't be scanning the leaf level. The caller of this
	function should have stopped the descent on level 1 or higher. */
	ut_ad(n_diff_data->level > 0);
	ut_ad(!page_is_leaf(page));

	/* The page must not be empty, except when
	it is the root page (and the whole index is empty). */
	ut_ad(btr_pcur_is_on_user_rec(&pcur));
	ut_ad(first_rec == page_rec_get_next_const(page_get_infimum_rec(page)));

	/* check that we are indeed on the desired level */
	ut_a(btr_page_get_level(page, mtr) == n_diff_data->level);

	/* there should not be any pages on the left */
	ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);

	/* check whether the first record on the leftmost page is marked
	as such; we are on a non-leaf level */
	ut_a(rec_get_info_bits(first_rec, page_is_comp(page))
	     & REC_INFO_MIN_REC_FLAG);

	const ib_uint64_t	last_idx_on_level = boundaries->at(
		static_cast<unsigned>(n_diff_data->n_diff_on_level - 1));

	rec_idx = 0;

	n_diff_data->n_diff_all_analyzed_pages = 0;
	n_diff_data->n_external_pages_sum = 0;

	for (i = 0; i < n_diff_data->n_leaf_pages_to_analyze; i++) {
		/* there are n_diff_on_level elements
		in 'boundaries' and we divide those elements
		into n_leaf_pages_to_analyze segments, for example:

		let n_diff_on_level=100, n_leaf_pages_to_analyze=4, then:
		segment i=0: [0, 24]
		segment i=1: [25, 49]
		segment i=2: [50, 74]
		segment i=3: [75, 99] or

		let n_diff_on_level=1, n_leaf_pages_to_analyze=1, then:
		segment i=0: [0, 0] or

		let n_diff_on_level=2, n_leaf_pages_to_analyze=2, then:
		segment i=0: [0, 0]
		segment i=1: [1, 1] or

		let n_diff_on_level=13, n_leaf_pages_to_analyze=7, then:
		segment i=0: [0, 0]
		segment i=1: [1, 2]
		segment i=2: [3, 4]
		segment i=3: [5, 6]
		segment i=4: [7, 8]
		segment i=5: [9, 10]
		segment i=6: [11, 12]

		then we select a random record from each segment and dive
		below it */
		const ib_uint64_t	n_diff = n_diff_data->n_diff_on_level;
		const ib_uint64_t	n_pick
			= n_diff_data->n_leaf_pages_to_analyze;

		const ib_uint64_t	left = n_diff * i / n_pick;
		const ib_uint64_t	right = n_diff * (i + 1) / n_pick - 1;

		ut_a(left <= right);
		ut_a(right <= last_idx_on_level);

		/* we do not pass (left, right) because we do not want to ask
		ut_rnd_interval() to work with too big numbers since
		ib_uint64_t could be bigger than ulint */
		const ulint	rnd = ut_rnd_interval(
			0, static_cast<ulint>(right - left));

		const ib_uint64_t	dive_below_idx
			= boundaries->at(static_cast<unsigned>(left + rnd));

#if 0
		DEBUG_PRINTF(" %s(): dive below record with index="
			     UINT64PF "\n", __func__, dive_below_idx);
#endif

		/* seek to the record with index dive_below_idx */
		while (rec_idx < dive_below_idx
		       && btr_pcur_is_on_user_rec(&pcur)) {

			btr_pcur_move_to_next_user_rec(&pcur, mtr);
			rec_idx++;
		}

		/* if the level has finished before the record we are
		searching for, this means that the B-tree has changed in
		the meantime, quit our sampling and use whatever stats
		we have collected so far */
		if (rec_idx < dive_below_idx) {

			ut_ad(!btr_pcur_is_on_user_rec(&pcur));
			break;
		}

		/* it could be that the tree has changed in such a way that
		the record under dive_below_idx is the supremum record, in
		this case rec_idx == dive_below_idx and pcur is positioned
		on the supremum, we do not want to dive below it */
		if (!btr_pcur_is_on_user_rec(&pcur)) {
			break;
		}

		ut_a(rec_idx == dive_below_idx);

		ib_uint64_t	n_diff_on_leaf_page;
		ib_uint64_t	n_external_pages;

		dict_stats_analyze_index_below_cur(btr_pcur_get_btr_cur(&pcur),
						   n_prefix,
						   &n_diff_on_leaf_page,
						   &n_external_pages);

		/* We adjust n_diff_on_leaf_page here to avoid counting
		one value twice - once as the last on some page and once
		as the first on another page. Consider the following example:
		Leaf level:
		page: (2,2,2,2,3,3)
		... many pages like (3,3,3,3,3,3) ...
		page: (3,3,3,3,5,5)
		... many pages like (5,5,5,5,5,5) ...
		page: (5,5,5,5,8,8)
		page: (8,8,8,8,9,9)
		our algo would (correctly) get an estimate that there are
		2 distinct records per page (average). Having 4 pages below
		non-boring records, it would (wrongly) estimate the number
		of distinct records to 8. */
		if (n_diff_on_leaf_page > 0) {
			n_diff_on_leaf_page--;
		}

		n_diff_data->n_diff_all_analyzed_pages += n_diff_on_leaf_page;

		n_diff_data->n_external_pages_sum += n_external_pages;
	}

	btr_pcur_close(&pcur);
}
1817
1818 /** Set dict_index_t::stat_n_diff_key_vals[] and stat_n_sample_sizes[].
1819 @param[in] n_diff_data input data to use to derive the results
1820 @param[in,out] index index whose stat_n_diff_key_vals[] to set */
1821 UNIV_INLINE
1822 void
dict_stats_index_set_n_diff(const n_diff_data_t * n_diff_data,dict_index_t * index)1823 dict_stats_index_set_n_diff(
1824 const n_diff_data_t* n_diff_data,
1825 dict_index_t* index)
1826 {
1827 for (ulint n_prefix = dict_index_get_n_unique(index);
1828 n_prefix >= 1;
1829 n_prefix--) {
1830 /* n_diff_all_analyzed_pages can be 0 here if
1831 all the leaf pages sampled contained only
1832 delete-marked records. In this case we should assign
1833 0 to index->stat_n_diff_key_vals[n_prefix - 1], which
1834 the formula below does. */
1835
1836 const n_diff_data_t* data = &n_diff_data[n_prefix - 1];
1837
1838 ut_ad(data->n_leaf_pages_to_analyze > 0);
1839 ut_ad(data->n_recs_on_level > 0);
1840
1841 ib_uint64_t n_ordinary_leaf_pages;
1842
1843 if (data->level == 1) {
1844 /* If we know the number of records on level 1, then
1845 this number is the same as the number of pages on
1846 level 0 (leaf). */
1847 n_ordinary_leaf_pages = data->n_recs_on_level;
1848 } else {
1849 /* If we analyzed D ordinary leaf pages and found E
1850 external pages in total linked from those D ordinary
1851 leaf pages, then this means that the ratio
1852 ordinary/external is D/E. Then the ratio ordinary/total
1853 is D / (D + E). Knowing that the total number of pages
1854 is T (including ordinary and external) then we estimate
1855 that the total number of ordinary leaf pages is
1856 T * D / (D + E). */
1857 n_ordinary_leaf_pages
1858 = index->stat_n_leaf_pages
1859 * data->n_leaf_pages_to_analyze
1860 / (data->n_leaf_pages_to_analyze
1861 + data->n_external_pages_sum);
1862 }
1863
1864 /* See REF01 for an explanation of the algorithm */
1865 index->stat_n_diff_key_vals[n_prefix - 1]
1866 = n_ordinary_leaf_pages
1867
1868 * data->n_diff_on_level
1869 / data->n_recs_on_level
1870
1871 * data->n_diff_all_analyzed_pages
1872 / data->n_leaf_pages_to_analyze;
1873
1874 index->stat_n_sample_sizes[n_prefix - 1]
1875 = data->n_leaf_pages_to_analyze;
1876
1877 DEBUG_PRINTF(" %s(): n_diff=" UINT64PF " for n_prefix=%lu"
1878 " (%lu"
1879 " * " UINT64PF " / " UINT64PF
1880 " * " UINT64PF " / " UINT64PF ")\n",
1881 __func__,
1882 index->stat_n_diff_key_vals[n_prefix - 1],
1883 n_prefix,
1884 index->stat_n_leaf_pages,
1885 data->n_diff_on_level,
1886 data->n_recs_on_level,
1887 data->n_diff_all_analyzed_pages,
1888 data->n_leaf_pages_to_analyze);
1889 }
1890 }
1891
1892 /*********************************************************************//**
1893 Calculates new statistics for a given index and saves them to the index
1894 members stat_n_diff_key_vals[], stat_n_sample_sizes[], stat_index_size and
1895 stat_n_leaf_pages. This function could be slow. */
1896 static
1897 void
dict_stats_analyze_index(dict_index_t * index)1898 dict_stats_analyze_index(
1899 /*=====================*/
1900 dict_index_t* index) /*!< in/out: index to analyze */
1901 {
1902 ulint root_level;
1903 ulint level;
1904 bool level_is_analyzed;
1905 ulint n_uniq;
1906 ulint n_prefix;
1907 ib_uint64_t total_recs;
1908 ib_uint64_t total_pages;
1909 mtr_t mtr;
1910 ulint size;
1911 DBUG_ENTER("dict_stats_analyze_index");
1912
1913 /* stats_latch is created on 1st lock. */
1914 ut_ad(!(index->table->stats_latch_created) ||
1915 !rw_lock_own(index->table->stats_latch, RW_X_LATCH));
1916
1917 DBUG_PRINT("info", ("index: %s, online status: %d", index->name(),
1918 dict_index_get_online_status(index)));
1919
1920 /* Disable update statistic for Rtree */
1921 if (dict_index_is_spatial(index)) {
1922 DBUG_VOID_RETURN;
1923 }
1924
1925 DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name());
1926
1927 dict_stats_empty_index(index);
1928
1929 mtr_start(&mtr);
1930
1931 mtr_s_lock(dict_index_get_lock(index), &mtr);
1932
1933 size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
1934
1935 if (size != ULINT_UNDEFINED) {
1936 index->stat_index_size = size;
1937 size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
1938 }
1939
1940 /* Release the X locks on the root page taken by btr_get_size() */
1941 mtr_commit(&mtr);
1942
1943 switch (size) {
1944 case ULINT_UNDEFINED:
1945 dict_stats_assert_initialized_index(index);
1946 DBUG_VOID_RETURN;
1947 case 0:
1948 /* The root node of the tree is a leaf */
1949 size = 1;
1950 }
1951
1952 index->stat_n_leaf_pages = size;
1953
1954 mtr_start(&mtr);
1955
1956 mtr_sx_lock(dict_index_get_lock(index), &mtr);
1957
1958 root_level = btr_height_get(index, &mtr);
1959
1960 n_uniq = dict_index_get_n_unique(index);
1961
1962 /* If the tree has just one level (and one page) or if the user
1963 has requested to sample too many pages then do full scan.
1964
1965 For each n-column prefix (for n=1..n_uniq) N_SAMPLE_PAGES(index)
1966 will be sampled, so in total N_SAMPLE_PAGES(index) * n_uniq leaf
1967 pages will be sampled. If that number is bigger than the total
1968 number of leaf pages then do full scan of the leaf level instead
1969 since it will be faster and will give better results. */
1970
1971 if (root_level == 0
1972 || N_SAMPLE_PAGES(index) * n_uniq > index->stat_n_leaf_pages) {
1973
1974 if (root_level == 0) {
1975 DEBUG_PRINTF(" %s(): just one page,"
1976 " doing full scan\n", __func__);
1977 } else {
1978 DEBUG_PRINTF(" %s(): too many pages requested for"
1979 " sampling, doing full scan\n", __func__);
1980 }
1981
1982 /* do full scan of level 0; save results directly
1983 into the index */
1984
1985 dict_stats_analyze_index_level(index,
1986 0 /* leaf level */,
1987 index->stat_n_diff_key_vals,
1988 &total_recs,
1989 &total_pages,
1990 NULL /* boundaries not needed */,
1991 &mtr);
1992
1993 for (ulint i = 0; i < n_uniq; i++) {
1994 index->stat_n_sample_sizes[i] = total_pages;
1995 }
1996
1997 mtr_commit(&mtr);
1998
1999 dict_stats_assert_initialized_index(index);
2000 DBUG_VOID_RETURN;
2001 }
2002
2003 /* For each level that is being scanned in the btree, this contains the
2004 number of different key values for all possible n-column prefixes. */
2005 ib_uint64_t* n_diff_on_level = UT_NEW_ARRAY(
2006 ib_uint64_t, n_uniq, mem_key_dict_stats_n_diff_on_level);
2007
2008 /* For each level that is being scanned in the btree, this contains the
2009 index of the last record from each group of equal records (when
2010 comparing only the first n columns, n=1..n_uniq). */
2011 boundaries_t* n_diff_boundaries = UT_NEW_ARRAY_NOKEY(boundaries_t,
2012 n_uniq);
2013
2014 /* For each n-column prefix this array contains the input data that is
2015 used to calculate dict_index_t::stat_n_diff_key_vals[]. */
2016 n_diff_data_t* n_diff_data = UT_NEW_ARRAY_NOKEY(n_diff_data_t, n_uniq);
2017
2018 /* total_recs is also used to estimate the number of pages on one
2019 level below, so at the start we have 1 page (the root) */
2020 total_recs = 1;
2021
2022 /* Here we use the following optimization:
2023 If we find that level L is the first one (searching from the
2024 root) that contains at least D distinct keys when looking at
2025 the first n_prefix columns, then:
2026 if we look at the first n_prefix-1 columns then the first
2027 level that contains D distinct keys will be either L or a
2028 lower one.
2029 So if we find that the first level containing D distinct
2030 keys (on n_prefix columns) is L, we continue from L when
2031 searching for D distinct keys on n_prefix-1 columns. */
2032 level = root_level;
2033 level_is_analyzed = false;
2034
2035 for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) {
2036
2037 DEBUG_PRINTF(" %s(): searching level with >=%llu"
2038 " distinct records, n_prefix=%lu\n",
2039 __func__, N_DIFF_REQUIRED(index), n_prefix);
2040
2041 /* Commit the mtr to release the tree S lock to allow
2042 other threads to do some work too. */
2043 mtr_commit(&mtr);
2044 mtr_start(&mtr);
2045 mtr_sx_lock(dict_index_get_lock(index), &mtr);
2046 if (root_level != btr_height_get(index, &mtr)) {
2047 /* Just quit if the tree has changed beyond
2048 recognition here. The old stats from previous
2049 runs will remain in the values that we have
2050 not calculated yet. Initially when the index
2051 object is created the stats members are given
2052 some sensible values so leaving them untouched
2053 here even the first time will not cause us to
2054 read uninitialized memory later. */
2055 break;
2056 }
2057
2058 /* check whether we should pick the current level;
2059 we pick level 1 even if it does not have enough
2060 distinct records because we do not want to scan the
2061 leaf level because it may contain too many records */
2062 if (level_is_analyzed
2063 && (n_diff_on_level[n_prefix - 1] >= N_DIFF_REQUIRED(index)
2064 || level == 1)) {
2065
2066 goto found_level;
2067 }
2068
2069 /* search for a level that contains enough distinct records */
2070
2071 if (level_is_analyzed && level > 1) {
2072
2073 /* if this does not hold we should be on
2074 "found_level" instead of here */
2075 ut_ad(n_diff_on_level[n_prefix - 1]
2076 < N_DIFF_REQUIRED(index));
2077
2078 level--;
2079 level_is_analyzed = false;
2080 }
2081
2082 /* descend into the tree, searching for "good enough" level */
2083 for (;;) {
2084
2085 /* make sure we do not scan the leaf level
2086 accidentally, it may contain too many pages */
2087 ut_ad(level > 0);
2088
2089 /* scanning the same level twice is an optimization
2090 bug */
2091 ut_ad(!level_is_analyzed);
2092
2093 /* Do not scan if this would read too many pages.
2094 Here we use the following fact:
2095 the number of pages on level L equals the number
2096 of records on level L+1, thus we deduce that the
2097 following call would scan total_recs pages, because
2098 total_recs is left from the previous iteration when
2099 we scanned one level upper or we have not scanned any
2100 levels yet in which case total_recs is 1. */
2101 if (total_recs > N_SAMPLE_PAGES(index)) {
2102
2103 /* if the above cond is true then we are
2104 not at the root level since on the root
2105 level total_recs == 1 (set before we
2106 enter the n-prefix loop) and cannot
2107 be > N_SAMPLE_PAGES(index) */
2108 ut_a(level != root_level);
2109
2110 /* step one level back and be satisfied with
2111 whatever it contains */
2112 level++;
2113 level_is_analyzed = true;
2114
2115 break;
2116 }
2117
2118 dict_stats_analyze_index_level(index,
2119 level,
2120 n_diff_on_level,
2121 &total_recs,
2122 &total_pages,
2123 n_diff_boundaries,
2124 &mtr);
2125
2126 level_is_analyzed = true;
2127
2128 if (level == 1
2129 || n_diff_on_level[n_prefix - 1]
2130 >= N_DIFF_REQUIRED(index)) {
2131 /* we have reached the last level we could scan
2132 or we found a good level with many distinct
2133 records */
2134 break;
2135 }
2136
2137 level--;
2138 level_is_analyzed = false;
2139 }
2140 found_level:
2141
2142 DEBUG_PRINTF(" %s(): found level %lu that has " UINT64PF
2143 " distinct records for n_prefix=%lu\n",
2144 __func__, level, n_diff_on_level[n_prefix - 1],
2145 n_prefix);
2146 /* here we are either on level 1 or the level that we are on
2147 contains >= N_DIFF_REQUIRED distinct keys or we did not scan
2148 deeper levels because they would contain too many pages */
2149
2150 ut_ad(level > 0);
2151
2152 ut_ad(level_is_analyzed);
2153
2154 /* if any of these is 0 then there is exactly one page in the
2155 B-tree and it is empty and we should have done full scan and
2156 should not be here */
2157 ut_ad(total_recs > 0);
2158 ut_ad(n_diff_on_level[n_prefix - 1] > 0);
2159
2160 ut_ad(N_SAMPLE_PAGES(index) > 0);
2161
2162 n_diff_data_t* data = &n_diff_data[n_prefix - 1];
2163
2164 data->level = level;
2165
2166 data->n_recs_on_level = total_recs;
2167
2168 data->n_diff_on_level = n_diff_on_level[n_prefix - 1];
2169
2170 data->n_leaf_pages_to_analyze = std::min(
2171 N_SAMPLE_PAGES(index),
2172 n_diff_on_level[n_prefix - 1]);
2173
2174 /* pick some records from this level and dive below them for
2175 the given n_prefix */
2176
2177 dict_stats_analyze_index_for_n_prefix(
2178 index, n_prefix, &n_diff_boundaries[n_prefix - 1],
2179 data, &mtr);
2180 }
2181
2182 mtr_commit(&mtr);
2183
2184 UT_DELETE_ARRAY(n_diff_boundaries);
2185
2186 UT_DELETE_ARRAY(n_diff_on_level);
2187
2188 /* n_prefix == 0 means that the above loop did not end up prematurely
2189 due to tree being changed and so n_diff_data[] is set up. */
2190 if (n_prefix == 0) {
2191 dict_stats_index_set_n_diff(n_diff_data, index);
2192 }
2193
2194 UT_DELETE_ARRAY(n_diff_data);
2195
2196 dict_stats_assert_initialized_index(index);
2197 DBUG_VOID_RETURN;
2198 }
2199
2200 /*********************************************************************//**
2201 Calculates new estimates for table and index statistics. This function
2202 is relatively slow and is used to calculate persistent statistics that
2203 will be saved on disk.
2204 @return DB_SUCCESS or error code */
2205 static
2206 dberr_t
dict_stats_update_persistent(dict_table_t * table)2207 dict_stats_update_persistent(
2208 /*=========================*/
2209 dict_table_t* table) /*!< in/out: table */
2210 {
2211 dict_index_t* index;
2212
2213 DEBUG_PRINTF("%s(table=%s)\n", __func__, table->name);
2214
2215 dict_table_analyze_index_lock(table);
2216
2217 DEBUG_SYNC_C("innodb_dict_stats_update_persistent");
2218
2219 /* analyze the clustered index first */
2220
2221 index = dict_table_get_first_index(table);
2222
2223 if (index == NULL
2224 || dict_index_is_corrupted(index)
2225 || (index->type | DICT_UNIQUE) != (DICT_CLUSTERED | DICT_UNIQUE)) {
2226
2227 /* Table definition is corrupt */
2228 dict_stats_empty_table(table);
2229 dict_table_analyze_index_unlock(table);
2230
2231 return(DB_CORRUPTION);
2232 }
2233
2234 ut_ad(!dict_index_is_ibuf(index));
2235
2236 dict_stats_analyze_index(index);
2237
2238 ulint n_unique = dict_index_get_n_unique(index);
2239
2240 ib_uint64_t stat_n_rows_tmp = index->stat_n_diff_key_vals[n_unique - 1];
2241
2242 ib_uint64_t stat_clustered_index_size_tmp = index->stat_index_size;
2243
2244 /* analyze other indexes from the table, if any */
2245
2246 ib_uint64_t stat_sum_of_other_index_sizes_tmp = 0;
2247
2248 for (index = dict_table_get_next_index(index);
2249 index != NULL;
2250 index = dict_table_get_next_index(index)) {
2251
2252 ut_ad(!dict_index_is_ibuf(index));
2253
2254 if (index->type & DICT_FTS || dict_index_is_spatial(index)) {
2255 continue;
2256 }
2257
2258 dict_stats_empty_index(index);
2259
2260 if (dict_stats_should_ignore_index(index)) {
2261 continue;
2262 }
2263
2264 if (!(table->stats_bg_flag & BG_STAT_SHOULD_QUIT)) {
2265 dict_stats_analyze_index(index);
2266 }
2267
2268 stat_sum_of_other_index_sizes_tmp
2269 += index->stat_index_size;
2270 }
2271
2272 dict_table_stats_lock(table, RW_X_LATCH);
2273
2274 table->stat_n_rows = stat_n_rows_tmp;
2275
2276 table->stat_clustered_index_size = stat_clustered_index_size_tmp;
2277
2278 table->stat_sum_of_other_index_sizes = stat_sum_of_other_index_sizes_tmp;
2279
2280 table->stats_last_recalc = ut_time_monotonic();
2281
2282 table->stat_modified_counter = 0;
2283
2284 table->stat_initialized = TRUE;
2285
2286 dict_stats_assert_initialized(table);
2287
2288 dict_table_stats_unlock(table, RW_X_LATCH);
2289
2290 dict_table_analyze_index_unlock(table);
2291
2292 return(DB_SUCCESS);
2293 }
2294
2295 #include "mysql_com.h"
2296 /** Save an individual index's statistic into the persistent statistics
2297 storage.
2298 @param[in] index index to be updated
2299 @param[in] last_update timestamp of the stat
2300 @param[in] stat_name name of the stat
2301 @param[in] stat_value value of the stat
2302 @param[in] sample_size n pages sampled or NULL
2303 @param[in] stat_description description of the stat
2304 @param[in,out] trx in case of NULL the function will
2305 allocate and free the trx object. If it is not NULL then it will be
2306 rolled back only in the case of error, but not freed.
2307 @return DB_SUCCESS or error code */
2308 static
2309 dberr_t
dict_stats_save_index_stat(dict_index_t * index,lint last_update,const char * stat_name,ib_uint64_t stat_value,ib_uint64_t * sample_size,const char * stat_description,trx_t * trx)2310 dict_stats_save_index_stat(
2311 dict_index_t* index,
2312 lint last_update,
2313 const char* stat_name,
2314 ib_uint64_t stat_value,
2315 ib_uint64_t* sample_size,
2316 const char* stat_description,
2317 trx_t* trx)
2318 {
2319 dberr_t ret;
2320 pars_info_t* pinfo;
2321 char db_utf8[MAX_DB_UTF8_LEN];
2322 char table_utf8[MAX_TABLE_UTF8_LEN];
2323
2324 ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
2325 ut_ad(mutex_own(&dict_sys->mutex));
2326
2327 dict_fs2utf8(index->table->name.m_name, db_utf8, sizeof(db_utf8),
2328 table_utf8, sizeof(table_utf8));
2329
2330 pinfo = pars_info_create();
2331 pars_info_add_str_literal(pinfo, "database_name", db_utf8);
2332 pars_info_add_str_literal(pinfo, "table_name", table_utf8);
2333 pars_info_add_str_literal(pinfo, "index_name", index->name);
2334 UNIV_MEM_ASSERT_RW_ABORT(&last_update, 4);
2335 pars_info_add_int4_literal(pinfo, "last_update", last_update);
2336 UNIV_MEM_ASSERT_RW_ABORT(stat_name, strlen(stat_name));
2337 pars_info_add_str_literal(pinfo, "stat_name", stat_name);
2338 UNIV_MEM_ASSERT_RW_ABORT(&stat_value, 8);
2339 pars_info_add_ull_literal(pinfo, "stat_value", stat_value);
2340 if (sample_size != NULL) {
2341 UNIV_MEM_ASSERT_RW_ABORT(sample_size, 8);
2342 pars_info_add_ull_literal(pinfo, "sample_size", *sample_size);
2343 } else {
2344 pars_info_add_literal(pinfo, "sample_size", NULL,
2345 UNIV_SQL_NULL, DATA_FIXBINARY, 0);
2346 }
2347 UNIV_MEM_ASSERT_RW_ABORT(stat_description, strlen(stat_description));
2348 pars_info_add_str_literal(pinfo, "stat_description",
2349 stat_description);
2350
2351 ret = dict_stats_exec_sql(
2352 pinfo,
2353 "PROCEDURE INDEX_STATS_SAVE () IS\n"
2354 "BEGIN\n"
2355
2356 "DELETE FROM \"" INDEX_STATS_NAME "\"\n"
2357 "WHERE\n"
2358 "database_name = :database_name AND\n"
2359 "table_name = :table_name AND\n"
2360 "index_name = :index_name AND\n"
2361 "stat_name = :stat_name;\n"
2362
2363 "INSERT INTO \"" INDEX_STATS_NAME "\"\n"
2364 "VALUES\n"
2365 "(\n"
2366 ":database_name,\n"
2367 ":table_name,\n"
2368 ":index_name,\n"
2369 ":last_update,\n"
2370 ":stat_name,\n"
2371 ":stat_value,\n"
2372 ":sample_size,\n"
2373 ":stat_description\n"
2374 ");\n"
2375 "END;", trx);
2376
2377 if (ret != DB_SUCCESS) {
2378 ib::error() << "Cannot save index statistics for table "
2379 << index->table->name
2380 << ", index " << index->name
2381 << ", stat name \"" << stat_name << "\": "
2382 << ut_strerr(ret);
2383 }
2384
2385 return(ret);
2386 }
2387
/** Save the table's statistics into the persistent statistics storage.
@param[in]	table_orig	table whose stats to save
@param[in]	only_for_index	if this is non-NULL, then stats for indexes
that are not equal to it will not be saved, if NULL, then all indexes' stats
are saved
@return DB_SUCCESS or error code */
static
dberr_t
dict_stats_save(
	dict_table_t*	table_orig,
	const index_id_t*	only_for_index)
{
	pars_info_t*	pinfo;
	lint		now;
	dberr_t		ret;
	dict_table_t*	table;
	char		db_utf8[MAX_DB_UTF8_LEN];
	char		table_utf8[MAX_TABLE_UTF8_LEN];

	/* Work on a read-only snapshot of the table's stats so that we
	do not have to hold the table's stats latch while doing SQL. */
	table = dict_stats_snapshot_create(table_orig);

	dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8),
		     table_utf8, sizeof(table_utf8));

	rw_lock_x_lock(dict_operation_lock);
	mutex_enter(&dict_sys->mutex);

	/* MySQL's timestamp is 4 byte, so we use
	pars_info_add_int4_literal() which takes a lint arg, so "now" is
	lint */
	now = (lint) ut_time();

	pinfo = pars_info_create();

	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
	pars_info_add_int4_literal(pinfo, "last_update", now);
	pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows);
	pars_info_add_ull_literal(pinfo, "clustered_index_size",
				  table->stat_clustered_index_size);
	pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes",
				  table->stat_sum_of_other_index_sizes);

	/* Upsert the single innodb_table_stats row for this table. */
	ret = dict_stats_exec_sql(
		pinfo,
		"PROCEDURE TABLE_STATS_SAVE () IS\n"
		"BEGIN\n"

		"DELETE FROM \"" TABLE_STATS_NAME "\"\n"
		"WHERE\n"
		"database_name = :database_name AND\n"
		"table_name = :table_name;\n"

		"INSERT INTO \"" TABLE_STATS_NAME "\"\n"
		"VALUES\n"
		"(\n"
		":database_name,\n"
		":table_name,\n"
		":last_update,\n"
		":n_rows,\n"
		":clustered_index_size,\n"
		":sum_of_other_index_sizes\n"
		");\n"
		"END;", NULL);

	if (ret != DB_SUCCESS) {
		ib::error() << "Cannot save table statistics for table "
			<< table->name << ": " << ut_strerr(ret);

		mutex_exit(&dict_sys->mutex);
		rw_lock_x_unlock(dict_operation_lock);

		dict_stats_snapshot_free(table);

		return(ret);
	}

	/* One transaction for all the per-index rows (see the deadlock
	note below); dict_stats_save_index_stat() rolls it back on
	error but does not free it. */
	trx_t*	trx = trx_allocate_for_background();

	if (srv_read_only_mode) {
		trx_start_internal_read_only(trx);
	} else {
		trx_start_internal(trx);
	}

	dict_index_t*	index;
	/* Map keyed by index name: iterating it yields the indexes in
	name order, which is the PK order of innodb_index_stats. */
	index_map_t	indexes(
		(ut_strcmp_functor()),
		index_map_t_allocator(mem_key_dict_stats_index_map_t));

	/* Below we do all the modifications in innodb_index_stats in a single
	transaction for performance reasons. Modifying more than one row in a
	single transaction may deadlock with other transactions if they
	lock the rows in different order. Other transaction could be for
	example when we DROP a table and do
	DELETE FROM innodb_index_stats WHERE database_name = '...'
	AND table_name = '...'; which will affect more than one row. To
	prevent deadlocks we always lock the rows in the same order - the
	order of the PK, which is (database_name, table_name, index_name,
	stat_name). This is why below we sort the indexes by name and then
	for each index, do the mods ordered by stat_name. */

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		indexes[index->name] = index;
	}

	index_map_t::const_iterator	it;

	for (it = indexes.begin(); it != indexes.end(); ++it) {

		index = it->second;

		if (only_for_index != NULL && index->id != *only_for_index) {
			continue;
		}

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_ibuf(index));

		/* One n_diff_pfxNN row per unique-prefix length; these
		stat names sort before "n_leaf_pages" and "size", so
		emitting them first keeps the PK order. */
		for (ulint i = 0; i < index->n_uniq; i++) {

			char	stat_name[16];
			char	stat_description[1024];
			ulint	j;

			ut_snprintf(stat_name, sizeof(stat_name),
				    "n_diff_pfx%02lu", i + 1);

			/* craft a string that contains the column names */
			ut_snprintf(stat_description,
				    sizeof(stat_description),
				    "%s", index->fields[0].name());
			for (j = 1; j <= i; j++) {
				size_t	len;

				len = strlen(stat_description);

				ut_snprintf(stat_description + len,
					    sizeof(stat_description) - len,
					    ",%s", index->fields[j].name());
			}

			ret = dict_stats_save_index_stat(
				index, now, stat_name,
				index->stat_n_diff_key_vals[i],
				&index->stat_n_sample_sizes[i],
				stat_description, trx);

			if (ret != DB_SUCCESS) {
				goto end;
			}
		}

		ret = dict_stats_save_index_stat(index, now, "n_leaf_pages",
						 index->stat_n_leaf_pages,
						 NULL,
						 "Number of leaf pages "
						 "in the index", trx);
		if (ret != DB_SUCCESS) {
			goto end;
		}

		ret = dict_stats_save_index_stat(index, now, "size",
						 index->stat_index_size,
						 NULL,
						 "Number of pages "
						 "in the index", trx);
		if (ret != DB_SUCCESS) {
			goto end;
		}
	}

	trx_commit_for_mysql(trx);

end:
	/* On the error path the trx was already rolled back inside
	dict_stats_save_index_stat(); here we only free it. */
	trx_free_for_background(trx);

	mutex_exit(&dict_sys->mutex);
	rw_lock_x_unlock(dict_operation_lock);

	dict_stats_snapshot_free(table);

	return(ret);
}
2578
2579 /*********************************************************************//**
2580 Called for the row that is selected by
2581 SELECT ... FROM mysql.innodb_table_stats WHERE table='...'
2582 The second argument is a pointer to the table and the fetched stats are
2583 written to it.
2584 @return non-NULL dummy */
2585 static
2586 ibool
dict_stats_fetch_table_stats_step(void * node_void,void * table_void)2587 dict_stats_fetch_table_stats_step(
2588 /*==============================*/
2589 void* node_void, /*!< in: select node */
2590 void* table_void) /*!< out: table */
2591 {
2592 sel_node_t* node = (sel_node_t*) node_void;
2593 dict_table_t* table = (dict_table_t*) table_void;
2594 que_common_t* cnode;
2595 int i;
2596
2597 /* this should loop exactly 3 times - for
2598 n_rows,clustered_index_size,sum_of_other_index_sizes */
2599 for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
2600 cnode != NULL;
2601 cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
2602 i++) {
2603
2604 const byte* data;
2605 dfield_t* dfield = que_node_get_val(cnode);
2606 dtype_t* type = dfield_get_type(dfield);
2607 ulint len = dfield_get_len(dfield);
2608
2609 data = static_cast<const byte*>(dfield_get_data(dfield));
2610
2611 switch (i) {
2612 case 0: /* mysql.innodb_table_stats.n_rows */
2613
2614 ut_a(dtype_get_mtype(type) == DATA_INT);
2615 ut_a(len == 8);
2616
2617 table->stat_n_rows = mach_read_from_8(data);
2618
2619 break;
2620
2621 case 1: /* mysql.innodb_table_stats.clustered_index_size */
2622
2623 ut_a(dtype_get_mtype(type) == DATA_INT);
2624 ut_a(len == 8);
2625
2626 table->stat_clustered_index_size
2627 = (ulint) mach_read_from_8(data);
2628
2629 break;
2630
2631 case 2: /* mysql.innodb_table_stats.sum_of_other_index_sizes */
2632
2633 ut_a(dtype_get_mtype(type) == DATA_INT);
2634 ut_a(len == 8);
2635
2636 table->stat_sum_of_other_index_sizes
2637 = (ulint) mach_read_from_8(data);
2638
2639 break;
2640
2641 default:
2642
2643 /* someone changed SELECT
2644 n_rows,clustered_index_size,sum_of_other_index_sizes
2645 to select more columns from innodb_table_stats without
2646 adjusting here */
2647 ut_error;
2648 }
2649 }
2650
2651 /* if i < 3 this means someone changed the
2652 SELECT n_rows,clustered_index_size,sum_of_other_index_sizes
2653 to select less columns from innodb_table_stats without adjusting here;
2654 if i > 3 we would have ut_error'ed earlier */
2655 ut_a(i == 3 /*n_rows,clustered_index_size,sum_of_other_index_sizes*/);
2656
2657 /* XXX this is not used but returning non-NULL is necessary */
2658 return(TRUE);
2659 }
2660
/** Aux struct used to pass a table and a boolean to
dict_stats_fetch_index_stats_step(). */
struct index_fetch_t {
	dict_table_t*	table;	/*!< table whose indexes are to be modified */
	bool		stats_were_modified; /*!< will be set to true if at
				least one index stats were modified; left
				untouched (i.e. never reset to false) by
				the fetch callback */
};
2668
2669 /*********************************************************************//**
2670 Called for the rows that are selected by
2671 SELECT ... FROM mysql.innodb_index_stats WHERE table='...'
2672 The second argument is a pointer to the table and the fetched stats are
2673 written to its indexes.
2674 Let a table has N indexes and each index has Ui unique columns for i=1..N,
2675 then mysql.innodb_index_stats will have SUM(Ui) i=1..N rows for that table.
2676 So this function will be called SUM(Ui) times where SUM(Ui) is of magnitude
2677 N*AVG(Ui). In each call it searches for the currently fetched index into
2678 table->indexes linearly, assuming this list is not sorted. Thus, overall,
2679 fetching all indexes' stats from mysql.innodb_index_stats is O(N^2) where N
2680 is the number of indexes.
2681 This can be improved if we sort table->indexes in a temporary area just once
2682 and then search in that sorted list. Then the complexity will be O(N*log(N)).
2683 We assume a table will not have more than 100 indexes, so we go with the
2684 simpler N^2 algorithm.
2685 @return non-NULL dummy */
2686 static
2687 ibool
dict_stats_fetch_index_stats_step(void * node_void,void * arg_void)2688 dict_stats_fetch_index_stats_step(
2689 /*==============================*/
2690 void* node_void, /*!< in: select node */
2691 void* arg_void) /*!< out: table + a flag that tells if we
2692 modified anything */
2693 {
2694 sel_node_t* node = (sel_node_t*) node_void;
2695 index_fetch_t* arg = (index_fetch_t*) arg_void;
2696 dict_table_t* table = arg->table;
2697 dict_index_t* index = NULL;
2698 que_common_t* cnode;
2699 const char* stat_name = NULL;
2700 ulint stat_name_len = ULINT_UNDEFINED;
2701 ib_uint64_t stat_value = UINT64_UNDEFINED;
2702 ib_uint64_t sample_size = UINT64_UNDEFINED;
2703 int i;
2704
2705 /* this should loop exactly 4 times - for the columns that
2706 were selected: index_name,stat_name,stat_value,sample_size */
2707 for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
2708 cnode != NULL;
2709 cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
2710 i++) {
2711
2712 const byte* data;
2713 dfield_t* dfield = que_node_get_val(cnode);
2714 dtype_t* type = dfield_get_type(dfield);
2715 ulint len = dfield_get_len(dfield);
2716
2717 data = static_cast<const byte*>(dfield_get_data(dfield));
2718
2719 switch (i) {
2720 case 0: /* mysql.innodb_index_stats.index_name */
2721
2722 ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
2723
2724 /* search for index in table's indexes whose name
2725 matches data; the fetched index name is in data,
2726 has no terminating '\0' and has length len */
2727 for (index = dict_table_get_first_index(table);
2728 index != NULL;
2729 index = dict_table_get_next_index(index)) {
2730
2731 if (index->is_committed()
2732 && strlen(index->name) == len
2733 && memcmp(index->name, data, len) == 0) {
2734 /* the corresponding index was found */
2735 break;
2736 }
2737 }
2738
2739 /* if index is NULL here this means that
2740 mysql.innodb_index_stats contains more rows than the
2741 number of indexes in the table; this is ok, we just
2742 return ignoring those extra rows; in other words
2743 dict_stats_fetch_index_stats_step() has been called
2744 for a row from index_stats with unknown index_name
2745 column */
2746 if (index == NULL) {
2747
2748 return(TRUE);
2749 }
2750
2751 break;
2752
2753 case 1: /* mysql.innodb_index_stats.stat_name */
2754
2755 ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
2756
2757 ut_a(index != NULL);
2758
2759 stat_name = (const char*) data;
2760 stat_name_len = len;
2761
2762 break;
2763
2764 case 2: /* mysql.innodb_index_stats.stat_value */
2765
2766 ut_a(dtype_get_mtype(type) == DATA_INT);
2767 ut_a(len == 8);
2768
2769 ut_a(index != NULL);
2770 ut_a(stat_name != NULL);
2771 ut_a(stat_name_len != ULINT_UNDEFINED);
2772
2773 stat_value = mach_read_from_8(data);
2774
2775 break;
2776
2777 case 3: /* mysql.innodb_index_stats.sample_size */
2778
2779 ut_a(dtype_get_mtype(type) == DATA_INT);
2780 ut_a(len == 8 || len == UNIV_SQL_NULL);
2781
2782 ut_a(index != NULL);
2783 ut_a(stat_name != NULL);
2784 ut_a(stat_name_len != ULINT_UNDEFINED);
2785 ut_a(stat_value != UINT64_UNDEFINED);
2786
2787 if (len == UNIV_SQL_NULL) {
2788 break;
2789 }
2790 /* else */
2791
2792 sample_size = mach_read_from_8(data);
2793
2794 break;
2795
2796 default:
2797
2798 /* someone changed
2799 SELECT index_name,stat_name,stat_value,sample_size
2800 to select more columns from innodb_index_stats without
2801 adjusting here */
2802 ut_error;
2803 }
2804 }
2805
2806 /* if i < 4 this means someone changed the
2807 SELECT index_name,stat_name,stat_value,sample_size
2808 to select less columns from innodb_index_stats without adjusting here;
2809 if i > 4 we would have ut_error'ed earlier */
2810 ut_a(i == 4 /* index_name,stat_name,stat_value,sample_size */);
2811
2812 ut_a(index != NULL);
2813 ut_a(stat_name != NULL);
2814 ut_a(stat_name_len != ULINT_UNDEFINED);
2815 ut_a(stat_value != UINT64_UNDEFINED);
2816 /* sample_size could be UINT64_UNDEFINED here, if it is NULL */
2817
2818 #define PFX "n_diff_pfx"
2819 #define PFX_LEN 10
2820
2821 if (stat_name_len == 4 /* strlen("size") */
2822 && native_strncasecmp("size", stat_name, stat_name_len) == 0) {
2823 index->stat_index_size = (ulint) stat_value;
2824 arg->stats_were_modified = true;
2825 } else if (stat_name_len == 12 /* strlen("n_leaf_pages") */
2826 && native_strncasecmp("n_leaf_pages", stat_name, stat_name_len)
2827 == 0) {
2828 index->stat_n_leaf_pages = (ulint) stat_value;
2829 arg->stats_were_modified = true;
2830 } else if (stat_name_len > PFX_LEN /* e.g. stat_name=="n_diff_pfx01" */
2831 && native_strncasecmp(PFX, stat_name, PFX_LEN) == 0) {
2832
2833 const char* num_ptr;
2834 unsigned long n_pfx;
2835
2836 /* point num_ptr into "1" from "n_diff_pfx12..." */
2837 num_ptr = stat_name + PFX_LEN;
2838
2839 /* stat_name should have exactly 2 chars appended to PFX
2840 and they should be digits */
2841 if (stat_name_len != PFX_LEN + 2
2842 || num_ptr[0] < '0' || num_ptr[0] > '9'
2843 || num_ptr[1] < '0' || num_ptr[1] > '9') {
2844
2845 char db_utf8[MAX_DB_UTF8_LEN];
2846 char table_utf8[MAX_TABLE_UTF8_LEN];
2847
2848 dict_fs2utf8(table->name.m_name,
2849 db_utf8, sizeof(db_utf8),
2850 table_utf8, sizeof(table_utf8));
2851
2852 ib::info out;
2853 out << "Ignoring strange row from "
2854 << INDEX_STATS_NAME_PRINT << " WHERE"
2855 " database_name = '" << db_utf8
2856 << "' AND table_name = '" << table_utf8
2857 << "' AND index_name = '" << index->name()
2858 << "' AND stat_name = '";
2859 out.write(stat_name, stat_name_len);
2860 out << "'; because stat_name is malformed";
2861 return(TRUE);
2862 }
2863 /* else */
2864
2865 /* extract 12 from "n_diff_pfx12..." into n_pfx
2866 note that stat_name does not have a terminating '\0' */
2867 n_pfx = (num_ptr[0] - '0') * 10 + (num_ptr[1] - '0');
2868
2869 ulint n_uniq = index->n_uniq;
2870
2871 if (n_pfx == 0 || n_pfx > n_uniq) {
2872
2873 char db_utf8[MAX_DB_UTF8_LEN];
2874 char table_utf8[MAX_TABLE_UTF8_LEN];
2875
2876 dict_fs2utf8(table->name.m_name,
2877 db_utf8, sizeof(db_utf8),
2878 table_utf8, sizeof(table_utf8));
2879
2880 ib::info out;
2881 out << "Ignoring strange row from "
2882 << INDEX_STATS_NAME_PRINT << " WHERE"
2883 " database_name = '" << db_utf8
2884 << "' AND table_name = '" << table_utf8
2885 << "' AND index_name = '" << index->name()
2886 << "' AND stat_name = '";
2887 out.write(stat_name, stat_name_len);
2888 out << "'; because stat_name is out of range, the index"
2889 " has " << n_uniq << " unique columns";
2890
2891 return(TRUE);
2892 }
2893 /* else */
2894
2895 index->stat_n_diff_key_vals[n_pfx - 1] = stat_value;
2896
2897 if (sample_size != UINT64_UNDEFINED) {
2898 index->stat_n_sample_sizes[n_pfx - 1] = sample_size;
2899 } else {
2900 /* hmm, strange... the user must have UPDATEd the
2901 table manually and SET sample_size = NULL */
2902 index->stat_n_sample_sizes[n_pfx - 1] = 0;
2903 }
2904
2905 index->stat_n_non_null_key_vals[n_pfx - 1] = 0;
2906
2907 arg->stats_were_modified = true;
2908 } else {
2909 /* silently ignore rows with unknown stat_name, the
2910 user may have developed her own stats */
2911 }
2912
2913 /* XXX this is not used but returning non-NULL is necessary */
2914 return(TRUE);
2915 }
2916
/*********************************************************************//**
Read table's statistics from the persistent statistics storage
(mysql.innodb_table_stats and mysql.innodb_index_stats).
The fetched values are copied into "table" by the two bound callback
functions while the cursors below are iterated.
@return DB_SUCCESS, DB_STATS_DO_NOT_EXIST if no index statistics rows
were found for this table in the persistent storage, or error code */
static
dberr_t
dict_stats_fetch_from_ps(
/*=====================*/
	dict_table_t*	table)	/*!< in/out: table */
{
	index_fetch_t	index_fetch_arg;
	trx_t*		trx;
	pars_info_t*	pinfo;
	dberr_t		ret;
	char		db_utf8[MAX_DB_UTF8_LEN];
	char		table_utf8[MAX_TABLE_UTF8_LEN];

	ut_ad(!mutex_own(&dict_sys->mutex));

	/* Initialize all stats to dummy values before fetching because if
	the persistent storage contains incomplete stats (e.g. missing stats
	for some index) then we would end up with (partially) uninitialized
	stats. */
	dict_stats_empty_table(table);

	trx = trx_allocate_for_background();

	/* Use 'read-uncommitted' so that the SELECTs we execute
	do not get blocked in case some user has locked the rows we
	are SELECTing */

	trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;

	if (srv_read_only_mode) {
		trx_start_internal_read_only(trx);
	} else {
		trx_start_internal(trx);
	}

	/* Convert "db/table" into the UTF-8 database and table names that
	key the rows in the stats tables. */
	dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8),
		     table_utf8, sizeof(table_utf8));

	pinfo = pars_info_create();

	pars_info_add_str_literal(pinfo, "database_name", db_utf8);

	pars_info_add_str_literal(pinfo, "table_name", table_utf8);

	/* Bind the callback that consumes the single row fetched from
	innodb_table_stats; it writes directly into "table". */
	pars_info_bind_function(pinfo,
			       "fetch_table_stats_step",
			       dict_stats_fetch_table_stats_step,
			       table);

	/* The index-stats callback records in stats_were_modified whether
	it consumed at least one row; see the check after que_eval_sql(). */
	index_fetch_arg.table = table;
	index_fetch_arg.stats_were_modified = false;
	pars_info_bind_function(pinfo,
			        "fetch_index_stats_step",
			        dict_stats_fetch_index_stats_step,
			        &index_fetch_arg);

	ret = que_eval_sql(pinfo,
			   "PROCEDURE FETCH_STATS () IS\n"
			   "found INT;\n"
			   "DECLARE FUNCTION fetch_table_stats_step;\n"
			   "DECLARE FUNCTION fetch_index_stats_step;\n"
			   "DECLARE CURSOR table_stats_cur IS\n"
			   "  SELECT\n"
			   /* if you change the selected fields, be
			   sure to adjust
			   dict_stats_fetch_table_stats_step() */
			   "  n_rows,\n"
			   "  clustered_index_size,\n"
			   "  sum_of_other_index_sizes\n"
			   "  FROM \"" TABLE_STATS_NAME "\"\n"
			   "  WHERE\n"
			   "  database_name = :database_name AND\n"
			   "  table_name = :table_name;\n"
			   "DECLARE CURSOR index_stats_cur IS\n"
			   "  SELECT\n"
			   /* if you change the selected fields, be
			   sure to adjust
			   dict_stats_fetch_index_stats_step() */
			   "  index_name,\n"
			   "  stat_name,\n"
			   "  stat_value,\n"
			   "  sample_size\n"
			   "  FROM \"" INDEX_STATS_NAME "\"\n"
			   "  WHERE\n"
			   "  database_name = :database_name AND\n"
			   "  table_name = :table_name;\n"

			   "BEGIN\n"

			   "OPEN table_stats_cur;\n"
			   "FETCH table_stats_cur INTO\n"
			   "  fetch_table_stats_step();\n"
			   "IF (SQL % NOTFOUND) THEN\n"
			   "  CLOSE table_stats_cur;\n"
			   "  RETURN;\n"
			   "END IF;\n"
			   "CLOSE table_stats_cur;\n"

			   "OPEN index_stats_cur;\n"
			   "found := 1;\n"
			   "WHILE found = 1 LOOP\n"
			   "  FETCH index_stats_cur INTO\n"
			   "    fetch_index_stats_step();\n"
			   "  IF (SQL % NOTFOUND) THEN\n"
			   "    found := 0;\n"
			   "  END IF;\n"
			   "END LOOP;\n"
			   "CLOSE index_stats_cur;\n"

			   "END;",
			   TRUE, trx);
	/* pinfo is freed by que_eval_sql() */

	trx_commit_for_mysql(trx);

	trx_free_for_background(trx);

	/* If the index-stats callback never fired, there were no stats
	for this table in the persistent storage; tell the caller so it
	can recalculate or fall back to transient statistics. */
	if (!index_fetch_arg.stats_were_modified) {
		return(DB_STATS_DO_NOT_EXIST);
	}

	return(ret);
}
3043
/*********************************************************************//**
Fetches or calculates new estimates for index statistics.
If persistent stats are enabled for the index's table and the persistent
storage is usable, the index is analyzed and the result saved to disk;
otherwise transient (in-memory only) statistics are recalculated. */
void
dict_stats_update_for_index(
/*========================*/
	dict_index_t*	index)	/*!< in/out: index */
{
	DBUG_ENTER("dict_stats_update_for_index");

	ut_ad(!mutex_own(&dict_sys->mutex));

	if (dict_stats_is_persistent_enabled(index->table)) {

		if (dict_stats_persistent_storage_check(false)) {
			/* NOTE(review): presumably serializes concurrent
			ANALYZE of indexes on the same table -- confirm
			against dict_table_analyze_index_lock(). */
			dict_table_analyze_index_lock(index->table);
			dict_stats_analyze_index(index);
			/* Snapshot the freshly computed index size, then
			add it to the table's running total under the
			stats X-latch. */
			ulint stat_sum_of_other_index_sizes_tmp = index->stat_index_size;
			dict_table_stats_lock(index->table, RW_X_LATCH);
			index->table->stat_sum_of_other_index_sizes += stat_sum_of_other_index_sizes_tmp;
			dict_table_stats_unlock(index->table, RW_X_LATCH);
			dict_table_analyze_index_unlock(index->table);
			/* Persist the stats, restricted to this index's
			id (contrast with dict_stats_save(table, NULL)). */
			dict_stats_save(index->table, &index->id);
			DBUG_VOID_RETURN;
		}
		/* else */

		/* Fall back to transient stats since the persistent
		storage is not present or is corrupted */

		ib::info() << "Recalculation of persistent statistics"
			" requested for table " << index->table->name
			<< " index " << index->name
			<< " but the required"
			" persistent statistics storage is not present or is"
			" corrupted. Using transient stats instead.";
	}

	dict_table_stats_lock(index->table, RW_X_LATCH);
	dict_stats_update_transient_for_index(index);
	dict_table_stats_unlock(index->table, RW_X_LATCH);

	DBUG_VOID_RETURN;
}
3087
/*********************************************************************//**
Calculates new estimates for table and index statistics. The statistics
are used in query optimization.
Depending on stats_upd_option this recalculates persistent or transient
stats, empties them, or fetches them from the persistent storage.
Falls back to transient stats (the "transient:" label at the bottom)
whenever the persistent storage is unusable or we are in read-only mode.
@return DB_SUCCESS or error code */
dberr_t
dict_stats_update(
/*==============*/
	dict_table_t*		table,	/*!< in/out: table */
	dict_stats_upd_option_t	stats_upd_option)
					/*!< in: whether to (re) calc
					the stats or to fetch them from
					the persistent statistics
					storage */
{
	ut_ad(!mutex_own(&dict_sys->mutex));

	if (table->ibd_file_missing) {

		ib::warn() << "Cannot calculate statistics for table "
			<< table->name
			<< " because the .ibd file is missing. "
			<< TROUBLESHOOTING_MSG;

		/* Leave only dummy (empty) stats behind. */
		dict_stats_empty_table(table);
		return(DB_TABLESPACE_DELETED);
	} else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
		/* If we have set a high innodb_force_recovery level, do
		not calculate statistics, as a badly corrupted index can
		cause a crash in it. */
		dict_stats_empty_table(table);
		return(DB_SUCCESS);
	}

	switch (stats_upd_option) {
	case DICT_STATS_RECALC_PERSISTENT:

		if (srv_read_only_mode) {
			goto transient;
		}

		/* wakes the last purge batch for exact recalculation */
		if (trx_sys->rseg_history_len > 0) {
			srv_wake_purge_thread_if_not_active();
		}

		/* Persistent recalculation requested, called from
		1) ANALYZE TABLE, or
		2) the auto recalculation background thread, or
		3) open table if stats do not exist on disk and auto recalc
		   is enabled */

		/* InnoDB internal tables (e.g. SYS_TABLES) cannot have
		persistent stats enabled */
		ut_a(strchr(table->name.m_name, '/') != NULL);

		/* check if the persistent statistics storage exists
		before calling the potentially slow function
		dict_stats_update_persistent(); that is a
		prerequisite for dict_stats_save() succeeding */
		if (dict_stats_persistent_storage_check(false)) {

			dberr_t	err;

			err = dict_stats_update_persistent(table);

			if (err != DB_SUCCESS) {
				return(err);
			}

			/* NULL: not restricted to a single index id
			(contrast with dict_stats_save(t, &index->id) in
			dict_stats_update_for_index()). */
			err = dict_stats_save(table, NULL);

			return(err);
		}

		/* Fall back to transient stats since the persistent
		storage is not present or is corrupted */

		ib::warn() << "Recalculation of persistent statistics"
			" requested for table "
			<< table->name
			<< " but the required persistent"
			" statistics storage is not present or is corrupted."
			" Using transient stats instead.";

		goto transient;

	case DICT_STATS_RECALC_TRANSIENT:

		goto transient;

	case DICT_STATS_EMPTY_TABLE:

		dict_stats_empty_table(table);

		/* If table is using persistent stats,
		then save the stats on disk */

		if (dict_stats_is_persistent_enabled(table)) {

			if (dict_stats_persistent_storage_check(false)) {

				return(dict_stats_save(table, NULL));
			}

			return(DB_STATS_DO_NOT_EXIST);
		}

		return(DB_SUCCESS);

	case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY:

		/* fetch requested, either fetch from persistent statistics
		storage or use the old method */

		/* Already initialized in memory: nothing to do. */
		if (table->stat_initialized) {
			return(DB_SUCCESS);
		}

		/* InnoDB internal tables (e.g. SYS_TABLES) cannot have
		persistent stats enabled */
		ut_a(strchr(table->name.m_name, '/') != NULL);

		if (!dict_stats_persistent_storage_check(false)) {
			/* persistent statistics storage does not exist
			or is corrupted, calculate the transient stats */

			ib::error() << "Fetch of persistent statistics"
				" requested for table "
				<< table->name
				<< " but the required system tables "
				<< TABLE_STATS_NAME_PRINT
				<< " and " << INDEX_STATS_NAME_PRINT
				<< " are not present or have unexpected"
				" structure. Using transient stats instead.";

			goto transient;
		}

		dict_table_t*	t;

		/* Create a dummy table object with the same name and
		indexes, suitable for fetching the stats into it. */
		t = dict_stats_table_clone_create(table);

		dberr_t	err = dict_stats_fetch_from_ps(t);

		t->stats_last_recalc = table->stats_last_recalc;
		t->stat_modified_counter = 0;

		switch (err) {
		case DB_SUCCESS:

			dict_table_stats_lock(table, RW_X_LATCH);

			/* Copy the fetched stats from the clone into the
			real table object under the stats X-latch. */
			dict_stats_copy(table, t);

			dict_stats_assert_initialized(table);

			dict_table_stats_unlock(table, RW_X_LATCH);

			dict_stats_table_clone_free(t);

			return(DB_SUCCESS);
		case DB_STATS_DO_NOT_EXIST:

			dict_stats_table_clone_free(t);

			if (srv_read_only_mode) {
				goto transient;
			}

			/* No stats on disk: recalculate them now if the
			table has auto-recalc enabled, else warn and use
			transient stats. */
			if (dict_stats_auto_recalc_is_enabled(table)) {
				return(dict_stats_update(
						table,
						DICT_STATS_RECALC_PERSISTENT));
			}

			ib::info() << "Trying to use table " << table->name
				<< " which has persistent statistics enabled,"
				" but auto recalculation turned off and the"
				" statistics do not exist in "
				TABLE_STATS_NAME_PRINT
				" and " INDEX_STATS_NAME_PRINT
				". Please either run \"ANALYZE TABLE "
				<< table->name << ";\" manually or enable the"
				" auto recalculation with \"ALTER TABLE "
				<< table->name << " STATS_AUTO_RECALC=1;\"."
				" InnoDB will now use transient statistics for "
				<< table->name << ".";

			goto transient;
		default:

			dict_stats_table_clone_free(t);

			ib::error() << "Error fetching persistent statistics"
				" for table "
				<< table->name
				<< " from " TABLE_STATS_NAME_PRINT " and "
				INDEX_STATS_NAME_PRINT ": " << ut_strerr(err)
				<< ". Using transient stats method instead.";

			goto transient;
		}
	/* no "default:" in order to produce a compilation warning
	about unhandled enumeration value */
	}

transient:

	dict_stats_update_transient(table);

	return(DB_SUCCESS);
}
3302
/*********************************************************************//**
Removes the information for a particular index's stats from the persistent
storage if it exists and if there is data stored for this index.
This function creates its own trx and commits it.
A note from Marko why we cannot edit user and sys_* tables in one trx:
marko: The problem is that ibuf merges should be disabled while we are
rolling back dict transactions.
marko: If ibuf merges are not disabled, we need to scan the *.ibd files.
But we shouldn't open *.ibd files before we have rolled back dict
transactions and opened the SYS_* records for the *.ibd files.
@return DB_SUCCESS or error code; DB_STATS_DO_NOT_EXIST from the
storage is mapped to DB_SUCCESS (nothing to delete is not an error) */
dberr_t
dict_stats_drop_index(
/*==================*/
	const char*	db_and_table,/*!< in: db and table, e.g. 'db/table' */
	const char*	iname,	/*!< in: index name */
	char*		errstr, /*!< out: error message if != DB_SUCCESS
				is returned */
	ulint		errstr_sz)/*!< in: size of the errstr buffer */
{
	char		db_utf8[MAX_DB_UTF8_LEN];
	char		table_utf8[MAX_TABLE_UTF8_LEN];
	pars_info_t*	pinfo;
	dberr_t		ret;

	ut_ad(!mutex_own(&dict_sys->mutex));

	/* skip indexes whose table names do not contain a database name
	e.g. if we are dropping an index from SYS_TABLES */
	if (strchr(db_and_table, '/') == NULL) {

		return(DB_SUCCESS);
	}

	dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
		     table_utf8, sizeof(table_utf8));

	pinfo = pars_info_create();

	pars_info_add_str_literal(pinfo, "database_name", db_utf8);

	pars_info_add_str_literal(pinfo, "table_name", table_utf8);

	pars_info_add_str_literal(pinfo, "index_name", iname);

	/* dict_stats_exec_sql() requires both latches to be held (see
	the debug assertions in the UNIV_INLINE helpers below). */
	rw_lock_x_lock(dict_operation_lock);
	mutex_enter(&dict_sys->mutex);

	ret = dict_stats_exec_sql(
		pinfo,
		"PROCEDURE DROP_INDEX_STATS () IS\n"
		"BEGIN\n"
		"DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
		"database_name = :database_name AND\n"
		"table_name = :table_name AND\n"
		"index_name = :index_name;\n"
		"END;\n", NULL);

	mutex_exit(&dict_sys->mutex);
	rw_lock_x_unlock(dict_operation_lock);

	/* No rows for this index in the storage is not an error. */
	if (ret == DB_STATS_DO_NOT_EXIST) {
		ret = DB_SUCCESS;
	}

	/* On real failure, compose a message telling the user how to
	delete the leftover rows manually; errstr is only written on
	this path. */
	if (ret != DB_SUCCESS) {
		ut_snprintf(errstr, errstr_sz,
			    "Unable to delete statistics for index %s"
			    " from %s%s: %s. They can be deleted later using"
			    " DELETE FROM %s WHERE"
			    " database_name = '%s' AND"
			    " table_name = '%s' AND"
			    " index_name = '%s';",
			    iname,
			    INDEX_STATS_NAME_PRINT,
			    (ret == DB_LOCK_WAIT_TIMEOUT
			     ? " because the rows are locked"
			     : ""),
			    ut_strerr(ret),
			    INDEX_STATS_NAME_PRINT,
			    db_utf8,
			    table_utf8,
			    iname);

		ut_print_timestamp(stderr);
		fprintf(stderr, " InnoDB: %s\n", errstr);
	}

	return(ret);
}
3393
3394 /*********************************************************************//**
3395 Executes
3396 DELETE FROM mysql.innodb_table_stats
3397 WHERE database_name = '...' AND table_name = '...';
3398 Creates its own transaction and commits it.
3399 @return DB_SUCCESS or error code */
3400 UNIV_INLINE
3401 dberr_t
dict_stats_delete_from_table_stats(const char * database_name,const char * table_name)3402 dict_stats_delete_from_table_stats(
3403 /*===============================*/
3404 const char* database_name, /*!< in: database name, e.g. 'db' */
3405 const char* table_name) /*!< in: table name, e.g. 'table' */
3406 {
3407 pars_info_t* pinfo;
3408 dberr_t ret;
3409
3410 ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3411 ut_ad(mutex_own(&dict_sys->mutex));
3412
3413 pinfo = pars_info_create();
3414
3415 pars_info_add_str_literal(pinfo, "database_name", database_name);
3416 pars_info_add_str_literal(pinfo, "table_name", table_name);
3417
3418 ret = dict_stats_exec_sql(
3419 pinfo,
3420 "PROCEDURE DELETE_FROM_TABLE_STATS () IS\n"
3421 "BEGIN\n"
3422 "DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n"
3423 "database_name = :database_name AND\n"
3424 "table_name = :table_name;\n"
3425 "END;\n", NULL);
3426
3427 return(ret);
3428 }
3429
3430 /*********************************************************************//**
3431 Executes
3432 DELETE FROM mysql.innodb_index_stats
3433 WHERE database_name = '...' AND table_name = '...';
3434 Creates its own transaction and commits it.
3435 @return DB_SUCCESS or error code */
3436 UNIV_INLINE
3437 dberr_t
dict_stats_delete_from_index_stats(const char * database_name,const char * table_name)3438 dict_stats_delete_from_index_stats(
3439 /*===============================*/
3440 const char* database_name, /*!< in: database name, e.g. 'db' */
3441 const char* table_name) /*!< in: table name, e.g. 'table' */
3442 {
3443 pars_info_t* pinfo;
3444 dberr_t ret;
3445
3446 ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3447 ut_ad(mutex_own(&dict_sys->mutex));
3448
3449 pinfo = pars_info_create();
3450
3451 pars_info_add_str_literal(pinfo, "database_name", database_name);
3452 pars_info_add_str_literal(pinfo, "table_name", table_name);
3453
3454 ret = dict_stats_exec_sql(
3455 pinfo,
3456 "PROCEDURE DELETE_FROM_INDEX_STATS () IS\n"
3457 "BEGIN\n"
3458 "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
3459 "database_name = :database_name AND\n"
3460 "table_name = :table_name;\n"
3461 "END;\n", NULL);
3462
3463 return(ret);
3464 }
3465
/*********************************************************************//**
Removes the statistics for a table and all of its indexes from the
persistent statistics storage if it exists and if there is data stored for
the table. This function creates its own transaction and commits it.
Deletes first from innodb_table_stats, then from innodb_index_stats;
DB_STATS_DO_NOT_EXIST is mapped to DB_SUCCESS.
@return DB_SUCCESS or error code */
dberr_t
dict_stats_drop_table(
/*==================*/
	const char*	db_and_table,	/*!< in: db and table, e.g. 'db/table' */
	char*		errstr,		/*!< out: error message
					if != DB_SUCCESS is returned */
	ulint		errstr_sz)	/*!< in: size of errstr buffer */
{
	char		db_utf8[MAX_DB_UTF8_LEN];
	char		table_utf8[MAX_TABLE_UTF8_LEN];
	dberr_t		ret;

	/* Unlike dict_stats_drop_index(), the caller must already hold
	both latches here. */
	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
	ut_ad(mutex_own(&dict_sys->mutex));

	/* skip tables that do not contain a database name
	e.g. if we are dropping SYS_TABLES */
	if (strchr(db_and_table, '/') == NULL) {

		return(DB_SUCCESS);
	}

	/* skip innodb_table_stats and innodb_index_stats themselves */
	if (strcmp(db_and_table, TABLE_STATS_NAME) == 0
	    || strcmp(db_and_table, INDEX_STATS_NAME) == 0) {

		return(DB_SUCCESS);
	}

	dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
		     table_utf8, sizeof(table_utf8));

	ret = dict_stats_delete_from_table_stats(db_utf8, table_utf8);

	/* Only touch the index stats if the table stats delete went
	through. */
	if (ret == DB_SUCCESS) {
		ret = dict_stats_delete_from_index_stats(db_utf8, table_utf8);
	}

	/* Nothing stored for this table is not an error. */
	if (ret == DB_STATS_DO_NOT_EXIST) {
		ret = DB_SUCCESS;
	}

	/* On failure, tell the user how to clean up manually; errstr is
	only written on this path. */
	if (ret != DB_SUCCESS) {

		ut_snprintf(errstr, errstr_sz,
			    "Unable to delete statistics for table %s.%s: %s."
			    " They can be deleted later using"

			    " DELETE FROM %s WHERE"
			    " database_name = '%s' AND"
			    " table_name = '%s';"

			    " DELETE FROM %s WHERE"
			    " database_name = '%s' AND"
			    " table_name = '%s';",

			    db_utf8, table_utf8,
			    ut_strerr(ret),

			    INDEX_STATS_NAME_PRINT,
			    db_utf8, table_utf8,

			    TABLE_STATS_NAME_PRINT,
			    db_utf8, table_utf8);
	}

	return(ret);
}
3539
3540 /*********************************************************************//**
3541 Executes
3542 UPDATE mysql.innodb_table_stats SET
3543 database_name = '...', table_name = '...'
3544 WHERE database_name = '...' AND table_name = '...';
3545 Creates its own transaction and commits it.
3546 @return DB_SUCCESS or error code */
3547 UNIV_INLINE
3548 dberr_t
dict_stats_rename_table_in_table_stats(const char * old_dbname_utf8,const char * old_tablename_utf8,const char * new_dbname_utf8,const char * new_tablename_utf8)3549 dict_stats_rename_table_in_table_stats(
3550 /*===================================*/
3551 const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
3552 const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
3553 const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
3554 const char* new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
3555 {
3556 pars_info_t* pinfo;
3557 dberr_t ret;
3558
3559 ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3560 ut_ad(mutex_own(&dict_sys->mutex));
3561
3562 pinfo = pars_info_create();
3563
3564 pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
3565 pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
3566 pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
3567 pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
3568
3569 ret = dict_stats_exec_sql(
3570 pinfo,
3571 "PROCEDURE RENAME_TABLE_IN_TABLE_STATS () IS\n"
3572 "BEGIN\n"
3573 "UPDATE \"" TABLE_STATS_NAME "\" SET\n"
3574 "database_name = :new_dbname_utf8,\n"
3575 "table_name = :new_tablename_utf8\n"
3576 "WHERE\n"
3577 "database_name = :old_dbname_utf8 AND\n"
3578 "table_name = :old_tablename_utf8;\n"
3579 "END;\n", NULL);
3580
3581 return(ret);
3582 }
3583
3584 /*********************************************************************//**
3585 Executes
3586 UPDATE mysql.innodb_index_stats SET
3587 database_name = '...', table_name = '...'
3588 WHERE database_name = '...' AND table_name = '...';
3589 Creates its own transaction and commits it.
3590 @return DB_SUCCESS or error code */
3591 UNIV_INLINE
3592 dberr_t
dict_stats_rename_table_in_index_stats(const char * old_dbname_utf8,const char * old_tablename_utf8,const char * new_dbname_utf8,const char * new_tablename_utf8)3593 dict_stats_rename_table_in_index_stats(
3594 /*===================================*/
3595 const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
3596 const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
3597 const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
3598 const char* new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
3599 {
3600 pars_info_t* pinfo;
3601 dberr_t ret;
3602
3603 ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3604 ut_ad(mutex_own(&dict_sys->mutex));
3605
3606 pinfo = pars_info_create();
3607
3608 pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
3609 pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
3610 pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
3611 pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
3612
3613 ret = dict_stats_exec_sql(
3614 pinfo,
3615 "PROCEDURE RENAME_TABLE_IN_INDEX_STATS () IS\n"
3616 "BEGIN\n"
3617 "UPDATE \"" INDEX_STATS_NAME "\" SET\n"
3618 "database_name = :new_dbname_utf8,\n"
3619 "table_name = :new_tablename_utf8\n"
3620 "WHERE\n"
3621 "database_name = :old_dbname_utf8 AND\n"
3622 "table_name = :old_tablename_utf8;\n"
3623 "END;\n", NULL);
3624
3625 return(ret);
3626 }
3627
3628 /*********************************************************************//**
3629 Renames a table in InnoDB persistent stats storage.
3630 This function creates its own transaction and commits it.
3631 @return DB_SUCCESS or error code */
3632 dberr_t
dict_stats_rename_table(bool dict_locked,const char * old_name,const char * new_name,char * errstr,size_t errstr_sz)3633 dict_stats_rename_table(
3634 /*====================*/
3635 bool dict_locked, /*!< in: true if dict_sys mutex
3636 and dict_operation_lock are held,
3637 otherwise false*/
3638 const char* old_name, /*!< in: old name, e.g. 'db/table' */
3639 const char* new_name, /*!< in: new name, e.g. 'db/table' */
3640 char* errstr, /*!< out: error string if != DB_SUCCESS
3641 is returned */
3642 size_t errstr_sz) /*!< in: errstr size */
3643 {
3644 char old_db_utf8[MAX_DB_UTF8_LEN];
3645 char new_db_utf8[MAX_DB_UTF8_LEN];
3646 char old_table_utf8[MAX_TABLE_UTF8_LEN];
3647 char new_table_utf8[MAX_TABLE_UTF8_LEN];
3648 dberr_t ret;
3649
3650 if (!dict_locked) {
3651 ut_ad(!rw_lock_own(dict_operation_lock, RW_LOCK_X));
3652 ut_ad(!mutex_own(&dict_sys->mutex));
3653 }
3654 /* skip innodb_table_stats and innodb_index_stats themselves */
3655 if (strcmp(old_name, TABLE_STATS_NAME) == 0
3656 || strcmp(old_name, INDEX_STATS_NAME) == 0
3657 || strcmp(new_name, TABLE_STATS_NAME) == 0
3658 || strcmp(new_name, INDEX_STATS_NAME) == 0) {
3659
3660 return(DB_SUCCESS);
3661 }
3662
3663 dict_fs2utf8(old_name, old_db_utf8, sizeof(old_db_utf8),
3664 old_table_utf8, sizeof(old_table_utf8));
3665
3666 dict_fs2utf8(new_name, new_db_utf8, sizeof(new_db_utf8),
3667 new_table_utf8, sizeof(new_table_utf8));
3668
3669 if (!dict_locked) {
3670 rw_lock_x_lock(dict_operation_lock);
3671 mutex_enter(&dict_sys->mutex);
3672 }
3673 ulint n_attempts = 0;
3674 do {
3675 n_attempts++;
3676
3677 ret = dict_stats_rename_table_in_table_stats(
3678 old_db_utf8, old_table_utf8,
3679 new_db_utf8, new_table_utf8);
3680
3681 if (ret == DB_DUPLICATE_KEY) {
3682 dict_stats_delete_from_table_stats(
3683 new_db_utf8, new_table_utf8);
3684 }
3685
3686 if (ret == DB_STATS_DO_NOT_EXIST) {
3687 ret = DB_SUCCESS;
3688 }
3689 DBUG_EXECUTE_IF("rename_stats",
3690 mutex_exit(&dict_sys->mutex);
3691 rw_lock_x_unlock(dict_operation_lock);
3692 os_thread_sleep(20000000);
3693 DEBUG_SYNC_C("rename_stats");
3694 rw_lock_x_lock(dict_operation_lock);
3695 mutex_enter(&dict_sys->mutex););
3696
3697 if (ret != DB_SUCCESS) {
3698 mutex_exit(&dict_sys->mutex);
3699 rw_lock_x_unlock(dict_operation_lock);
3700 os_thread_sleep(200000 /* 0.2 sec */);
3701 rw_lock_x_lock(dict_operation_lock);
3702 mutex_enter(&dict_sys->mutex);
3703 }
3704 } while ((ret == DB_DEADLOCK
3705 || ret == DB_DUPLICATE_KEY
3706 || ret == DB_LOCK_WAIT_TIMEOUT)
3707 && n_attempts < 5);
3708
3709 if (ret != DB_SUCCESS) {
3710 ut_snprintf(errstr, errstr_sz,
3711 "Unable to rename statistics from"
3712 " %s.%s to %s.%s in %s: %s."
3713 " They can be renamed later using"
3714
3715 " UPDATE %s SET"
3716 " database_name = '%s',"
3717 " table_name = '%s'"
3718 " WHERE"
3719 " database_name = '%s' AND"
3720 " table_name = '%s';",
3721
3722 old_db_utf8, old_table_utf8,
3723 new_db_utf8, new_table_utf8,
3724 TABLE_STATS_NAME_PRINT,
3725 ut_strerr(ret),
3726
3727 TABLE_STATS_NAME_PRINT,
3728 new_db_utf8, new_table_utf8,
3729 old_db_utf8, old_table_utf8);
3730 mutex_exit(&dict_sys->mutex);
3731 rw_lock_x_unlock(dict_operation_lock);
3732 return(ret);
3733 }
3734 /* else */
3735
3736 n_attempts = 0;
3737 do {
3738 n_attempts++;
3739
3740 ret = dict_stats_rename_table_in_index_stats(
3741 old_db_utf8, old_table_utf8,
3742 new_db_utf8, new_table_utf8);
3743
3744 if (ret == DB_DUPLICATE_KEY) {
3745 dict_stats_delete_from_index_stats(
3746 new_db_utf8, new_table_utf8);
3747 }
3748
3749 if (ret == DB_STATS_DO_NOT_EXIST) {
3750 ret = DB_SUCCESS;
3751 }
3752
3753 if (ret != DB_SUCCESS) {
3754 mutex_exit(&dict_sys->mutex);
3755 rw_lock_x_unlock(dict_operation_lock);
3756 os_thread_sleep(200000 /* 0.2 sec */);
3757 rw_lock_x_lock(dict_operation_lock);
3758 mutex_enter(&dict_sys->mutex);
3759 }
3760 } while ((ret == DB_DEADLOCK
3761 || ret == DB_DUPLICATE_KEY
3762 || ret == DB_LOCK_WAIT_TIMEOUT)
3763 && n_attempts < 5);
3764
3765 if(!dict_locked) {
3766 mutex_exit(&dict_sys->mutex);
3767 rw_lock_x_unlock(dict_operation_lock);
3768 }
3769 if (ret != DB_SUCCESS) {
3770 ut_snprintf(errstr, errstr_sz,
3771 "Unable to rename statistics from"
3772 " %s.%s to %s.%s in %s: %s."
3773 " They can be renamed later using"
3774
3775 " UPDATE %s SET"
3776 " database_name = '%s',"
3777 " table_name = '%s'"
3778 " WHERE"
3779 " database_name = '%s' AND"
3780 " table_name = '%s';",
3781
3782 old_db_utf8, old_table_utf8,
3783 new_db_utf8, new_table_utf8,
3784 INDEX_STATS_NAME_PRINT,
3785 ut_strerr(ret),
3786
3787 INDEX_STATS_NAME_PRINT,
3788 new_db_utf8, new_table_utf8,
3789 old_db_utf8, old_table_utf8);
3790 }
3791
3792 return(ret);
3793 }
3794
3795 /*********************************************************************//**
3796 Renames an index in InnoDB persistent stats storage.
3797 This function creates its own transaction and commits it.
3798 @return DB_SUCCESS or error code. DB_STATS_DO_NOT_EXIST will be returned
3799 if the persistent stats do not exist. */
3800 dberr_t
dict_stats_rename_index(const dict_table_t * table,const char * old_index_name,const char * new_index_name)3801 dict_stats_rename_index(
3802 /*====================*/
3803 const dict_table_t* table, /*!< in: table whose index
3804 is renamed */
3805 const char* old_index_name, /*!< in: old index name */
3806 const char* new_index_name) /*!< in: new index name */
3807 {
3808 rw_lock_x_lock(dict_operation_lock);
3809 mutex_enter(&dict_sys->mutex);
3810
3811 if (!dict_stats_persistent_storage_check(true)) {
3812 mutex_exit(&dict_sys->mutex);
3813 rw_lock_x_unlock(dict_operation_lock);
3814 return(DB_STATS_DO_NOT_EXIST);
3815 }
3816
3817 char dbname_utf8[MAX_DB_UTF8_LEN];
3818 char tablename_utf8[MAX_TABLE_UTF8_LEN];
3819
3820 dict_fs2utf8(table->name.m_name, dbname_utf8, sizeof(dbname_utf8),
3821 tablename_utf8, sizeof(tablename_utf8));
3822
3823 pars_info_t* pinfo;
3824
3825 pinfo = pars_info_create();
3826
3827 pars_info_add_str_literal(pinfo, "dbname_utf8", dbname_utf8);
3828 pars_info_add_str_literal(pinfo, "tablename_utf8", tablename_utf8);
3829 pars_info_add_str_literal(pinfo, "new_index_name", new_index_name);
3830 pars_info_add_str_literal(pinfo, "old_index_name", old_index_name);
3831
3832 dberr_t ret;
3833
3834 ret = dict_stats_exec_sql(
3835 pinfo,
3836 "PROCEDURE RENAME_INDEX_IN_INDEX_STATS () IS\n"
3837 "BEGIN\n"
3838 "UPDATE \"" INDEX_STATS_NAME "\" SET\n"
3839 "index_name = :new_index_name\n"
3840 "WHERE\n"
3841 "database_name = :dbname_utf8 AND\n"
3842 "table_name = :tablename_utf8 AND\n"
3843 "index_name = :old_index_name;\n"
3844 "END;\n", NULL);
3845
3846 mutex_exit(&dict_sys->mutex);
3847 rw_lock_x_unlock(dict_operation_lock);
3848
3849 return(ret);
3850 }
3851
3852 /* tests @{ */
3853 #ifdef UNIV_ENABLE_UNIT_TEST_DICT_STATS
3854
3855 /* The following unit tests test some of the functions in this file
3856 individually, such testing cannot be performed by the mysql-test framework
3857 via SQL. */
3858
3859 /* test_dict_table_schema_check() @{ */
3860 void
test_dict_table_schema_check()3861 test_dict_table_schema_check()
3862 {
3863 /*
3864 CREATE TABLE tcheck (
3865 c01 VARCHAR(123),
3866 c02 INT,
3867 c03 INT NOT NULL,
3868 c04 INT UNSIGNED,
3869 c05 BIGINT,
3870 c06 BIGINT UNSIGNED NOT NULL,
3871 c07 TIMESTAMP
3872 ) ENGINE=INNODB;
3873 */
3874 /* definition for the table 'test/tcheck' */
3875 dict_col_meta_t columns[] = {
3876 {"c01", DATA_VARCHAR, 0, 123},
3877 {"c02", DATA_INT, 0, 4},
3878 {"c03", DATA_INT, DATA_NOT_NULL, 4},
3879 {"c04", DATA_INT, DATA_UNSIGNED, 4},
3880 {"c05", DATA_INT, 0, 8},
3881 {"c06", DATA_INT, DATA_NOT_NULL | DATA_UNSIGNED, 8},
3882 {"c07", DATA_INT, 0, 4},
3883 {"c_extra", DATA_INT, 0, 4}
3884 };
3885 dict_table_schema_t schema = {
3886 "test/tcheck",
3887 0 /* will be set individually for each test below */,
3888 columns
3889 };
3890 char errstr[512];
3891
3892 ut_snprintf(errstr, sizeof(errstr), "Table not found");
3893
3894 /* prevent any data dictionary modifications while we are checking
3895 the tables' structure */
3896
3897 mutex_enter(&dict_sys->mutex);
3898
3899 /* check that a valid table is reported as valid */
3900 schema.n_cols = 7;
3901 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3902 == DB_SUCCESS) {
3903 printf("OK: test.tcheck ok\n");
3904 } else {
3905 printf("ERROR: %s\n", errstr);
3906 printf("ERROR: test.tcheck not present or corrupted\n");
3907 goto test_dict_table_schema_check_end;
3908 }
3909
3910 /* check columns with wrong length */
3911 schema.columns[1].len = 8;
3912 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3913 != DB_SUCCESS) {
3914 printf("OK: test.tcheck.c02 has different length and is"
3915 " reported as corrupted\n");
3916 } else {
3917 printf("OK: test.tcheck.c02 has different length but is"
3918 " reported as ok\n");
3919 goto test_dict_table_schema_check_end;
3920 }
3921 schema.columns[1].len = 4;
3922
3923 /* request that c02 is NOT NULL while actually it does not have
3924 this flag set */
3925 schema.columns[1].prtype_mask |= DATA_NOT_NULL;
3926 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3927 != DB_SUCCESS) {
3928 printf("OK: test.tcheck.c02 does not have NOT NULL while"
3929 " it should and is reported as corrupted\n");
3930 } else {
3931 printf("ERROR: test.tcheck.c02 does not have NOT NULL while"
3932 " it should and is not reported as corrupted\n");
3933 goto test_dict_table_schema_check_end;
3934 }
3935 schema.columns[1].prtype_mask &= ~DATA_NOT_NULL;
3936
3937 /* check a table that contains some extra columns */
3938 schema.n_cols = 6;
3939 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3940 == DB_SUCCESS) {
3941 printf("ERROR: test.tcheck has more columns but is not"
3942 " reported as corrupted\n");
3943 goto test_dict_table_schema_check_end;
3944 } else {
3945 printf("OK: test.tcheck has more columns and is"
3946 " reported as corrupted\n");
3947 }
3948
3949 /* check a table that has some columns missing */
3950 schema.n_cols = 8;
3951 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3952 != DB_SUCCESS) {
3953 printf("OK: test.tcheck has missing columns and is"
3954 " reported as corrupted\n");
3955 } else {
3956 printf("ERROR: test.tcheck has missing columns but is"
3957 " reported as ok\n");
3958 goto test_dict_table_schema_check_end;
3959 }
3960
3961 /* check non-existent table */
3962 schema.table_name = "test/tcheck_nonexistent";
3963 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3964 != DB_SUCCESS) {
3965 printf("OK: test.tcheck_nonexistent is not present\n");
3966 } else {
3967 printf("ERROR: test.tcheck_nonexistent is present!?\n");
3968 goto test_dict_table_schema_check_end;
3969 }
3970
3971 test_dict_table_schema_check_end:
3972
3973 mutex_exit(&dict_sys->mutex);
3974 }
3975 /* @} */
3976
3977 /* save/fetch aux macros @{ */
3978 #define TEST_DATABASE_NAME "foobardb"
3979 #define TEST_TABLE_NAME "test_dict_stats"
3980
3981 #define TEST_N_ROWS 111
3982 #define TEST_CLUSTERED_INDEX_SIZE 222
3983 #define TEST_SUM_OF_OTHER_INDEX_SIZES 333
3984
3985 #define TEST_IDX1_NAME "tidx1"
3986 #define TEST_IDX1_COL1_NAME "tidx1_col1"
3987 #define TEST_IDX1_INDEX_SIZE 123
3988 #define TEST_IDX1_N_LEAF_PAGES 234
3989 #define TEST_IDX1_N_DIFF1 50
3990 #define TEST_IDX1_N_DIFF1_SAMPLE_SIZE 500
3991
3992 #define TEST_IDX2_NAME "tidx2"
3993 #define TEST_IDX2_COL1_NAME "tidx2_col1"
3994 #define TEST_IDX2_COL2_NAME "tidx2_col2"
3995 #define TEST_IDX2_COL3_NAME "tidx2_col3"
3996 #define TEST_IDX2_COL4_NAME "tidx2_col4"
3997 #define TEST_IDX2_INDEX_SIZE 321
3998 #define TEST_IDX2_N_LEAF_PAGES 432
3999 #define TEST_IDX2_N_DIFF1 60
4000 #define TEST_IDX2_N_DIFF1_SAMPLE_SIZE 600
4001 #define TEST_IDX2_N_DIFF2 61
4002 #define TEST_IDX2_N_DIFF2_SAMPLE_SIZE 610
4003 #define TEST_IDX2_N_DIFF3 62
4004 #define TEST_IDX2_N_DIFF3_SAMPLE_SIZE 620
4005 #define TEST_IDX2_N_DIFF4 63
4006 #define TEST_IDX2_N_DIFF4_SAMPLE_SIZE 630
4007 /* @} */
4008
4009 /* test_dict_stats_save() @{ */
/** Test dict_stats_save().
Builds a dummy dict_table_t with two indexes (tidx1 with one unique
column, tidx2 with four), fills in the hard-coded statistics from the
TEST_* macros, persists them via dict_stats_save() and then prints
SELECT statements that a human tester can run against the persistent
statistics tables to verify that the expected rows were written. */
void
test_dict_stats_save()
{
	dict_table_t	table;
	dict_index_t	index1;
	dict_field_t	index1_fields[1];
	ib_uint64_t	index1_stat_n_diff_key_vals[1];
	ib_uint64_t	index1_stat_n_sample_sizes[1];
	dict_index_t	index2;
	dict_field_t	index2_fields[4];
	ib_uint64_t	index2_stat_n_diff_key_vals[4];
	ib_uint64_t	index2_stat_n_sample_sizes[4];
	dberr_t		ret;

	/* craft a dummy dict_table_t: only the fields that
	dict_stats_save() reads are initialized */
	table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
	table.stat_n_rows = TEST_N_ROWS;
	table.stat_clustered_index_size = TEST_CLUSTERED_INDEX_SIZE;
	table.stat_sum_of_other_index_sizes = TEST_SUM_OF_OTHER_INDEX_SIZES;
	UT_LIST_INIT(table.indexes, &dict_index_t::indexes);
	UT_LIST_ADD_LAST(table.indexes, &index1);
	UT_LIST_ADD_LAST(table.indexes, &index2);
	/* set the magic numbers so that debug-build consistency
	assertions on these structs pass */
	ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
	ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);

	/* first dummy index: 1 unique column, so one n_diff slot */
	index1.name = TEST_IDX1_NAME;
	index1.table = &table;
	index1.cached = 1;
	index1.n_uniq = 1;
	index1.fields = index1_fields;
	index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
	index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
	index1.stat_index_size = TEST_IDX1_INDEX_SIZE;
	index1.stat_n_leaf_pages = TEST_IDX1_N_LEAF_PAGES;
	index1_fields[0].name = TEST_IDX1_COL1_NAME;
	index1_stat_n_diff_key_vals[0] = TEST_IDX1_N_DIFF1;
	index1_stat_n_sample_sizes[0] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE;

	/* second dummy index: 4 unique columns, one n_diff/sample
	slot per key prefix length 1..4 */
	ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
	index2.name = TEST_IDX2_NAME;
	index2.table = &table;
	index2.cached = 1;
	index2.n_uniq = 4;
	index2.fields = index2_fields;
	index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
	index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
	index2.stat_index_size = TEST_IDX2_INDEX_SIZE;
	index2.stat_n_leaf_pages = TEST_IDX2_N_LEAF_PAGES;
	index2_fields[0].name = TEST_IDX2_COL1_NAME;
	index2_fields[1].name = TEST_IDX2_COL2_NAME;
	index2_fields[2].name = TEST_IDX2_COL3_NAME;
	index2_fields[3].name = TEST_IDX2_COL4_NAME;
	index2_stat_n_diff_key_vals[0] = TEST_IDX2_N_DIFF1;
	index2_stat_n_diff_key_vals[1] = TEST_IDX2_N_DIFF2;
	index2_stat_n_diff_key_vals[2] = TEST_IDX2_N_DIFF3;
	index2_stat_n_diff_key_vals[3] = TEST_IDX2_N_DIFF4;
	index2_stat_n_sample_sizes[0] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE;
	index2_stat_n_sample_sizes[1] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE;
	index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE;
	index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE;

	ret = dict_stats_save(&table, NULL);

	ut_a(ret == DB_SUCCESS);

	printf("\nOK: stats saved successfully, now go ahead and read"
	       " what's inside %s and %s:\n\n",
	       TABLE_STATS_NAME_PRINT,
	       INDEX_STATS_NAME_PRINT);

	/* query that should select exactly the one table-stats row
	that was just saved */
	printf("SELECT COUNT(*) = 1 AS table_stats_saved_successfully\n"
	       "FROM %s\n"
	       "WHERE\n"
	       "database_name = '%s' AND\n"
	       "table_name = '%s' AND\n"
	       "n_rows = %d AND\n"
	       "clustered_index_size = %d AND\n"
	       "sum_of_other_index_sizes = %d;\n"
	       "\n",
	       TABLE_STATS_NAME_PRINT,
	       TEST_DATABASE_NAME,
	       TEST_TABLE_NAME,
	       TEST_N_ROWS,
	       TEST_CLUSTERED_INDEX_SIZE,
	       TEST_SUM_OF_OTHER_INDEX_SIZES);

	/* tidx1 should have produced 3 rows: size, n_leaf_pages and
	one n_diff_pfx row */
	printf("SELECT COUNT(*) = 3 AS tidx1_stats_saved_successfully\n"
	       "FROM %s\n"
	       "WHERE\n"
	       "database_name = '%s' AND\n"
	       "table_name = '%s' AND\n"
	       "index_name = '%s' AND\n"
	       "(\n"
	       " (stat_name = 'size' AND stat_value = %d AND"
	       "  sample_size IS NULL) OR\n"
	       " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
	       "  sample_size IS NULL) OR\n"
	       " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s')\n"
	       ");\n"
	       "\n",
	       INDEX_STATS_NAME_PRINT,
	       TEST_DATABASE_NAME,
	       TEST_TABLE_NAME,
	       TEST_IDX1_NAME,
	       TEST_IDX1_INDEX_SIZE,
	       TEST_IDX1_N_LEAF_PAGES,
	       TEST_IDX1_N_DIFF1,
	       TEST_IDX1_N_DIFF1_SAMPLE_SIZE,
	       TEST_IDX1_COL1_NAME);

	/* tidx2 should have produced 6 rows: size, n_leaf_pages and
	four n_diff_pfx rows (one per key prefix length) */
	printf("SELECT COUNT(*) = 6 AS tidx2_stats_saved_successfully\n"
	       "FROM %s\n"
	       "WHERE\n"
	       "database_name = '%s' AND\n"
	       "table_name = '%s' AND\n"
	       "index_name = '%s' AND\n"
	       "(\n"
	       " (stat_name = 'size' AND stat_value = %d AND"
	       "  sample_size IS NULL) OR\n"
	       " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
	       "  sample_size IS NULL) OR\n"
	       " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s') OR\n"
	       " (stat_name = 'n_diff_pfx02' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s,%s') OR\n"
	       " (stat_name = 'n_diff_pfx03' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s,%s,%s') OR\n"
	       " (stat_name = 'n_diff_pfx04' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s,%s,%s,%s')\n"
	       ");\n"
	       "\n",
	       INDEX_STATS_NAME_PRINT,
	       TEST_DATABASE_NAME,
	       TEST_TABLE_NAME,
	       TEST_IDX2_NAME,
	       TEST_IDX2_INDEX_SIZE,
	       TEST_IDX2_N_LEAF_PAGES,
	       TEST_IDX2_N_DIFF1,
	       TEST_IDX2_N_DIFF1_SAMPLE_SIZE, TEST_IDX2_COL1_NAME,
	       TEST_IDX2_N_DIFF2,
	       TEST_IDX2_N_DIFF2_SAMPLE_SIZE,
	       TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME,
	       TEST_IDX2_N_DIFF3,
	       TEST_IDX2_N_DIFF3_SAMPLE_SIZE,
	       TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
	       TEST_IDX2_N_DIFF4,
	       TEST_IDX2_N_DIFF4_SAMPLE_SIZE,
	       TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
	       TEST_IDX2_COL4_NAME);
}
4161 /* @} */
4162
4163 /* test_dict_stats_fetch_from_ps() @{ */
4164 void
test_dict_stats_fetch_from_ps()4165 test_dict_stats_fetch_from_ps()
4166 {
4167 dict_table_t table;
4168 dict_index_t index1;
4169 ib_uint64_t index1_stat_n_diff_key_vals[1];
4170 ib_uint64_t index1_stat_n_sample_sizes[1];
4171 dict_index_t index2;
4172 ib_uint64_t index2_stat_n_diff_key_vals[4];
4173 ib_uint64_t index2_stat_n_sample_sizes[4];
4174 dberr_t ret;
4175
4176 /* craft a dummy dict_table_t */
4177 table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
4178 UT_LIST_INIT(table.indexes, &dict_index_t::indexes);
4179 UT_LIST_ADD_LAST(table.indexes, &index1);
4180 UT_LIST_ADD_LAST(table.indexes, &index2);
4181 ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
4182
4183 index1.name = TEST_IDX1_NAME;
4184 ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
4185 index1.cached = 1;
4186 index1.n_uniq = 1;
4187 index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
4188 index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
4189
4190 index2.name = TEST_IDX2_NAME;
4191 ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
4192 index2.cached = 1;
4193 index2.n_uniq = 4;
4194 index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
4195 index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
4196
4197 ret = dict_stats_fetch_from_ps(&table);
4198
4199 ut_a(ret == DB_SUCCESS);
4200
4201 ut_a(table.stat_n_rows == TEST_N_ROWS);
4202 ut_a(table.stat_clustered_index_size == TEST_CLUSTERED_INDEX_SIZE);
4203 ut_a(table.stat_sum_of_other_index_sizes
4204 == TEST_SUM_OF_OTHER_INDEX_SIZES);
4205
4206 ut_a(index1.stat_index_size == TEST_IDX1_INDEX_SIZE);
4207 ut_a(index1.stat_n_leaf_pages == TEST_IDX1_N_LEAF_PAGES);
4208 ut_a(index1_stat_n_diff_key_vals[0] == TEST_IDX1_N_DIFF1);
4209 ut_a(index1_stat_n_sample_sizes[0] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE);
4210
4211 ut_a(index2.stat_index_size == TEST_IDX2_INDEX_SIZE);
4212 ut_a(index2.stat_n_leaf_pages == TEST_IDX2_N_LEAF_PAGES);
4213 ut_a(index2_stat_n_diff_key_vals[0] == TEST_IDX2_N_DIFF1);
4214 ut_a(index2_stat_n_sample_sizes[0] == TEST_IDX2_N_DIFF1_SAMPLE_SIZE);
4215 ut_a(index2_stat_n_diff_key_vals[1] == TEST_IDX2_N_DIFF2);
4216 ut_a(index2_stat_n_sample_sizes[1] == TEST_IDX2_N_DIFF2_SAMPLE_SIZE);
4217 ut_a(index2_stat_n_diff_key_vals[2] == TEST_IDX2_N_DIFF3);
4218 ut_a(index2_stat_n_sample_sizes[2] == TEST_IDX2_N_DIFF3_SAMPLE_SIZE);
4219 ut_a(index2_stat_n_diff_key_vals[3] == TEST_IDX2_N_DIFF4);
4220 ut_a(index2_stat_n_sample_sizes[3] == TEST_IDX2_N_DIFF4_SAMPLE_SIZE);
4221
4222 printf("OK: fetch successful\n");
4223 }
4224 /* @} */
4225
4226 /* test_dict_stats_all() @{ */
/** Run all dict0stats unit tests in order. The order matters:
test_dict_stats_save() must run before test_dict_stats_fetch_from_ps(),
because the fetch test asserts the exact values that the save test
wrote to the persistent statistics tables. */
void
test_dict_stats_all()
{
	test_dict_table_schema_check();

	test_dict_stats_save();

	test_dict_stats_fetch_from_ps();
}
4236 /* @} */
4237
4238 #endif /* UNIV_ENABLE_UNIT_TEST_DICT_STATS */
4239 /* @} */
4240
4241 #endif /* UNIV_HOTBACKUP */
4242