1 /*****************************************************************************
2
3 Copyright (c) 2009, 2019, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2015, 2021, MariaDB Corporation.
5
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17
18 *****************************************************************************/
19
20 /**************************************************//**
21 @file dict/dict0stats.cc
22 Code used for calculating and manipulating table statistics.
23
24 Created Jan 06, 2010 Vasil Dimov
25 *******************************************************/
26
27 #include "dict0stats.h"
28 #include "ut0ut.h"
29 #include "ut0rnd.h"
30 #include "dyn0buf.h"
31 #include "row0sel.h"
32 #include "trx0trx.h"
33 #include "pars0pars.h"
34 #include <mysql_com.h>
35 #include "btr0btr.h"
36 #include "sync0sync.h"
37
38 #include <algorithm>
39 #include <map>
40 #include <vector>
41
42 /* Sampling algorithm description @{
43
44 The algorithm is controlled by one number - N_SAMPLE_PAGES(index),
45 let it be A, which is the number of leaf pages to analyze for a given index
46 for each n-prefix (if the index is on 3 columns, then 3*A leaf pages will be
47 analyzed).
48
49 Let the total number of leaf pages in the table be T.
50 Level 0 - leaf pages, level H - root.
51
52 Definition: N-prefix-boring record is a record on a non-leaf page that equals
53 the next (to the right, cross page boundaries, skipping the supremum and
54 infimum) record on the same level when looking at the first n-prefix columns.
55 The last (user) record on a level is not boring (it does not match the
56 non-existent user record to the right). We call the records boring because all
57 the records on the page below a boring record are equal to that boring record.
58
59 We avoid diving below boring records when searching for a leaf page to
60 estimate the number of distinct records because we know that such a leaf
61 page will have number of distinct records == 1.
62
63 For each n-prefix: start from the root level and full scan subsequent lower
64 levels until a level that contains at least A*10 distinct records is found.
65 Let's call this level LA.
66 As an optimization the search is canceled if it has reached level 1 (never
67 descend to the level 0 (leaf)) and also if the next level to be scanned
68 would contain more than A pages. The latter is because the user has asked
69 to analyze A leaf pages and it does not make sense to scan much more than
70 A non-leaf pages with the sole purpose of finding a good sample of A leaf
71 pages.
72
73 After finding the appropriate level LA with >A*10 distinct records (or less in
74 the exceptions described above), divide it into groups of equal records and
75 pick A such groups. Then pick the last record from each group. For example,
76 let the level be:
77
78 index: 0,1,2,3,4,5,6,7,8,9,10
79 record: 1,1,1,2,2,7,7,7,7,7,9
80
81 There are 4 groups of distinct records and if A=2 random ones are selected,
82 e.g. 1,1,1 and 7,7,7,7,7, then records with indexes 2 and 9 will be selected.
83
84 After selecting A records as described above, dive below them to find A leaf
85 pages and analyze them, finding the total number of distinct records. The
86 dive to the leaf level is performed by selecting a non-boring record from
87 each page and diving below it.
88
89 This way, a total of A leaf pages are analyzed for the given n-prefix.
90
91 Let the number of different key values found in each leaf page i be Pi (i=1..A).
92 Let N_DIFF_AVG_LEAF be (P1 + P2 + ... + PA) / A.
93 Let the number of different key values on level LA be N_DIFF_LA.
94 Let the total number of records on level LA be TOTAL_LA.
95 Let R be N_DIFF_LA / TOTAL_LA, we assume this ratio is the same on the
96 leaf level.
97 Let the number of leaf pages be N.
98 Then the total number of different key values on the leaf level is:
99 N * R * N_DIFF_AVG_LEAF.
100 See REF01 for the implementation.
101
102 The above describes how to calculate the cardinality of an index.
103 This algorithm is executed for each n-prefix of a multi-column index
104 where n=1..n_uniq.
105 @} */
106
107 /* names of the tables from the persistent statistics storage */
108 #define TABLE_STATS_NAME_PRINT "mysql.innodb_table_stats"
109 #define INDEX_STATS_NAME_PRINT "mysql.innodb_index_stats"
110
111 #ifdef UNIV_STATS_DEBUG
112 #define DEBUG_PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__)
113 #else /* UNIV_STATS_DEBUG */
114 #define DEBUG_PRINTF(fmt, ...) /* noop */
115 #endif /* UNIV_STATS_DEBUG */
116
117 /* Gets the number of leaf pages to sample in persistent stats estimation */
118 #define N_SAMPLE_PAGES(index) \
119 static_cast<ib_uint64_t>( \
120 (index)->table->stats_sample_pages != 0 \
121 ? (index)->table->stats_sample_pages \
122 : srv_stats_persistent_sample_pages)
123
124 /* number of distinct records on a given level that are required to stop
125 descending to lower levels and fetch N_SAMPLE_PAGES(index) records
126 from that level */
127 #define N_DIFF_REQUIRED(index) (N_SAMPLE_PAGES(index) * 10)
128
129 /* A dynamic array where we store the boundaries of each distinct group
130 of keys. For example if a btree level is:
131 index: 0,1,2,3,4,5,6,7,8,9,10,11,12
132 data: b,b,b,b,b,b,g,g,j,j,j, x, y
133 then we would store 5,7,10,11,12 in the array. */
134 typedef std::vector<ib_uint64_t, ut_allocator<ib_uint64_t> > boundaries_t;
135
136 /** Allocator type used for index_map_t. */
137 typedef ut_allocator<std::pair<const char* const, dict_index_t*> >
138 index_map_t_allocator;
139
140 /** Auxiliary map used for sorting indexes by name in dict_stats_save(). */
141 typedef std::map<const char*, dict_index_t*, ut_strcmp_functor,
142 index_map_t_allocator> index_map_t;
143
144 /*********************************************************************//**
145 Checks whether an index should be ignored in stats manipulations:
146 * stats fetch
147 * stats recalc
148 * stats save
149 @return true if exists and all tables are ok */
150 UNIV_INLINE
151 bool
dict_stats_should_ignore_index(const dict_index_t * index)152 dict_stats_should_ignore_index(
153 /*===========================*/
154 const dict_index_t* index) /*!< in: index */
155 {
156 return((index->type & (DICT_FTS | DICT_SPATIAL))
157 || index->is_corrupted()
158 || index->to_be_dropped
159 || !index->is_committed());
160 }
161
162 /*********************************************************************//**
163 Checks whether the persistent statistics storage exists and that all
164 tables have the proper structure.
165 @return true if exists and all tables are ok */
static
bool
dict_stats_persistent_storage_check(
/*================================*/
	bool	caller_has_dict_sys_mutex)	/*!< in: true if the caller
						owns dict_sys.mutex */
{
	/* definition for the table TABLE_STATS_NAME
	NOTE(review): column lengths appear to be in bytes, presumably
	3 bytes per character for utf8 (e.g. 192 = 64*3) -- confirm
	against the mysql.innodb_table_stats DDL. */
	dict_col_meta_t	table_stats_columns[] = {
		{"database_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"table_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 597},

		{"last_update", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 4},

		{"n_rows", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8},

		{"clustered_index_size", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8},

		{"sum_of_other_index_sizes", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8}
	};
	dict_table_schema_t	table_stats_schema = {
		TABLE_STATS_NAME,
		UT_ARR_SIZE(table_stats_columns),
		table_stats_columns,
		0 /* n_foreign */,
		0 /* n_referenced */
	};

	/* definition for the table INDEX_STATS_NAME */
	dict_col_meta_t	index_stats_columns[] = {
		{"database_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"table_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 597},

		{"index_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"last_update", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 4},

		{"stat_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 64*3},

		{"stat_value", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8},

		/* sample_size is nullable: NULL is stored for stats that
		have no sampling notion (e.g. index size). */
		{"sample_size", DATA_INT,
			DATA_UNSIGNED, 8},

		{"stat_description", DATA_VARMYSQL,
			DATA_NOT_NULL, 1024*3}
	};
	dict_table_schema_t	index_stats_schema = {
		INDEX_STATS_NAME,
		UT_ARR_SIZE(index_stats_columns),
		index_stats_columns,
		0 /* n_foreign */,
		0 /* n_referenced */
	};

	char		errstr[512];
	dberr_t		ret;

	/* The schema check must be done under dict_sys.mutex; acquire it
	here only if the caller does not already hold it. */
	if (!caller_has_dict_sys_mutex) {
		mutex_enter(&dict_sys.mutex);
	}

	ut_ad(mutex_own(&dict_sys.mutex));

	/* first check table_stats */
	ret = dict_table_schema_check(&table_stats_schema, errstr,
				      sizeof(errstr));
	if (ret == DB_SUCCESS) {
		/* if it is ok, then check index_stats */
		ret = dict_table_schema_check(&index_stats_schema, errstr,
					      sizeof(errstr));
	}

	if (!caller_has_dict_sys_mutex) {
		mutex_exit(&dict_sys.mutex);
	}

	/* A missing stats table (DB_STATS_DO_NOT_EXIST) is not an error
	worth logging -- persistent stats are simply unavailable. Any
	other failure (e.g. wrong table structure) is reported. */
	if (ret != DB_SUCCESS && ret != DB_STATS_DO_NOT_EXIST) {
		ib::error() << errstr;
		return(false);
	} else if (ret == DB_STATS_DO_NOT_EXIST) {
		return false;
	}
	/* else */

	return(true);
}
267
268 /** Executes a given SQL statement using the InnoDB internal SQL parser.
269 This function will free the pinfo object.
270 @param[in,out] pinfo pinfo to pass to que_eval_sql() must already
271 have any literals bound to it
272 @param[in] sql SQL string to execute
273 @param[in,out] trx in case of NULL the function will allocate and
274 free the trx object. If it is not NULL then it will be rolled back
275 only in the case of error, but not freed.
276 @return DB_SUCCESS or error code */
static
dberr_t
dict_stats_exec_sql(
	pars_info_t*	pinfo,
	const char*	sql,
	trx_t*		trx)
{
	dberr_t	err;
	/* true when this function created (and thus must free) the trx */
	bool	trx_started = false;

	ut_d(dict_sys.assert_locked());

	/* The caller holds dict_sys (asserted above), so pass true to
	avoid re-acquiring the mutex inside the check. */
	if (!dict_stats_persistent_storage_check(true)) {
		pars_info_free(pinfo);
		return(DB_STATS_DO_NOT_EXIST);
	}

	if (trx == NULL) {
		trx = trx_create();
		trx_started = true;

		if (srv_read_only_mode) {
			trx_start_internal_read_only(trx);
		} else {
			trx_start_internal(trx);
		}
	}

	err = que_eval_sql(pinfo, sql, FALSE, trx); /* pinfo is freed here */

	/* Debug hook: simulate a failure of the statement so that the
	rollback path below is exercised. Only done for a caller-supplied
	trx (!trx_started). */
	DBUG_EXECUTE_IF("stats_index_error",
		if (!trx_started) {
			err = DB_STATS_DO_NOT_EXIST;
			trx->error_state = DB_STATS_DO_NOT_EXIST;
		});

	/* Per the contract in the function comment: a caller-supplied trx
	is neither committed nor freed on success -- the caller owns it. */
	if (!trx_started && err == DB_SUCCESS) {
		return(DB_SUCCESS);
	}

	if (err == DB_SUCCESS) {
		trx_commit_for_mysql(trx);
	} else {
		/* On error, roll back. The rollback is done while
		pretending to hold the dictionary operation X-latch
		(dict_operation_lock_mode), restored to 0 afterwards. */
		trx->op_info = "rollback of internal trx on stats tables";
		trx->dict_operation_lock_mode = RW_X_LATCH;
		trx->rollback();
		trx->dict_operation_lock_mode = 0;
		trx->op_info = "";
		ut_a(trx->error_state == DB_SUCCESS);
	}

	/* Free the trx only if we allocated it ourselves. */
	if (trx_started) {
		trx->free();
	}

	return(err);
}
334
335 /*********************************************************************//**
336 Duplicate a table object and its indexes.
337 This function creates a dummy dict_table_t object and initializes the
338 following table and index members:
339 dict_table_t::id (copied)
340 dict_table_t::heap (newly created)
341 dict_table_t::name (copied)
342 dict_table_t::corrupted (copied)
343 dict_table_t::indexes<> (newly created)
344 dict_table_t::magic_n
345 for each entry in dict_table_t::indexes, the following are initialized:
346 (indexes that have DICT_FTS set in index->type are skipped)
347 dict_index_t::id (copied)
348 dict_index_t::name (copied)
349 dict_index_t::table_name (points to the copied table name)
350 dict_index_t::table (points to the above semi-initialized object)
351 dict_index_t::type (copied)
352 dict_index_t::to_be_dropped (copied)
353 dict_index_t::online_status (copied)
354 dict_index_t::n_uniq (copied)
355 dict_index_t::fields[] (newly created, only first n_uniq, only fields[i].name)
356 dict_index_t::indexes<> (newly created)
357 dict_index_t::stat_n_diff_key_vals[] (only allocated, left uninitialized)
358 dict_index_t::stat_n_sample_sizes[] (only allocated, left uninitialized)
359 dict_index_t::stat_n_non_null_key_vals[] (only allocated, left uninitialized)
360 dict_index_t::magic_n
361 The returned object should be freed with dict_stats_table_clone_free()
362 when no longer needed.
363 @return incomplete table object */
static
dict_table_t*
dict_stats_table_clone_create(
/*==========================*/
	const dict_table_t*	table)	/*!< in: table whose stats to copy */
{
	size_t		heap_size;
	dict_index_t*	index;

	/* Estimate the size needed for the table and all of its indexes.
	NOTE: this first pass must apply exactly the same index filter
	(dict_stats_should_ignore_index) as the copy pass below, so that
	the estimate covers everything that is actually allocated. */

	heap_size = 0;
	heap_size += sizeof(dict_table_t);
	heap_size += strlen(table->name.m_name) + 1;

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_ibuf(index));

		/* Only the first n_uniq fields and the n_uniq stats slots
		are cloned; see the function comment above. */
		ulint	n_uniq = dict_index_get_n_unique(index);

		heap_size += sizeof(dict_index_t);
		heap_size += strlen(index->name) + 1;
		heap_size += n_uniq * sizeof(index->fields[0]);
		for (ulint i = 0; i < n_uniq; i++) {
			heap_size += strlen(index->fields[i].name) + 1;
		}
		heap_size += n_uniq * sizeof(index->stat_n_diff_key_vals[0]);
		heap_size += n_uniq * sizeof(index->stat_n_sample_sizes[0]);
		heap_size += n_uniq * sizeof(index->stat_n_non_null_key_vals[0]);
	}

	/* Allocate the memory and copy the members */

	mem_heap_t*	heap;

	heap = mem_heap_create(heap_size);

	dict_table_t*	t;

	t = (dict_table_t*) mem_heap_alloc(heap, sizeof(*t));

	/* MEM_CHECK_DEFINED guards against copying uninitialized bytes
	(catches bugs under Valgrind/MSAN builds). */
	MEM_CHECK_DEFINED(&table->id, sizeof(table->id));
	t->id = table->id;

	/* The clone owns the heap; dict_stats_table_clone_free() frees
	everything by freeing t->heap. */
	t->heap = heap;

	t->name.m_name = mem_heap_strdup(heap, table->name.m_name);

	t->corrupted = table->corrupted;

	UT_LIST_INIT(t->indexes, &dict_index_t::indexes);
#ifdef BTR_CUR_HASH_ADAPT
	UT_LIST_INIT(t->freed_indexes, &dict_index_t::indexes);
#endif /* BTR_CUR_HASH_ADAPT */

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_ibuf(index));

		dict_index_t*	idx;

		idx = (dict_index_t*) mem_heap_alloc(heap, sizeof(*idx));

		MEM_CHECK_DEFINED(&index->id, sizeof(index->id));
		idx->id = index->id;

		idx->name = mem_heap_strdup(heap, index->name);

		idx->table = t;

		idx->type = index->type;

		/* The clone is a private dummy object: it is never
		dropped and is always treated as complete/committed. */
		idx->to_be_dropped = 0;

		idx->online_status = ONLINE_INDEX_COMPLETE;
		idx->set_committed(true);

		idx->n_uniq = index->n_uniq;

		idx->fields = (dict_field_t*) mem_heap_alloc(
			heap, idx->n_uniq * sizeof(idx->fields[0]));

		/* Only fields[i].name is cloned; other field members
		are left uninitialized (see the function comment). */
		for (ulint i = 0; i < idx->n_uniq; i++) {
			idx->fields[i].name = mem_heap_strdup(
				heap, index->fields[i].name);
		}

		/* hook idx into t->indexes */
		UT_LIST_ADD_LAST(t->indexes, idx);

		/* The stats arrays are only allocated here, left
		uninitialized; dict_stats_copy() fills them in. */
		idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_diff_key_vals[0]));

		idx->stat_n_sample_sizes = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_sample_sizes[0]));

		idx->stat_n_non_null_key_vals = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_non_null_key_vals[0]));
		ut_d(idx->magic_n = DICT_INDEX_MAGIC_N);

		idx->stat_defrag_n_page_split = 0;
		idx->stat_defrag_n_pages_freed = 0;
	}

	ut_d(t->magic_n = DICT_TABLE_MAGIC_N);

	return(t);
}
488
489 /*********************************************************************//**
490 Free the resources occupied by an object returned by
491 dict_stats_table_clone_create(). */
492 static
493 void
dict_stats_table_clone_free(dict_table_t * t)494 dict_stats_table_clone_free(
495 /*========================*/
496 dict_table_t* t) /*!< in: dummy table object to free */
497 {
498 mem_heap_free(t->heap);
499 }
500
501 /*********************************************************************//**
502 Write all zeros (or 1 where it makes sense) into an index
503 statistics members. The resulting stats correspond to an empty index. */
504 static
505 void
dict_stats_empty_index(dict_index_t * index,bool empty_defrag_stats)506 dict_stats_empty_index(
507 /*===================*/
508 dict_index_t* index, /*!< in/out: index */
509 bool empty_defrag_stats)
510 /*!< in: whether to empty defrag stats */
511 {
512 ut_ad(!(index->type & DICT_FTS));
513 ut_ad(!dict_index_is_ibuf(index));
514 ut_ad(mutex_own(&dict_sys.mutex));
515
516 ulint n_uniq = index->n_uniq;
517
518 for (ulint i = 0; i < n_uniq; i++) {
519 index->stat_n_diff_key_vals[i] = 0;
520 index->stat_n_sample_sizes[i] = 1;
521 index->stat_n_non_null_key_vals[i] = 0;
522 }
523
524 index->stat_index_size = 1;
525 index->stat_n_leaf_pages = 1;
526
527 if (empty_defrag_stats) {
528 dict_stats_empty_defrag_stats(index);
529 dict_stats_empty_defrag_summary(index);
530 }
531 }
532
533 /*********************************************************************//**
534 Write all zeros (or 1 where it makes sense) into a table and its indexes'
535 statistics members. The resulting stats correspond to an empty table. */
536 static
537 void
dict_stats_empty_table(dict_table_t * table,bool empty_defrag_stats)538 dict_stats_empty_table(
539 /*===================*/
540 dict_table_t* table, /*!< in/out: table */
541 bool empty_defrag_stats)
542 /*!< in: whether to empty defrag stats */
543 {
544 mutex_enter(&dict_sys.mutex);
545
546 /* Zero the stats members */
547 table->stat_n_rows = 0;
548 table->stat_clustered_index_size = 1;
549 /* 1 page for each index, not counting the clustered */
550 table->stat_sum_of_other_index_sizes
551 = UT_LIST_GET_LEN(table->indexes) - 1;
552 table->stat_modified_counter = 0;
553
554 dict_index_t* index;
555
556 for (index = dict_table_get_first_index(table);
557 index != NULL;
558 index = dict_table_get_next_index(index)) {
559
560 if (index->type & DICT_FTS) {
561 continue;
562 }
563
564 ut_ad(!dict_index_is_ibuf(index));
565
566 dict_stats_empty_index(index, empty_defrag_stats);
567 }
568
569 table->stat_initialized = TRUE;
570 mutex_exit(&dict_sys.mutex);
571 }
572
573 /*********************************************************************//**
574 Check whether index's stats are initialized (assert if they are not). */
static
void
dict_stats_assert_initialized_index(
/*================================*/
	const dict_index_t*	index)	/*!< in: index */
{
	/* Under Valgrind/MSAN-instrumented builds, MEM_CHECK_DEFINED
	aborts if any of the checked bytes are uninitialized; in normal
	builds these are no-ops. Only the first n_uniq slots of each
	stats array are checked, since only those are maintained. */
	MEM_CHECK_DEFINED(
		index->stat_n_diff_key_vals,
		index->n_uniq * sizeof(index->stat_n_diff_key_vals[0]));

	MEM_CHECK_DEFINED(
		index->stat_n_sample_sizes,
		index->n_uniq * sizeof(index->stat_n_sample_sizes[0]));

	MEM_CHECK_DEFINED(
		index->stat_n_non_null_key_vals,
		index->n_uniq * sizeof(index->stat_n_non_null_key_vals[0]));

	MEM_CHECK_DEFINED(
		&index->stat_index_size,
		sizeof(index->stat_index_size));

	MEM_CHECK_DEFINED(
		&index->stat_n_leaf_pages,
		sizeof(index->stat_n_leaf_pages));
}
601
602 /*********************************************************************//**
603 Check whether table's stats are initialized (assert if they are not). */
static
void
dict_stats_assert_initialized(
/*==========================*/
	const dict_table_t*	table)	/*!< in: table */
{
	/* Hard assert: callers must not pass a table whose stats have
	never been initialized. */
	ut_a(table->stat_initialized);

	/* Verify each stats-related table member is fully defined
	(no-ops outside Valgrind/MSAN-instrumented builds). */
	MEM_CHECK_DEFINED(&table->stats_last_recalc,
			  sizeof table->stats_last_recalc);

	MEM_CHECK_DEFINED(&table->stat_persistent,
			  sizeof table->stat_persistent);

	MEM_CHECK_DEFINED(&table->stats_auto_recalc,
			  sizeof table->stats_auto_recalc);

	MEM_CHECK_DEFINED(&table->stats_sample_pages,
			  sizeof table->stats_sample_pages);

	MEM_CHECK_DEFINED(&table->stat_n_rows,
			  sizeof table->stat_n_rows);

	MEM_CHECK_DEFINED(&table->stat_clustered_index_size,
			  sizeof table->stat_clustered_index_size);

	MEM_CHECK_DEFINED(&table->stat_sum_of_other_index_sizes,
			  sizeof table->stat_sum_of_other_index_sizes);

	MEM_CHECK_DEFINED(&table->stat_modified_counter,
			  sizeof table->stat_modified_counter);

	MEM_CHECK_DEFINED(&table->stats_bg_flag,
			  sizeof table->stats_bg_flag);

	/* Recurse into every index that participates in stats; ignored
	indexes (FTS, spatial, corrupted, ...) have no maintained stats
	to check. */
	for (dict_index_t* index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		if (!dict_stats_should_ignore_index(index)) {
			dict_stats_assert_initialized_index(index);
		}
	}
}
648
649 #define INDEX_EQ(i1, i2) \
650 ((i1) != NULL \
651 && (i2) != NULL \
652 && (i1)->id == (i2)->id \
653 && strcmp((i1)->name, (i2)->name) == 0)
654
655 /*********************************************************************//**
656 Copy table and index statistics from one table to another, including index
657 stats. Extra indexes in src are ignored and extra indexes in dst are
658 initialized to correspond to an empty index. */
static
void
dict_stats_copy(
/*============*/
	dict_table_t*		dst,	/*!< in/out: destination table */
	const dict_table_t*	src,	/*!< in: source table */
	bool reset_ignored_indexes)	/*!< in: if true, set ignored indexes
					to have the same statistics as if
					the table was empty */
{
	ut_ad(mutex_own(&dict_sys.mutex));

	dst->stats_last_recalc = src->stats_last_recalc;
	dst->stat_n_rows = src->stat_n_rows;
	dst->stat_clustered_index_size = src->stat_clustered_index_size;
	dst->stat_sum_of_other_index_sizes = src->stat_sum_of_other_index_sizes;
	dst->stat_modified_counter = src->stat_modified_counter;

	dict_index_t*	dst_idx;
	dict_index_t*	src_idx;

	/* Walk both index lists in parallel. The comma-expression in the
	increment advances src_idx only while it is non-NULL, so src_idx
	"sticks" at NULL once the source list is exhausted while dst_idx
	keeps going (extra dst indexes are emptied below). */
	for (dst_idx = dict_table_get_first_index(dst),
	     src_idx = dict_table_get_first_index(src);
	     dst_idx != NULL;
	     dst_idx = dict_table_get_next_index(dst_idx),
	     (src_idx != NULL
	      && (src_idx = dict_table_get_next_index(src_idx)))) {

		if (dict_stats_should_ignore_index(dst_idx)) {
			if (reset_ignored_indexes) {
				/* Reset index statistics for all ignored indexes,
				unless they are FT indexes (these have no statistics)*/
				if (dst_idx->type & DICT_FTS) {
					continue;
				}
				dict_stats_empty_index(dst_idx, true);
			} else {
				continue;
			}
		}

		ut_ad(!dict_index_is_ibuf(dst_idx));

		/* If the parallel walk got out of sync (lists differ),
		fall back to a linear search of src for a matching index
		(same id and name, see INDEX_EQ). */
		if (!INDEX_EQ(src_idx, dst_idx)) {
			for (src_idx = dict_table_get_first_index(src);
			     src_idx != NULL;
			     src_idx = dict_table_get_next_index(src_idx)) {

				if (INDEX_EQ(src_idx, dst_idx)) {
					break;
				}
			}
		}

		/* No counterpart in src: treat this dst index as empty. */
		if (!INDEX_EQ(src_idx, dst_idx)) {
			dict_stats_empty_index(dst_idx, true);
			continue;
		}

		ulint	n_copy_el;

		if (dst_idx->n_uniq > src_idx->n_uniq) {
			n_copy_el = src_idx->n_uniq;
			/* Since src is smaller some elements in dst
			will remain untouched by the following memmove(),
			thus we init all of them here. */
			dict_stats_empty_index(dst_idx, true);
		} else {
			n_copy_el = dst_idx->n_uniq;
		}

		memmove(dst_idx->stat_n_diff_key_vals,
			src_idx->stat_n_diff_key_vals,
			n_copy_el * sizeof(dst_idx->stat_n_diff_key_vals[0]));

		memmove(dst_idx->stat_n_sample_sizes,
			src_idx->stat_n_sample_sizes,
			n_copy_el * sizeof(dst_idx->stat_n_sample_sizes[0]));

		memmove(dst_idx->stat_n_non_null_key_vals,
			src_idx->stat_n_non_null_key_vals,
			n_copy_el * sizeof(dst_idx->stat_n_non_null_key_vals[0]));

		dst_idx->stat_index_size = src_idx->stat_index_size;

		dst_idx->stat_n_leaf_pages = src_idx->stat_n_leaf_pages;

		dst_idx->stat_defrag_modified_counter =
			src_idx->stat_defrag_modified_counter;
		dst_idx->stat_defrag_n_pages_freed =
			src_idx->stat_defrag_n_pages_freed;
		dst_idx->stat_defrag_n_page_split =
			src_idx->stat_defrag_n_page_split;
	}

	dst->stat_initialized = TRUE;
}
756
757 /** Duplicate the stats of a table and its indexes.
758 This function creates a dummy dict_table_t object and copies the input
759 table's stats into it. The returned table object is not in the dictionary
760 cache and cannot be accessed by any other threads. In addition to the
761 members copied in dict_stats_table_clone_create() this function initializes
762 the following:
763 dict_table_t::stat_initialized
764 dict_table_t::stat_persistent
765 dict_table_t::stat_n_rows
766 dict_table_t::stat_clustered_index_size
767 dict_table_t::stat_sum_of_other_index_sizes
768 dict_table_t::stat_modified_counter
769 dict_index_t::stat_n_diff_key_vals[]
770 dict_index_t::stat_n_sample_sizes[]
771 dict_index_t::stat_n_non_null_key_vals[]
772 dict_index_t::stat_index_size
773 dict_index_t::stat_n_leaf_pages
774 dict_index_t::stat_defrag_modified_counter
775 dict_index_t::stat_defrag_n_pages_freed
776 dict_index_t::stat_defrag_n_page_split
777 The returned object should be freed with dict_stats_snapshot_free()
778 when no longer needed.
779 @param[in] table table whose stats to copy
780 @return incomplete table object */
781 static
782 dict_table_t*
dict_stats_snapshot_create(dict_table_t * table)783 dict_stats_snapshot_create(
784 dict_table_t* table)
785 {
786 mutex_enter(&dict_sys.mutex);
787
788 dict_stats_assert_initialized(table);
789
790 dict_table_t* t;
791
792 t = dict_stats_table_clone_create(table);
793
794 dict_stats_copy(t, table, false);
795
796 t->stat_persistent = table->stat_persistent;
797 t->stats_auto_recalc = table->stats_auto_recalc;
798 t->stats_sample_pages = table->stats_sample_pages;
799 t->stats_bg_flag = table->stats_bg_flag;
800
801 mutex_exit(&dict_sys.mutex);
802
803 return(t);
804 }
805
806 /*********************************************************************//**
807 Free the resources occupied by an object returned by
808 dict_stats_snapshot_create(). */
809 static
810 void
dict_stats_snapshot_free(dict_table_t * t)811 dict_stats_snapshot_free(
812 /*=====================*/
813 dict_table_t* t) /*!< in: dummy table object to free */
814 {
815 dict_stats_table_clone_free(t);
816 }
817
818 /*********************************************************************//**
819 Calculates new estimates for index statistics. This function is
820 relatively quick and is used to calculate transient statistics that
821 are not saved on disk. This was the only way to calculate statistics
822 before the Persistent Statistics feature was introduced.
823 This function doesn't update the defragmentation related stats.
824 Only persistent statistics supports defragmentation stats. */
static
void
dict_stats_update_transient_for_index(
/*==================================*/
	dict_index_t*	index)	/*!< in/out: index */
{
	if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
	    && (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO
		|| !dict_index_is_clust(index))) {
		/* If we have set a high innodb_force_recovery
		level, do not calculate statistics, as a badly
		corrupted index can cause a crash in it.
		Initialize some bogus index cardinality
		statistics, so that the data can be queried in
		various means, also via secondary indexes. */
		mutex_enter(&dict_sys.mutex);
		dict_stats_empty_index(index, false);
		mutex_exit(&dict_sys.mutex);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
	} else if (ibuf_debug && !dict_index_is_clust(index)) {
		/* Insert-buffer debugging: treat secondary indexes as
		empty rather than scanning them. */
		mutex_enter(&dict_sys.mutex);
		dict_stats_empty_index(index, false);
		mutex_exit(&dict_sys.mutex);
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
	} else {
		mtr_t	mtr;
		ulint	size;

		/* Measure the index size under an S-latch on the tree. */
		mtr.start();
		mtr_s_lock_index(index, &mtr);
		size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);

		if (size != ULINT_UNDEFINED) {
			index->stat_index_size = size;

			/* Reuse `size` for the leaf-page count; if the
			first btr_get_size() failed, `size` stays
			ULINT_UNDEFINED and is handled below. */
			size = btr_get_size(
				index, BTR_N_LEAF_PAGES, &mtr);
		}

		mtr.commit();

		switch (size) {
		case ULINT_UNDEFINED:
			/* Size could not be determined: fall back to
			empty-index statistics. */
			mutex_enter(&dict_sys.mutex);
			dict_stats_empty_index(index, false);
			mutex_exit(&dict_sys.mutex);
			return;
		case 0:
			/* The root node of the tree is a leaf */
			size = 1;
		}

		index->stat_n_leaf_pages = size;

		/* Do not continue if table decryption has failed or
		table is already marked as corrupted. */
		if (index->is_readable()) {
			std::vector<index_field_stats_t>	stats
				= btr_estimate_number_of_different_key_vals(
					index);

			/* Publish the sampled per-prefix cardinality
			under dict_sys.mutex. */
			if (!stats.empty()) {
				ut_ad(!mutex_own(&dict_sys.mutex));
				mutex_enter(&dict_sys.mutex);
				for (size_t i = 0; i < stats.size(); ++i) {
					index->stat_n_diff_key_vals[i]
						= stats[i].n_diff_key_vals;
					index->stat_n_sample_sizes[i]
						= stats[i].n_sample_sizes;
					index->stat_n_non_null_key_vals[i]
						= stats[i].n_non_null_key_vals;
				}
				mutex_exit(&dict_sys.mutex);
			}
		}
	}
}
902
903 /*********************************************************************//**
904 Calculates new estimates for table and index statistics. This function
905 is relatively quick and is used to calculate transient statistics that
906 are not saved on disk.
907 This was the only way to calculate statistics before the
908 Persistent Statistics feature was introduced. */
static
void
dict_stats_update_transient(
/*========================*/
	dict_table_t*	table)	/*!< in/out: table */
{
	ut_ad(!mutex_own(&dict_sys.mutex));

	dict_index_t*	index;
	ulint		sum_of_index_sizes	= 0;

	/* Find out the sizes of the indexes and how many different values
	for the key they approximately have */

	index = dict_table_get_first_index(table);

	if (!table->space) {
		/* Nothing to do. */
		dict_stats_empty_table(table, true);
		return;
	} else if (index == NULL) {
		/* Table definition is corrupt */

		ib::warn() << "Table " << table->name
			<< " has no indexes. Cannot calculate statistics.";
		dict_stats_empty_table(table, true);
		return;
	}

	for (; index != NULL; index = dict_table_get_next_index(index)) {

		ut_ad(!dict_index_is_ibuf(index));

		/* FTS and spatial indexes carry no cardinality stats. */
		if (index->type & (DICT_FTS | DICT_SPATIAL)) {
			continue;
		}

		/* Ignored or unreadable (e.g. decryption failed) indexes
		get empty-index statistics instead of being sampled. */
		if (dict_stats_should_ignore_index(index)
		    || !index->is_readable()) {
			mutex_enter(&dict_sys.mutex);
			dict_stats_empty_index(index, false);
			mutex_exit(&dict_sys.mutex);
			continue;
		}

		dict_stats_update_transient_for_index(index);

		sum_of_index_sizes += index->stat_index_size;
	}

	mutex_enter(&dict_sys.mutex);

	/* Derive the table-level stats from the clustered index (the
	first index): the number of rows is the number of distinct
	values of the full unique key prefix. */
	index = dict_table_get_first_index(table);

	table->stat_n_rows = index->stat_n_diff_key_vals[
		dict_index_get_n_unique(index) - 1];

	table->stat_clustered_index_size = index->stat_index_size;

	/* sum_of_index_sizes includes the clustered index, so subtract
	it to get the total of the secondary indexes only. */
	table->stat_sum_of_other_index_sizes = sum_of_index_sizes
		- index->stat_index_size;

	table->stats_last_recalc = time(NULL);

	table->stat_modified_counter = 0;

	table->stat_initialized = TRUE;

	mutex_exit(&dict_sys.mutex);
}
979
980 /* @{ Pseudo code about the relation between the following functions
981
982 let N = N_SAMPLE_PAGES(index)
983
984 dict_stats_analyze_index()
985 for each n_prefix
986 search for good enough level:
987 dict_stats_analyze_index_level() // only called if level has <= N pages
988 // full scan of the level in one mtr
989 collect statistics about the given level
990 if we are not satisfied with the level, search next lower level
991 we have found a good enough level here
992 dict_stats_analyze_index_for_n_prefix(that level, stats collected above)
993 // full scan of the level in one mtr
994 dive below some records and analyze the leaf page there:
995 dict_stats_analyze_index_below_cur()
996 @} */
997
/*********************************************************************//**
Find the total number and the number of distinct keys on a given level in
an index. Each of the 1..n_uniq prefixes are looked up and the results are
saved in the array n_diff[0] .. n_diff[n_uniq - 1]. The total number of
records on the level is saved in total_recs.
Also, the index of the last record in each group of equal records is saved
in n_diff_boundaries[0..n_uniq - 1], records indexing starts from the leftmost
record on the level and continues cross pages boundaries, counting from 0.
The caller must hold an SX latch on index->lock (asserted below); only one
page of the level is latched at a time, which is why the previous record
must be copied out before crossing a page boundary. */
static
void
dict_stats_analyze_index_level(
/*===========================*/
	dict_index_t*	index,		/*!< in: index */
	ulint		level,		/*!< in: level (0 = leaf) */
	ib_uint64_t*	n_diff,		/*!< out: array for number of
					distinct keys for all prefixes */
	ib_uint64_t*	total_recs,	/*!< out: total number of records */
	ib_uint64_t*	total_pages,	/*!< out: total number of pages */
	boundaries_t*	n_diff_boundaries,/*!< out: boundaries of the groups
					of distinct keys, or NULL if the
					caller does not need them */
	mtr_t*		mtr)		/*!< in/out: mini-transaction */
{
	ulint		n_uniq;
	mem_heap_t*	heap;
	btr_pcur_t	pcur;
	const page_t*	page;
	const rec_t*	rec;
	const rec_t*	prev_rec;
	bool		prev_rec_is_copied;
	byte*		prev_rec_buf = NULL;
	ulint		prev_rec_buf_size = 0;
	rec_offs*	rec_offsets;
	rec_offs*	prev_rec_offsets;
	ulint		i;

	DEBUG_PRINTF(" %s(table=%s, index=%s, level=" ULINTPF ")\n",
		     __func__, index->table->name, index->name, level);

	ut_ad(mtr->memo_contains(index->lock, MTR_MEMO_SX_LOCK));

	n_uniq = dict_index_get_n_unique(index);

	/* elements in the n_diff array are 0..n_uniq-1 (inclusive) */
	memset(n_diff, 0x0, n_uniq * sizeof(n_diff[0]));

	/* Allocate space for the offsets header (the allocation size at
	offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_uniq + 1,
	so that this will never be less than the size calculated in
	rec_get_offsets_func(). */
	i = (REC_OFFS_HEADER_SIZE + 1 + 1) + n_uniq;

	/* One heap holds both scratch offsets arrays; it is freed as a
	unit at the end of the function. */
	heap = mem_heap_create((2 * sizeof *rec_offsets) * i);
	rec_offsets = static_cast<rec_offs*>(
		mem_heap_alloc(heap, i * sizeof *rec_offsets));
	prev_rec_offsets = static_cast<rec_offs*>(
		mem_heap_alloc(heap, i * sizeof *prev_rec_offsets));
	rec_offs_set_n_alloc(rec_offsets, i);
	rec_offs_set_n_alloc(prev_rec_offsets, i);

	/* reset the dynamic arrays n_diff_boundaries[0..n_uniq-1] */
	if (n_diff_boundaries != NULL) {
		for (i = 0; i < n_uniq; i++) {
			n_diff_boundaries[i].erase(
				n_diff_boundaries[i].begin(),
				n_diff_boundaries[i].end());
		}
	}

	/* Position pcur on the leftmost record on the leftmost page
	on the desired level. */

	btr_pcur_open_at_index_side(
		true, index, BTR_SEARCH_TREE_ALREADY_S_LATCHED,
		&pcur, true, level, mtr);
	btr_pcur_move_to_next_on_page(&pcur);

	page = btr_pcur_get_page(&pcur);

	/* The page must not be empty, except when
	it is the root page (and the whole index is empty). */
	ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page));
	ut_ad(btr_pcur_get_rec(&pcur)
	      == page_rec_get_next_const(page_get_infimum_rec(page)));

	/* check that we are indeed on the desired level */
	ut_a(btr_page_get_level(page) == level);

	/* there should not be any pages on the left */
	ut_a(!page_has_prev(page));

	if (REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
		    btr_pcur_get_rec(&pcur), page_is_comp(page))) {
		ut_ad(btr_pcur_is_on_user_rec(&pcur));
		if (level == 0) {
			/* Skip the metadata pseudo-record: on the leaf
			level of an instant-ALTER table the first record
			only describes the default values of instantly
			added columns and must not be counted. */
			ut_ad(index->is_instant());
			btr_pcur_move_to_next_user_rec(&pcur, mtr);
		}
	} else {
		/* The first record on the leftmost page must be
		marked as such on each level except the leaf level. */
		ut_a(level == 0);
	}

	prev_rec = NULL;
	prev_rec_is_copied = false;

	/* no records by default */
	*total_recs = 0;

	*total_pages = 0;

	/* iterate over all user records on this level
	and compare each two adjacent ones, even the last on page
	X and the first on page X+1 */
	for (;
	     btr_pcur_is_on_user_rec(&pcur);
	     btr_pcur_move_to_next_user_rec(&pcur, mtr)) {

		bool	rec_is_last_on_page;

		rec = btr_pcur_get_rec(&pcur);

		/* If rec and prev_rec are on different pages, then prev_rec
		must have been copied, because we hold latch only on the page
		where rec resides. */
		if (prev_rec != NULL
		    && page_align(rec) != page_align(prev_rec)) {

			ut_a(prev_rec_is_copied);
		}

		rec_is_last_on_page =
			page_rec_is_supremum(page_rec_get_next_const(rec));

		/* increment the pages counter at the end of each page */
		if (rec_is_last_on_page) {

			(*total_pages)++;
		}

		/* Skip delete-marked records on the leaf level. If we
		do not skip them, then ANALYZE quickly after DELETE
		could count them or not (purge may have already wiped
		them away) which brings non-determinism. We skip only
		leaf-level delete marks because delete marks on
		non-leaf level do not make sense. */

		if (level == 0
		    && !srv_stats_include_delete_marked
		    && rec_get_deleted_flag(
			    rec,
			    page_is_comp(btr_pcur_get_page(&pcur)))) {

			/* If this skipped record is the last on its page,
			prev_rec must be copied now, before the cursor
			moves to the next page and the latch is released. */
			if (rec_is_last_on_page
			    && !prev_rec_is_copied
			    && prev_rec != NULL) {
				/* copy prev_rec */

				prev_rec_offsets = rec_get_offsets(
					prev_rec, index, prev_rec_offsets,
					index->n_core_fields,
					n_uniq, &heap);

				prev_rec = rec_copy_prefix_to_buf(
					prev_rec, index, n_uniq,
					&prev_rec_buf, &prev_rec_buf_size);

				prev_rec_is_copied = true;
			}

			continue;
		}
		/* n_core is 0 for node-pointer records on non-leaf
		levels, index->n_core_fields for leaf records. */
		rec_offsets = rec_get_offsets(rec, index, rec_offsets,
					      level ? 0 : index->n_core_fields,
					      n_uniq, &heap);

		(*total_recs)++;

		if (prev_rec != NULL) {
			ulint	matched_fields;

			prev_rec_offsets = rec_get_offsets(
				prev_rec, index, prev_rec_offsets,
				level ? 0 : index->n_core_fields,
				n_uniq, &heap);

			cmp_rec_rec(prev_rec, rec,
				    prev_rec_offsets, rec_offsets, index,
				    false, &matched_fields);

			/* Every prefix longer than the matched prefix
			starts a new group of equal keys here. */
			for (i = matched_fields; i < n_uniq; i++) {

				if (n_diff_boundaries != NULL) {
					/* push the index of the previous
					record, that is - the last one from
					a group of equal keys */

					ib_uint64_t	idx;

					/* the index of the current record
					is total_recs - 1, the index of the
					previous record is total_recs - 2;
					we know that idx is not going to
					become negative here because if we
					are in this branch then there is a
					previous record and thus
					total_recs >= 2 */
					idx = *total_recs - 2;

					n_diff_boundaries[i].push_back(idx);
				}

				/* increment the number of different keys
				for n_prefix=i+1 (e.g. if i=0 then we increment
				for n_prefix=1 which is stored in n_diff[0]) */
				n_diff[i]++;
			}
		} else {
			/* this is the first non-delete marked record */
			for (i = 0; i < n_uniq; i++) {
				n_diff[i] = 1;
			}
		}

		if (rec_is_last_on_page) {
			/* end of a page has been reached */

			/* we need to copy the record instead of assigning
			like prev_rec = rec; because when we traverse the
			records on this level at some point we will jump from
			one page to the next and then rec and prev_rec will
			be on different pages and
			btr_pcur_move_to_next_user_rec() will release the
			latch on the page that prev_rec is on */
			prev_rec = rec_copy_prefix_to_buf(
				rec, index, n_uniq,
				&prev_rec_buf, &prev_rec_buf_size);
			prev_rec_is_copied = true;

		} else {
			/* still on the same page, the next call to
			btr_pcur_move_to_next_user_rec() will not jump
			on the next page, we can simply assign pointers
			instead of copying the records like above */

			prev_rec = rec;
			prev_rec_is_copied = false;
		}
	}

	/* if *total_pages is left untouched then the above loop was not
	entered at all and there is one page in the whole tree which is
	empty or the loop was entered but this is level 0, contains one page
	and all records are delete-marked */
	if (*total_pages == 0) {

		ut_ad(level == 0);
		ut_ad(*total_recs == 0);

		*total_pages = 1;
	}

	/* if there are records on this level and boundaries
	should be saved */
	if (*total_recs > 0 && n_diff_boundaries != NULL) {

		/* remember the index of the last record on the level as the
		last one from the last group of equal keys; this holds for
		all possible prefixes */
		for (i = 0; i < n_uniq; i++) {
			ib_uint64_t	idx;

			idx = *total_recs - 1;

			n_diff_boundaries[i].push_back(idx);
		}
	}

	/* now in n_diff_boundaries[i] there are exactly n_diff[i] integers,
	for i=0..n_uniq-1 */

#ifdef UNIV_STATS_DEBUG
	for (i = 0; i < n_uniq; i++) {

		DEBUG_PRINTF(" %s(): total recs: " UINT64PF
			     ", total pages: " UINT64PF
			     ", n_diff[" ULINTPF "]: " UINT64PF "\n",
			     __func__, *total_recs,
			     *total_pages,
			     i, n_diff[i]);

#if 0
		if (n_diff_boundaries != NULL) {
			ib_uint64_t	j;

			DEBUG_PRINTF(" %s(): boundaries[%lu]: ",
				     __func__, i);

			for (j = 0; j < n_diff[i]; j++) {
				ib_uint64_t	idx;

				idx = n_diff_boundaries[i][j];

				DEBUG_PRINTF(UINT64PF "=" UINT64PF ", ",
					     j, idx);
			}
			DEBUG_PRINTF("\n");
		}
#endif
	}
#endif /* UNIV_STATS_DEBUG */

	/* Release the latch on the last page, because that is not done by
	btr_pcur_close(). This function works also for non-leaf pages. */
	btr_leaf_page_release(btr_pcur_get_block(&pcur), BTR_SEARCH_LEAF, mtr);

	btr_pcur_close(&pcur);
	ut_free(prev_rec_buf);
	mem_heap_free(heap);
}
1319
1320 /** Scan a page, reading records from left to right and counting the number
1321 of distinct records (looking only at the first n_prefix
1322 columns) and the number of external pages pointed by records from this page.
1323 If scan_method is QUIT_ON_FIRST_NON_BORING then the function
1324 will return as soon as it finds a record that does not match its neighbor
1325 to the right, which means that in the case of QUIT_ON_FIRST_NON_BORING the
1326 returned n_diff can either be 0 (empty page), 1 (the whole page has all keys
1327 equal) or 2 (the function found a non-boring record and returned).
1328 @param[out] out_rec record, or NULL
1329 @param[out] offsets1 rec_get_offsets() working space (must
1330 be big enough)
1331 @param[out] offsets2 rec_get_offsets() working space (must
1332 be big enough)
1333 @param[in] index index of the page
1334 @param[in] page the page to scan
1335 @param[in] n_prefix look at the first n_prefix columns
1336 @param[in] n_core 0, or index->n_core_fields for leaf
1337 @param[out] n_diff number of distinct records encountered
1338 @param[out] n_external_pages if this is non-NULL then it will be set
1339 to the number of externally stored pages which were encountered
1340 @return offsets1 or offsets2 (the offsets of *out_rec),
1341 or NULL if the page is empty and does not contain user records. */
1342 UNIV_INLINE
1343 rec_offs*
dict_stats_scan_page(const rec_t ** out_rec,rec_offs * offsets1,rec_offs * offsets2,const dict_index_t * index,const page_t * page,ulint n_prefix,ulint n_core,ib_uint64_t * n_diff,ib_uint64_t * n_external_pages)1344 dict_stats_scan_page(
1345 const rec_t** out_rec,
1346 rec_offs* offsets1,
1347 rec_offs* offsets2,
1348 const dict_index_t* index,
1349 const page_t* page,
1350 ulint n_prefix,
1351 ulint n_core,
1352 ib_uint64_t* n_diff,
1353 ib_uint64_t* n_external_pages)
1354 {
1355 rec_offs* offsets_rec = offsets1;
1356 rec_offs* offsets_next_rec = offsets2;
1357 const rec_t* rec;
1358 const rec_t* next_rec;
1359 /* A dummy heap, to be passed to rec_get_offsets().
1360 Because offsets1,offsets2 should be big enough,
1361 this memory heap should never be used. */
1362 mem_heap_t* heap = NULL;
1363 ut_ad(!!n_core == page_is_leaf(page));
1364 const rec_t* (*get_next)(const rec_t*)
1365 = !n_core || srv_stats_include_delete_marked
1366 ? page_rec_get_next_const
1367 : page_rec_get_next_non_del_marked;
1368
1369 const bool should_count_external_pages = n_external_pages != NULL;
1370
1371 if (should_count_external_pages) {
1372 *n_external_pages = 0;
1373 }
1374
1375 rec = get_next(page_get_infimum_rec(page));
1376
1377 if (page_rec_is_supremum(rec)) {
1378 /* the page is empty or contains only delete-marked records */
1379 *n_diff = 0;
1380 *out_rec = NULL;
1381 return(NULL);
1382 }
1383
1384 offsets_rec = rec_get_offsets(rec, index, offsets_rec, n_core,
1385 ULINT_UNDEFINED, &heap);
1386
1387 if (should_count_external_pages) {
1388 *n_external_pages += btr_rec_get_externally_stored_len(
1389 rec, offsets_rec);
1390 }
1391
1392 next_rec = get_next(rec);
1393
1394 *n_diff = 1;
1395
1396 while (!page_rec_is_supremum(next_rec)) {
1397
1398 ulint matched_fields;
1399
1400 offsets_next_rec = rec_get_offsets(next_rec, index,
1401 offsets_next_rec, n_core,
1402 ULINT_UNDEFINED,
1403 &heap);
1404
1405 /* check whether rec != next_rec when looking at
1406 the first n_prefix fields */
1407 cmp_rec_rec(rec, next_rec, offsets_rec, offsets_next_rec,
1408 index, false, &matched_fields);
1409
1410 if (matched_fields < n_prefix) {
1411 /* rec != next_rec, => rec is non-boring */
1412
1413 (*n_diff)++;
1414
1415 if (!n_core) {
1416 break;
1417 }
1418 }
1419
1420 rec = next_rec;
1421 /* Assign offsets_rec = offsets_next_rec so that
1422 offsets_rec matches with rec which was just assigned
1423 rec = next_rec above. Also need to point
1424 offsets_next_rec to the place where offsets_rec was
1425 pointing before because we have just 2 placeholders
1426 where data is actually stored: offsets1 and offsets2
1427 and we are using them in circular fashion
1428 (offsets[_next]_rec are just pointers to those
1429 placeholders). */
1430 std::swap(offsets_rec, offsets_next_rec);
1431
1432 if (should_count_external_pages) {
1433 *n_external_pages += btr_rec_get_externally_stored_len(
1434 rec, offsets_rec);
1435 }
1436
1437 next_rec = get_next(next_rec);
1438 }
1439
1440 /* offsets1,offsets2 should have been big enough */
1441 ut_a(heap == NULL);
1442 *out_rec = rec;
1443 return(offsets_rec);
1444 }
1445
/** Dive below the current position of a cursor and calculate the number of
distinct records on the leaf page, when looking at the first n_prefix
columns. Also calculate the number of external pages pointed by records
on the leaf page.
The cursor is expected to be positioned on a node pointer record; this
function latches the child pages itself in its own mini-transaction.
@param[in]	cur		cursor
@param[in]	n_prefix	look at the first n_prefix columns
when comparing records
@param[out]	n_diff		number of distinct records on the leaf page
@param[out]	n_external_pages	number of external pages */
static
void
dict_stats_analyze_index_below_cur(
	const btr_cur_t*	cur,
	ulint			n_prefix,
	ib_uint64_t*		n_diff,
	ib_uint64_t*		n_external_pages)
{
	dict_index_t*	index;
	buf_block_t*	block;
	const page_t*	page;
	mem_heap_t*	heap;
	const rec_t*	rec;
	rec_offs*	offsets1;
	rec_offs*	offsets2;
	rec_offs*	offsets_rec;
	ulint		size;
	mtr_t		mtr;

	index = btr_cur_get_index(cur);

	/* Allocate offsets for the record and the node pointer, for
	node pointer records. In a secondary index, the node pointer
	record will consist of all index fields followed by a child
	page number.
	Allocate space for the offsets header (the allocation size at
	offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
	so that this will never be less than the size calculated in
	rec_get_offsets_func(). */
	size = (1 + REC_OFFS_HEADER_SIZE) + 1 + dict_index_get_n_fields(index);

	heap = mem_heap_create(size * (sizeof *offsets1 + sizeof *offsets2));

	offsets1 = static_cast<rec_offs*>(mem_heap_alloc(
			heap, size * sizeof *offsets1));

	offsets2 = static_cast<rec_offs*>(mem_heap_alloc(
			heap, size * sizeof *offsets2));

	rec_offs_set_n_alloc(offsets1, size);
	rec_offs_set_n_alloc(offsets2, size);

	rec = btr_cur_get_rec(cur);
	page = page_align(rec);
	ut_ad(!page_rec_is_leaf(rec));

	/* n_core = 0: the cursor record is a node pointer. */
	offsets_rec = rec_get_offsets(rec, index, offsets1, 0,
				      ULINT_UNDEFINED, &heap);

	/* Start the descent at the child page referenced by the
	cursor's node pointer record. */
	page_id_t		page_id(index->table->space_id,
					btr_node_ptr_get_child_page_no(
						rec, offsets_rec));
	const ulint zip_size = index->table->space->zip_size();

	/* assume no external pages by default - in case we quit from this
	function without analyzing any leaf pages */
	*n_external_pages = 0;

	mtr_start(&mtr);

	/* descend to the leaf level on the B-tree */
	for (;;) {

		dberr_t err = DB_SUCCESS;

		/* NOTE(review): the last buf_page_get_gen() argument is
		only set for level-1 pages of non-clustered indexes -
		presumably an ibuf-merge hint; see buf_page_get_gen(). */
		block = buf_page_get_gen(page_id, zip_size,
					 RW_S_LATCH, NULL, BUF_GET,
					 __FILE__, __LINE__, &mtr, &err,
					 !index->is_clust()
					 && 1 == btr_page_get_level(page));

		page = buf_block_get_frame(block);

		if (page_is_leaf(page)) {
			/* leaf level */
			break;
		}
		/* else */

		/* search for the first non-boring record on the page
		(n_core = 0 makes dict_stats_scan_page() quit on the
		first non-boring record) */
		offsets_rec = dict_stats_scan_page(
			&rec, offsets1, offsets2, index, page, n_prefix,
			0, n_diff, NULL);

		/* pages on level > 0 are not allowed to be empty */
		ut_a(offsets_rec != NULL);
		/* if page is not empty (offsets_rec != NULL) then n_diff must
		be > 0, otherwise there is a bug in dict_stats_scan_page() */
		ut_a(*n_diff > 0);

		if (*n_diff == 1) {
			mtr_commit(&mtr);

			/* page has all keys equal and the end of the page
			was reached by dict_stats_scan_page(), no need to
			descend to the leaf level */
			mem_heap_free(heap);
			/* can't get an estimate for n_external_pages here
			because we do not dive to the leaf level, assume no
			external pages (*n_external_pages was assigned to 0
			above). */
			return;
		}
		/* else */

		/* when we instruct dict_stats_scan_page() to quit on the
		first non-boring record it finds, then the returned n_diff
		can either be 0 (empty page), 1 (page has all keys equal) or
		2 (non-boring record was found) */
		ut_a(*n_diff == 2);

		/* we have a non-boring record in rec, descend below it */

		page_id.set_page_no(
			btr_node_ptr_get_child_page_no(rec, offsets_rec));
	}

	/* make sure we got a leaf page as a result from the above loop */
	ut_ad(page_is_leaf(page));

	/* scan the leaf page and find the number of distinct keys,
	when looking only at the first n_prefix columns; also estimate
	the number of externally stored pages pointed by records on this
	page */

	offsets_rec = dict_stats_scan_page(
		&rec, offsets1, offsets2, index, page, n_prefix,
		index->n_core_fields, n_diff,
		n_external_pages);

#if 0
	DEBUG_PRINTF(" %s(): n_diff below page_no=%lu: " UINT64PF "\n",
		     __func__, page_no, n_diff);
#endif

	mtr_commit(&mtr);
	mem_heap_free(heap);
}
1594
/** Input data that is used to calculate dict_index_t::stat_n_diff_key_vals[]
for each n-columns prefix (n from 1 to n_uniq). One instance exists per
prefix length; filled in by dict_stats_analyze_index() and
dict_stats_analyze_index_for_n_prefix(), consumed by
dict_stats_index_set_n_diff(). */
struct n_diff_data_t {
	/** Index of the level on which the descent through the btree
	stopped. level 0 is the leaf level. This is >= 1 because we
	avoid scanning the leaf level because it may contain too many
	pages and doing so is useless when combined with the random dives -
	if we are to scan the leaf level, this means a full scan and we can
	simply do that instead of fiddling with picking random records higher
	in the tree and to dive below them. At the start of the analyzing
	we may decide to do full scan of the leaf level, but then this
	structure is not used in that code path. */
	ulint		level;

	/** Number of records on the level where the descend through the btree
	stopped. When we scan the btree from the root, we stop at some mid
	level, choose some records from it and dive below them towards a leaf
	page to analyze. */
	ib_uint64_t	n_recs_on_level;

	/** Number of different key values that were found on the mid level. */
	ib_uint64_t	n_diff_on_level;

	/** Number of leaf pages that are analyzed. This is also the same as
	the number of records that we pick from the mid level and dive below
	them. */
	ib_uint64_t	n_leaf_pages_to_analyze;

	/** Cumulative sum of the number of different key values that were
	found on all analyzed pages. */
	ib_uint64_t	n_diff_all_analyzed_pages;

	/** Cumulative sum of the number of external pages (stored outside of
	the btree but in the same file segment). */
	ib_uint64_t	n_external_pages_sum;
};
1631
/** Estimate the number of different key values in an index when looking at
the first n_prefix columns. For a given level in an index select
n_diff_data->n_leaf_pages_to_analyze records from that level and dive below
them to the corresponding leaf pages, then scan those leaf pages and save the
sampling results in n_diff_data->n_diff_all_analyzed_pages.
The caller must hold an SX latch on index->lock (asserted below).
@param[in]	index		index
@param[in]	n_prefix	look at first 'n_prefix' columns when
comparing records
@param[in]	boundaries	a vector that contains
n_diff_data->n_diff_on_level integers each of which represents the index (on
level 'level', counting from left/smallest to right/biggest from 0) of the
last record from each group of distinct keys
@param[in,out]	n_diff_data	n_diff_all_analyzed_pages and
n_external_pages_sum in this structure will be set by this function. The
members level, n_diff_on_level and n_leaf_pages_to_analyze must be set by the
caller in advance - they are used by some calculations inside this function
@param[in,out]	mtr		mini-transaction */
static
void
dict_stats_analyze_index_for_n_prefix(
	dict_index_t*		index,
	ulint			n_prefix,
	const boundaries_t*	boundaries,
	n_diff_data_t*		n_diff_data,
	mtr_t*			mtr)
{
	btr_pcur_t	pcur;
	const page_t*	page;
	ib_uint64_t	rec_idx;
	ib_uint64_t	i;

#if 0
	DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu, n_prefix=%lu,"
		     " n_diff_on_level=" UINT64PF ")\n",
		     __func__, index->table->name, index->name, level,
		     n_prefix, n_diff_data->n_diff_on_level);
#endif

	ut_ad(mtr->memo_contains(index->lock, MTR_MEMO_SX_LOCK));

	/* Position pcur on the leftmost record on the leftmost page
	on the desired level. */

	btr_pcur_open_at_index_side(
		true, index, BTR_SEARCH_TREE_ALREADY_S_LATCHED,
		&pcur, true, n_diff_data->level, mtr);
	btr_pcur_move_to_next_on_page(&pcur);

	page = btr_pcur_get_page(&pcur);

	const rec_t*	first_rec = btr_pcur_get_rec(&pcur);

	/* We shouldn't be scanning the leaf level. The caller of this function
	should have stopped the descend on level 1 or higher. */
	ut_ad(n_diff_data->level > 0);
	ut_ad(!page_is_leaf(page));

	/* The page must not be empty, except when
	it is the root page (and the whole index is empty). */
	ut_ad(btr_pcur_is_on_user_rec(&pcur));
	ut_ad(first_rec == page_rec_get_next_const(page_get_infimum_rec(page)));

	/* check that we are indeed on the desired level */
	ut_a(btr_page_get_level(page) == n_diff_data->level);

	/* there should not be any pages on the left */
	ut_a(!page_has_prev(page));

	/* check whether the first record on the leftmost page is marked
	as such; we are on a non-leaf level */
	ut_a(rec_get_info_bits(first_rec, page_is_comp(page))
	     & REC_INFO_MIN_REC_FLAG);

	const ib_uint64_t	last_idx_on_level = boundaries->at(
		static_cast<unsigned>(n_diff_data->n_diff_on_level - 1));

	/* Index (counting from 0, across page boundaries) of the record
	the cursor is currently positioned on. */
	rec_idx = 0;

	n_diff_data->n_diff_all_analyzed_pages = 0;
	n_diff_data->n_external_pages_sum = 0;

	for (i = 0; i < n_diff_data->n_leaf_pages_to_analyze; i++) {
		/* there are n_diff_on_level elements
		in 'boundaries' and we divide those elements
		into n_leaf_pages_to_analyze segments, for example:

		let n_diff_on_level=100, n_leaf_pages_to_analyze=4, then:
		segment i=0: [0, 24]
		segment i=1: [25, 49]
		segment i=2: [50, 74]
		segment i=3: [75, 99] or

		let n_diff_on_level=1, n_leaf_pages_to_analyze=1, then:
		segment i=0: [0, 0] or

		let n_diff_on_level=2, n_leaf_pages_to_analyze=2, then:
		segment i=0: [0, 0]
		segment i=1: [1, 1] or

		let n_diff_on_level=13, n_leaf_pages_to_analyze=7, then:
		segment i=0: [0, 0]
		segment i=1: [1, 2]
		segment i=2: [3, 4]
		segment i=3: [5, 6]
		segment i=4: [7, 8]
		segment i=5: [9, 10]
		segment i=6: [11, 12]

		then we select a random record from each segment and dive
		below it */
		const ib_uint64_t	n_diff = n_diff_data->n_diff_on_level;
		const ib_uint64_t	n_pick
			= n_diff_data->n_leaf_pages_to_analyze;

		/* [left, right] is the i-th segment, inclusive. */
		const ib_uint64_t	left = n_diff * i / n_pick;
		const ib_uint64_t	right = n_diff * (i + 1) / n_pick - 1;

		ut_a(left <= right);
		ut_a(right <= last_idx_on_level);

		/* pick a random offset within the segment */
		const ulint	rnd = ut_rnd_interval(
			static_cast<ulint>(right - left));

		const ib_uint64_t	dive_below_idx
			= boundaries->at(static_cast<unsigned>(left + rnd));

#if 0
		DEBUG_PRINTF(" %s(): dive below record with index="
			     UINT64PF "\n", __func__, dive_below_idx);
#endif

		/* seek to the record with index dive_below_idx */
		while (rec_idx < dive_below_idx
		       && btr_pcur_is_on_user_rec(&pcur)) {

			btr_pcur_move_to_next_user_rec(&pcur, mtr);
			rec_idx++;
		}

		/* if the level has finished before the record we are
		searching for, this means that the B-tree has changed in
		the meantime, quit our sampling and use whatever stats
		we have collected so far */
		if (rec_idx < dive_below_idx) {

			ut_ad(!btr_pcur_is_on_user_rec(&pcur));
			break;
		}

		/* it could be that the tree has changed in such a way that
		the record under dive_below_idx is the supremum record, in
		this case rec_idx == dive_below_idx and pcur is positioned
		on the supremum, we do not want to dive below it */
		if (!btr_pcur_is_on_user_rec(&pcur)) {
			break;
		}

		ut_a(rec_idx == dive_below_idx);

		ib_uint64_t	n_diff_on_leaf_page;
		ib_uint64_t	n_external_pages;

		dict_stats_analyze_index_below_cur(btr_pcur_get_btr_cur(&pcur),
						   n_prefix,
						   &n_diff_on_leaf_page,
						   &n_external_pages);

		/* We adjust n_diff_on_leaf_page here to avoid counting
		one value twice - once as the last on some page and once
		as the first on another page. Consider the following example:
		Leaf level:
		page: (2,2,2,2,3,3)
		... many pages like (3,3,3,3,3,3) ...
		page: (3,3,3,3,5,5)
		... many pages like (5,5,5,5,5,5) ...
		page: (5,5,5,5,8,8)
		page: (8,8,8,8,9,9)
		our algo would (correctly) get an estimate that there are
		2 distinct records per page (average). Having 4 pages below
		non-boring records, it would (wrongly) estimate the number
		of distinct records to 8. */
		if (n_diff_on_leaf_page > 0) {
			n_diff_on_leaf_page--;
		}

		n_diff_data->n_diff_all_analyzed_pages += n_diff_on_leaf_page;

		n_diff_data->n_external_pages_sum += n_external_pages;
	}

	btr_pcur_close(&pcur);
}
1824
1825 /** statistics for an index */
1826 struct index_stats_t
1827 {
1828 std::vector<index_field_stats_t> stats;
1829 ulint index_size;
1830 ulint n_leaf_pages;
1831
index_stats_tindex_stats_t1832 index_stats_t(ulint n_uniq) : index_size(1), n_leaf_pages(1)
1833 {
1834 stats.reserve(n_uniq);
1835 for (ulint i= 0; i < n_uniq; ++i)
1836 stats.push_back(index_field_stats_t(0, 1, 0));
1837 }
1838 };
1839
1840 /** Set dict_index_t::stat_n_diff_key_vals[] and stat_n_sample_sizes[].
1841 @param[in] n_diff_data input data to use to derive the results
1842 @param[in,out] index_stats index stats to set */
1843 UNIV_INLINE
1844 void
dict_stats_index_set_n_diff(
	const n_diff_data_t*	n_diff_data,
	index_stats_t&		index_stats)
{
	/* Convert the raw per-level sampling data gathered for each
	n-column prefix into the final n_diff estimates stored in
	index_stats. There is one n_diff_data[] element per prefix. */
	for (ulint n_prefix = index_stats.stats.size();
	     n_prefix >= 1;
	     n_prefix--) {
		/* n_diff_all_analyzed_pages can be 0 here if
		all the leaf pages sampled contained only
		delete-marked records. In this case we should assign
		0 to index->stat_n_diff_key_vals[n_prefix - 1], which
		the formula below does. */

		const n_diff_data_t*	data = &n_diff_data[n_prefix - 1];

		ut_ad(data->n_leaf_pages_to_analyze > 0);
		ut_ad(data->n_recs_on_level > 0);

		ib_uint64_t	n_ordinary_leaf_pages;

		if (data->level == 1) {
			/* If we know the number of records on level 1, then
			this number is the same as the number of pages on
			level 0 (leaf). */
			n_ordinary_leaf_pages = data->n_recs_on_level;
		} else {
			/* If we analyzed D ordinary leaf pages and found E
			external pages in total linked from those D ordinary
			leaf pages, then this means that the ratio
			ordinary/external is D/E. Then the ratio ordinary/total
			is D / (D + E). Knowing that the total number of pages
			is T (including ordinary and external) then we estimate
			that the total number of ordinary leaf pages is
			T * D / (D + E). */
			n_ordinary_leaf_pages
				= index_stats.n_leaf_pages
				* data->n_leaf_pages_to_analyze
				/ (data->n_leaf_pages_to_analyze
				   + data->n_external_pages_sum);
		}

		/* See REF01 for an explanation of the algorithm */
		/* NOTE: this is integer arithmetic; the interleaved
		multiply/divide order below affects rounding and
		overflow behavior, so do not reorder the operations. */
		index_stats.stats[n_prefix - 1].n_diff_key_vals
			= n_ordinary_leaf_pages

			* data->n_diff_on_level
			/ data->n_recs_on_level

			* data->n_diff_all_analyzed_pages
			/ data->n_leaf_pages_to_analyze;

		/* The sample size reported with the estimate is the
		number of leaf pages that were actually dived into. */
		index_stats.stats[n_prefix - 1].n_sample_sizes
			= data->n_leaf_pages_to_analyze;

		DEBUG_PRINTF(" %s(): n_diff=" UINT64PF
			     " for n_prefix=" ULINTPF
			     " (" ULINTPF
			     " * " UINT64PF " / " UINT64PF
			     " * " UINT64PF " / " UINT64PF ")\n",
			     __func__,
			     index_stats.stats[n_prefix - 1].n_diff_key_vals,
			     n_prefix,
			     index_stats.n_leaf_pages,
			     data->n_diff_on_level,
			     data->n_recs_on_level,
			     data->n_diff_all_analyzed_pages,
			     data->n_leaf_pages_to_analyze);
	}
}
1914
1915 /** Calculates new statistics for a given index and saves them to the index
1916 members stat_n_diff_key_vals[], stat_n_sample_sizes[], stat_index_size and
1917 stat_n_leaf_pages. This function can be slow.
1918 @param[in] index index to analyze
1919 @return index stats */
static index_stats_t dict_stats_analyze_index(dict_index_t* index)
{
	ulint		root_level;
	ulint		level;
	bool		level_is_analyzed;
	ulint		n_uniq;
	ulint		n_prefix;
	ib_uint64_t	total_recs;
	ib_uint64_t	total_pages;
	mtr_t		mtr;
	ulint		size;
	index_stats_t	result(index->n_uniq);
	DBUG_ENTER("dict_stats_analyze_index");

	DBUG_PRINT("info", ("index: %s, online status: %d", index->name(),
			    dict_index_get_online_status(index)));

	ut_ad(!mutex_own(&dict_sys.mutex)); // because this function is slow
	ut_ad(index->table->get_ref_count());

	/* Disable update statistic for Rtree */
	if (dict_index_is_spatial(index)) {
		DBUG_RETURN(result);
	}

	DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name());

	/* Read the total index size and the number of leaf pages while
	holding an S-latch on the index tree. */
	mtr.start();
	mtr_s_lock_index(index, &mtr);
	size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);

	if (size != ULINT_UNDEFINED) {
		result.index_size = size;
		size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
	}

	/* Release the X locks on the root page taken by btr_get_size() */
	mtr.commit();

	switch (size) {
	case ULINT_UNDEFINED:
		dict_stats_assert_initialized_index(index);
		DBUG_RETURN(result);
	case 0:
		/* The root node of the tree is a leaf */
		size = 1;
	}

	result.n_leaf_pages = size;

	mtr.start();
	mtr_sx_lock_index(index, &mtr);
	root_level = btr_height_get(index, &mtr);

	n_uniq = dict_index_get_n_unique(index);

	/* If the tree has just one level (and one page) or if the user
	has requested to sample too many pages then do full scan.

	For each n-column prefix (for n=1..n_uniq) N_SAMPLE_PAGES(index)
	will be sampled, so in total N_SAMPLE_PAGES(index) * n_uniq leaf
	pages will be sampled. If that number is bigger than the total
	number of leaf pages then do full scan of the leaf level instead
	since it will be faster and will give better results. */

	if (root_level == 0
	    || N_SAMPLE_PAGES(index) * n_uniq > result.n_leaf_pages) {

		if (root_level == 0) {
			DEBUG_PRINTF(" %s(): just one page,"
				     " doing full scan\n", __func__);
		} else {
			DEBUG_PRINTF(" %s(): too many pages requested for"
				     " sampling, doing full scan\n", __func__);
		}

		/* do full scan of level 0; save results directly
		into the index */

		dict_stats_analyze_index_level(index,
					       0 /* leaf level */,
					       index->stat_n_diff_key_vals,
					       &total_recs,
					       &total_pages,
					       NULL /* boundaries not needed */,
					       &mtr);

		mtr.commit();

		/* Copy the freshly computed index->stat_* values into
		the result under dict_sys.mutex, since those members
		are shared state. */
		mutex_enter(&dict_sys.mutex);
		for (ulint i = 0; i < n_uniq; i++) {
			result.stats[i].n_diff_key_vals = index->stat_n_diff_key_vals[i];
			result.stats[i].n_sample_sizes = total_pages;
			result.stats[i].n_non_null_key_vals = index->stat_n_non_null_key_vals[i];
		}
		result.n_leaf_pages = index->stat_n_leaf_pages;
		mutex_exit(&dict_sys.mutex);

		DBUG_RETURN(result);
	}

	/* For each level that is being scanned in the btree, this contains the
	number of different key values for all possible n-column prefixes. */
	ib_uint64_t*	n_diff_on_level = UT_NEW_ARRAY(
		ib_uint64_t, n_uniq, mem_key_dict_stats_n_diff_on_level);

	/* For each level that is being scanned in the btree, this contains the
	index of the last record from each group of equal records (when
	comparing only the first n columns, n=1..n_uniq). */
	boundaries_t*	n_diff_boundaries = UT_NEW_ARRAY_NOKEY(boundaries_t,
							       n_uniq);

	/* For each n-column prefix this array contains the input data that is
	used to calculate dict_index_t::stat_n_diff_key_vals[]. */
	n_diff_data_t*	n_diff_data = UT_NEW_ARRAY_NOKEY(n_diff_data_t, n_uniq);

	/* total_recs is also used to estimate the number of pages on one
	level below, so at the start we have 1 page (the root) */
	total_recs = 1;

	/* Here we use the following optimization:
	If we find that level L is the first one (searching from the
	root) that contains at least D distinct keys when looking at
	the first n_prefix columns, then:
	if we look at the first n_prefix-1 columns then the first
	level that contains D distinct keys will be either L or a
	lower one.
	So if we find that the first level containing D distinct
	keys (on n_prefix columns) is L, we continue from L when
	searching for D distinct keys on n_prefix-1 columns. */
	level = root_level;
	level_is_analyzed = false;

	for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) {

		DEBUG_PRINTF(" %s(): searching level with >=%llu "
			     "distinct records, n_prefix=" ULINTPF "\n",
			     __func__, N_DIFF_REQUIRED(index), n_prefix);

		/* Commit the mtr to release the tree S lock to allow
		other threads to do some work too. */
		mtr.commit();
		mtr.start();
		mtr_sx_lock_index(index, &mtr);
		if (root_level != btr_height_get(index, &mtr)) {
			/* Just quit if the tree has changed beyond
			recognition here. The old stats from previous
			runs will remain in the values that we have
			not calculated yet. Initially when the index
			object is created the stats members are given
			some sensible values so leaving them untouched
			here even the first time will not cause us to
			read uninitialized memory later. */
			break;
		}

		/* check whether we should pick the current level;
		we pick level 1 even if it does not have enough
		distinct records because we do not want to scan the
		leaf level because it may contain too many records */
		if (level_is_analyzed
		    && (n_diff_on_level[n_prefix - 1] >= N_DIFF_REQUIRED(index)
			|| level == 1)) {

			goto found_level;
		}

		/* search for a level that contains enough distinct records */

		if (level_is_analyzed && level > 1) {

			/* if this does not hold we should be on
			"found_level" instead of here */
			ut_ad(n_diff_on_level[n_prefix - 1]
			      < N_DIFF_REQUIRED(index));

			level--;
			level_is_analyzed = false;
		}

		/* descend into the tree, searching for "good enough" level */
		for (;;) {

			/* make sure we do not scan the leaf level
			accidentally, it may contain too many pages */
			ut_ad(level > 0);

			/* scanning the same level twice is an optimization
			bug */
			ut_ad(!level_is_analyzed);

			/* Do not scan if this would read too many pages.
			Here we use the following fact:
			the number of pages on level L equals the number
			of records on level L+1, thus we deduce that the
			following call would scan total_recs pages, because
			total_recs is left from the previous iteration when
			we scanned one level upper or we have not scanned any
			levels yet in which case total_recs is 1. */
			if (total_recs > N_SAMPLE_PAGES(index)) {

				/* if the above cond is true then we are
				not at the root level since on the root
				level total_recs == 1 (set before we
				enter the n-prefix loop) and cannot
				be > N_SAMPLE_PAGES(index) */
				ut_a(level != root_level);

				/* step one level back and be satisfied with
				whatever it contains */
				level++;
				level_is_analyzed = true;

				break;
			}

			dict_stats_analyze_index_level(index,
						       level,
						       n_diff_on_level,
						       &total_recs,
						       &total_pages,
						       n_diff_boundaries,
						       &mtr);

			level_is_analyzed = true;

			if (level == 1
			    || n_diff_on_level[n_prefix - 1]
			    >= N_DIFF_REQUIRED(index)) {
				/* we have reached the last level we could scan
				or we found a good level with many distinct
				records */
				break;
			}

			level--;
			level_is_analyzed = false;
		}
found_level:

		DEBUG_PRINTF(" %s(): found level " ULINTPF
			     " that has " UINT64PF
			     " distinct records for n_prefix=" ULINTPF "\n",
			     __func__, level, n_diff_on_level[n_prefix - 1],
			     n_prefix);
		/* here we are either on level 1 or the level that we are on
		contains >= N_DIFF_REQUIRED distinct keys or we did not scan
		deeper levels because they would contain too many pages */

		ut_ad(level > 0);

		ut_ad(level_is_analyzed);

		/* if any of these is 0 then there is exactly one page in the
		B-tree and it is empty and we should have done full scan and
		should not be here */
		ut_ad(total_recs > 0);
		ut_ad(n_diff_on_level[n_prefix - 1] > 0);

		ut_ad(N_SAMPLE_PAGES(index) > 0);

		/* Record the input data for the final estimate that
		dict_stats_index_set_n_diff() will compute. */
		n_diff_data_t*	data = &n_diff_data[n_prefix - 1];

		data->level = level;

		data->n_recs_on_level = total_recs;

		data->n_diff_on_level = n_diff_on_level[n_prefix - 1];

		data->n_leaf_pages_to_analyze = std::min(
			N_SAMPLE_PAGES(index),
			n_diff_on_level[n_prefix - 1]);

		/* pick some records from this level and dive below them for
		the given n_prefix */

		dict_stats_analyze_index_for_n_prefix(
			index, n_prefix, &n_diff_boundaries[n_prefix - 1],
			data, &mtr);
	}

	mtr.commit();

	UT_DELETE_ARRAY(n_diff_boundaries);

	UT_DELETE_ARRAY(n_diff_on_level);

	/* n_prefix == 0 means that the above loop did not end up prematurely
	due to tree being changed and so n_diff_data[] is set up. */
	if (n_prefix == 0) {
		dict_stats_index_set_n_diff(n_diff_data, result);
	}

	UT_DELETE_ARRAY(n_diff_data);

	DBUG_RETURN(result);
}
2217
2218 /*********************************************************************//**
2219 Calculates new estimates for table and index statistics. This function
2220 is relatively slow and is used to calculate persistent statistics that
2221 will be saved on disk.
2222 @return DB_SUCCESS or error code */
static
dberr_t
dict_stats_update_persistent(
/*=========================*/
	dict_table_t*	table)		/*!< in/out: table */
{
	dict_index_t*	index;

	DEBUG_PRINTF("%s(table=%s)\n", __func__, table->name);

	DEBUG_SYNC_C("dict_stats_update_persistent");

	/* analyze the clustered index first */

	index = dict_table_get_first_index(table);

	/* The first index must be the clustered index. ORing
	DICT_UNIQUE into the type makes the comparison ignore whether
	the clustered index happens to carry the DICT_UNIQUE flag. */
	if (index == NULL
	    || index->is_corrupted()
	    || (index->type | DICT_UNIQUE) != (DICT_CLUSTERED | DICT_UNIQUE)) {

		/* Table definition is corrupt */
		dict_stats_empty_table(table, true);

		return(DB_CORRUPTION);
	}

	ut_ad(!dict_index_is_ibuf(index));
	/* Reset the clustered index stats to defaults under
	dict_sys.mutex before recomputing them. */
	mutex_enter(&dict_sys.mutex);
	dict_stats_empty_index(index, false);
	mutex_exit(&dict_sys.mutex);

	/* Slow tree scan; must run without holding dict_sys.mutex
	(dict_stats_analyze_index() asserts that). */
	index_stats_t stats = dict_stats_analyze_index(index);

	/* Publish the results into the shared index/table objects
	under dict_sys.mutex. */
	mutex_enter(&dict_sys.mutex);
	index->stat_index_size = stats.index_size;
	index->stat_n_leaf_pages = stats.n_leaf_pages;
	for (size_t i = 0; i < stats.stats.size(); ++i) {
		index->stat_n_diff_key_vals[i] = stats.stats[i].n_diff_key_vals;
		index->stat_n_sample_sizes[i] = stats.stats[i].n_sample_sizes;
		index->stat_n_non_null_key_vals[i] = stats.stats[i].n_non_null_key_vals;
	}

	ulint	n_unique = dict_index_get_n_unique(index);

	/* The estimated number of distinct values of the full unique
	prefix of the clustered index is the row count estimate. */
	table->stat_n_rows = index->stat_n_diff_key_vals[n_unique - 1];

	table->stat_clustered_index_size = index->stat_index_size;

	/* analyze other indexes from the table, if any */

	table->stat_sum_of_other_index_sizes = 0;

	for (index = dict_table_get_next_index(index);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		ut_ad(!dict_index_is_ibuf(index));

		/* FULLTEXT and SPATIAL indexes are not analyzed. */
		if (index->type & (DICT_FTS | DICT_SPATIAL)) {
			continue;
		}

		dict_stats_empty_index(index, false);

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		/* Skip the slow analysis if a background-stats abort
		has been requested; the emptied defaults then remain.
		The mutex is dropped around the scan, as above. */
		if (!(table->stats_bg_flag & BG_STAT_SHOULD_QUIT)) {
			mutex_exit(&dict_sys.mutex);
			stats = dict_stats_analyze_index(index);
			mutex_enter(&dict_sys.mutex);

			index->stat_index_size = stats.index_size;
			index->stat_n_leaf_pages = stats.n_leaf_pages;
			for (size_t i = 0; i < stats.stats.size(); ++i) {
				index->stat_n_diff_key_vals[i]
					= stats.stats[i].n_diff_key_vals;
				index->stat_n_sample_sizes[i]
					= stats.stats[i].n_sample_sizes;
				index->stat_n_non_null_key_vals[i]
					= stats.stats[i].n_non_null_key_vals;
			}
		}

		table->stat_sum_of_other_index_sizes
			+= index->stat_index_size;
	}

	table->stats_last_recalc = time(NULL);

	table->stat_modified_counter = 0;

	table->stat_initialized = TRUE;

	dict_stats_assert_initialized(table);

	mutex_exit(&dict_sys.mutex);

	return(DB_SUCCESS);
}
2324
2325 #include "mysql_com.h"
2326 /** Save an individual index's statistic into the persistent statistics
2327 storage.
2328 @param[in] index index to be updated
2329 @param[in] last_update timestamp of the stat
2330 @param[in] stat_name name of the stat
2331 @param[in] stat_value value of the stat
2332 @param[in] sample_size n pages sampled or NULL
2333 @param[in] stat_description description of the stat
2334 @param[in,out] trx in case of NULL the function will
2335 allocate and free the trx object. If it is not NULL then it will be
2336 rolled back only in the case of error, but not freed.
2337 @return DB_SUCCESS or error code */
dberr_t
dict_stats_save_index_stat(
	dict_index_t*	index,
	time_t		last_update,
	const char*	stat_name,
	ib_uint64_t	stat_value,
	ib_uint64_t*	sample_size,
	const char*	stat_description,
	trx_t*		trx)
{
	dberr_t		ret;
	pars_info_t*	pinfo;
	char		db_utf8[MAX_DB_UTF8_LEN];
	char		table_utf8[MAX_TABLE_UTF8_LEN];

	ut_ad(!trx || trx->internal || trx->mysql_thd);
	ut_d(dict_sys.assert_locked());

	/* Split the filesystem-encoded table name into UTF-8
	database/table parts, as stored in the stats tables. */
	dict_fs2utf8(index->table->name.m_name, db_utf8, sizeof(db_utf8),
		     table_utf8, sizeof(table_utf8));

	pinfo = pars_info_create();
	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
	pars_info_add_str_literal(pinfo, "index_name", index->name);
	/* The MEM_CHECK_DEFINED() calls let tools such as Valgrind
	flag uninitialized bytes before they reach the SQL literals. */
	MEM_CHECK_DEFINED(&last_update, 4);
	/* last_update is stored as a 32-bit timestamp column */
	pars_info_add_int4_literal(pinfo, "last_update", uint32(last_update));
	MEM_CHECK_DEFINED(stat_name, strlen(stat_name));
	pars_info_add_str_literal(pinfo, "stat_name", stat_name);
	MEM_CHECK_DEFINED(&stat_value, 8);
	pars_info_add_ull_literal(pinfo, "stat_value", stat_value);
	if (sample_size != NULL) {
		MEM_CHECK_DEFINED(sample_size, 8);
		pars_info_add_ull_literal(pinfo, "sample_size", *sample_size);
	} else {
		/* sample_size is nullable; bind an SQL NULL */
		pars_info_add_literal(pinfo, "sample_size", NULL,
				      UNIV_SQL_NULL, DATA_FIXBINARY, 0);
	}
	pars_info_add_str_literal(pinfo, "stat_description",
				  stat_description);

	/* Upsert the row: delete any previous row with the same
	(database_name, table_name, index_name, stat_name) key,
	then insert the new values. */
	ret = dict_stats_exec_sql(
		pinfo,
		"PROCEDURE INDEX_STATS_SAVE () IS\n"
		"BEGIN\n"

		"DELETE FROM \"" INDEX_STATS_NAME "\"\n"
		"WHERE\n"
		"database_name = :database_name AND\n"
		"table_name = :table_name AND\n"
		"index_name = :index_name AND\n"
		"stat_name = :stat_name;\n"

		"INSERT INTO \"" INDEX_STATS_NAME "\"\n"
		"VALUES\n"
		"(\n"
		":database_name,\n"
		":table_name,\n"
		":index_name,\n"
		":last_update,\n"
		":stat_name,\n"
		":stat_value,\n"
		":sample_size,\n"
		":stat_description\n"
		");\n"
		"END;", trx);

	if (UNIV_UNLIKELY(ret != DB_SUCCESS)) {
		/* Report the failure only once per index, and only if
		the user has not suppressed such messages. */
		if (innodb_index_stats_not_found == false &&
		    index->stats_error_printed == false) {
			ib::error() << "Cannot save index statistics for table "
				    << index->table->name
				    << ", index " << index->name
				    << ", stat name \"" << stat_name << "\": "
				    << ret;
			index->stats_error_printed = true;
		}
	}

	return(ret);
}
2419
2420 /** Report an error if updating table statistics failed because
2421 .ibd file is missing, table decryption failed or table is corrupted.
2422 @param[in,out] table Table
2423 @param[in] defragment true if statistics is for defragment
2424 @retval DB_DECRYPTION_FAILED if decryption of the table failed
2425 @retval DB_TABLESPACE_DELETED if .ibd file is missing
2426 @retval DB_CORRUPTION if table is marked as corrupted */
2427 dberr_t
dict_stats_report_error(dict_table_t * table,bool defragment)2428 dict_stats_report_error(dict_table_t* table, bool defragment)
2429 {
2430 dberr_t err;
2431
2432 const char* df = defragment ? " defragment" : "";
2433
2434 if (!table->space) {
2435 ib::warn() << "Cannot save" << df << " statistics for table "
2436 << table->name
2437 << " because the .ibd file is missing. "
2438 << TROUBLESHOOTING_MSG;
2439 err = DB_TABLESPACE_DELETED;
2440 } else {
2441 ib::warn() << "Cannot save" << df << " statistics for table "
2442 << table->name
2443 << " because file "
2444 << table->space->chain.start->name
2445 << (table->corrupted
2446 ? " is corrupted."
2447 : " cannot be decrypted.");
2448 err = table->corrupted ? DB_CORRUPTION : DB_DECRYPTION_FAILED;
2449 }
2450
2451 dict_stats_empty_table(table, defragment);
2452 return err;
2453 }
2454
2455 /** Save the table's statistics into the persistent statistics storage.
2456 @param[in] table_orig table whose stats to save
2457 @param[in] only_for_index if this is non-NULL, then stats for indexes
2458 that are not equal to it will not be saved, if NULL, then all indexes' stats
2459 are saved
2460 @return DB_SUCCESS or error code */
static
dberr_t
dict_stats_save(
	dict_table_t*	table_orig,
	const index_id_t*	only_for_index)
{
	pars_info_t*	pinfo;
	dberr_t		ret;
	dict_table_t*	table;
	char		db_utf8[MAX_DB_UTF8_LEN];
	char		table_utf8[MAX_TABLE_UTF8_LEN];

	if (high_level_read_only) {
		return DB_READ_ONLY;
	}

	if (!table_orig->is_readable()) {
		return (dict_stats_report_error(table_orig));
	}

	/* Work on a snapshot of the table object so that the saved
	statistics are consistent. Freed at func_exit below. */
	table = dict_stats_snapshot_create(table_orig);

	dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8),
		     table_utf8, sizeof(table_utf8));

	/* Capture the timestamp once so that the table-stats row and
	all index-stats rows share the same last_update value. */
	const time_t now = time(NULL);
	dict_sys_lock();

	pinfo = pars_info_create();

	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
	pars_info_add_int4_literal(pinfo, "last_update", uint32(now));
	pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows);
	pars_info_add_ull_literal(pinfo, "clustered_index_size",
				  table->stat_clustered_index_size);
	pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes",
				  table->stat_sum_of_other_index_sizes);

	/* Upsert the single row in innodb_table_stats for this table. */
	ret = dict_stats_exec_sql(
		pinfo,
		"PROCEDURE TABLE_STATS_SAVE () IS\n"
		"BEGIN\n"

		"DELETE FROM \"" TABLE_STATS_NAME "\"\n"
		"WHERE\n"
		"database_name = :database_name AND\n"
		"table_name = :table_name;\n"

		"INSERT INTO \"" TABLE_STATS_NAME "\"\n"
		"VALUES\n"
		"(\n"
		":database_name,\n"
		":table_name,\n"
		":last_update,\n"
		":n_rows,\n"
		":clustered_index_size,\n"
		":sum_of_other_index_sizes\n"
		");\n"
		"END;", NULL);

	if (UNIV_UNLIKELY(ret != DB_SUCCESS)) {
		ib::error() << "Cannot save table statistics for table "
			    << table->name << ": " << ret;
func_exit:
		/* Common exit: unlock the dictionary and release the
		snapshot. Reached via goto from "end:" as well. */
		dict_sys_unlock();
		dict_stats_snapshot_free(table);
		return ret;
	}

	trx_t*	trx = trx_create();
	trx_start_internal(trx);

	dict_index_t*	index;
	index_map_t	indexes(
		(ut_strcmp_functor()),
		index_map_t_allocator(mem_key_dict_stats_index_map_t));

	/* Below we do all the modifications in innodb_index_stats in a single
	transaction for performance reasons. Modifying more than one row in a
	single transaction may deadlock with other transactions if they
	lock the rows in different order. Other transaction could be for
	example when we DROP a table and do
	DELETE FROM innodb_index_stats WHERE database_name = '...'
	AND table_name = '...'; which will affect more than one row. To
	prevent deadlocks we always lock the rows in the same order - the
	order of the PK, which is (database_name, table_name, index_name,
	stat_name). This is why below we sort the indexes by name and then
	for each index, do the mods ordered by stat_name. */

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		indexes[index->name] = index;
	}

	index_map_t::const_iterator	it;

	for (it = indexes.begin(); it != indexes.end(); ++it) {

		index = it->second;

		/* When saving for a single index only, skip the rest. */
		if (only_for_index != NULL && index->id != *only_for_index) {
			continue;
		}

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_ibuf(index));

		/* One n_diff_pfxNN row per n-column unique prefix. */
		for (unsigned i = 0; i < index->n_uniq; i++) {

			char	stat_name[16];
			char	stat_description[1024];

			snprintf(stat_name, sizeof(stat_name),
				 "n_diff_pfx%02u", i + 1);

			/* craft a string that contains the column names */
			snprintf(stat_description, sizeof(stat_description),
				 "%s", index->fields[0].name());
			for (unsigned j = 1; j <= i; j++) {
				size_t	len;

				len = strlen(stat_description);

				snprintf(stat_description + len,
					 sizeof(stat_description) - len,
					 ",%s", index->fields[j].name());
			}

			ret = dict_stats_save_index_stat(
				index, now, stat_name,
				index->stat_n_diff_key_vals[i],
				&index->stat_n_sample_sizes[i],
				stat_description, trx);

			if (ret != DB_SUCCESS) {
				goto end;
			}
		}

		ret = dict_stats_save_index_stat(index, now, "n_leaf_pages",
						 index->stat_n_leaf_pages,
						 NULL,
						 "Number of leaf pages "
						 "in the index", trx);
		if (ret != DB_SUCCESS) {
			goto end;
		}

		ret = dict_stats_save_index_stat(index, now, "size",
						 index->stat_index_size,
						 NULL,
						 "Number of pages "
						 "in the index", trx);
		if (ret != DB_SUCCESS) {
			goto end;
		}
	}

	trx_commit_for_mysql(trx);

end:
	/* On error, dict_stats_save_index_stat() has already rolled
	the transaction back (see its contract); only free it here. */
	trx->free();
	goto func_exit;
}
2631
2632 /*********************************************************************//**
2633 Called for the row that is selected by
2634 SELECT ... FROM mysql.innodb_table_stats WHERE table='...'
2635 The second argument is a pointer to the table and the fetched stats are
2636 written to it.
2637 @return non-NULL dummy */
2638 static
2639 ibool
dict_stats_fetch_table_stats_step(void * node_void,void * table_void)2640 dict_stats_fetch_table_stats_step(
2641 /*==============================*/
2642 void* node_void, /*!< in: select node */
2643 void* table_void) /*!< out: table */
2644 {
2645 sel_node_t* node = (sel_node_t*) node_void;
2646 dict_table_t* table = (dict_table_t*) table_void;
2647 que_common_t* cnode;
2648 int i;
2649
2650 /* this should loop exactly 3 times - for
2651 n_rows,clustered_index_size,sum_of_other_index_sizes */
2652 for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
2653 cnode != NULL;
2654 cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
2655 i++) {
2656
2657 const byte* data;
2658 dfield_t* dfield = que_node_get_val(cnode);
2659 dtype_t* type = dfield_get_type(dfield);
2660 ulint len = dfield_get_len(dfield);
2661
2662 data = static_cast<const byte*>(dfield_get_data(dfield));
2663
2664 switch (i) {
2665 case 0: /* mysql.innodb_table_stats.n_rows */
2666
2667 ut_a(dtype_get_mtype(type) == DATA_INT);
2668 ut_a(len == 8);
2669
2670 table->stat_n_rows = mach_read_from_8(data);
2671
2672 break;
2673
2674 case 1: /* mysql.innodb_table_stats.clustered_index_size */
2675
2676 ut_a(dtype_get_mtype(type) == DATA_INT);
2677 ut_a(len == 8);
2678
2679 table->stat_clustered_index_size
2680 = (ulint) mach_read_from_8(data);
2681
2682 break;
2683
2684 case 2: /* mysql.innodb_table_stats.sum_of_other_index_sizes */
2685
2686 ut_a(dtype_get_mtype(type) == DATA_INT);
2687 ut_a(len == 8);
2688
2689 table->stat_sum_of_other_index_sizes
2690 = (ulint) mach_read_from_8(data);
2691
2692 break;
2693
2694 default:
2695
2696 /* someone changed SELECT
2697 n_rows,clustered_index_size,sum_of_other_index_sizes
2698 to select more columns from innodb_table_stats without
2699 adjusting here */
2700 ut_error;
2701 }
2702 }
2703
2704 /* if i < 3 this means someone changed the
2705 SELECT n_rows,clustered_index_size,sum_of_other_index_sizes
2706 to select less columns from innodb_table_stats without adjusting here;
2707 if i > 3 we would have ut_error'ed earlier */
2708 ut_a(i == 3 /*n_rows,clustered_index_size,sum_of_other_index_sizes*/);
2709
2710 /* XXX this is not used but returning non-NULL is necessary */
2711 return(TRUE);
2712 }
2713
/** Aux struct used to pass a table and a boolean to
dict_stats_fetch_index_stats_step(). */
struct index_fetch_t {
	dict_table_t*	table;	/*!< table whose indexes are to be modified */
	bool		stats_were_modified; /*!< will be set to true if at
				least one index's stats were modified */
};
2721
2722 /*********************************************************************//**
2723 Called for the rows that are selected by
2724 SELECT ... FROM mysql.innodb_index_stats WHERE table='...'
2725 The second argument is a pointer to the table and the fetched stats are
2726 written to its indexes.
Suppose a table has N indexes and each index has Ui unique columns for i=1..N,
2728 then mysql.innodb_index_stats will have SUM(Ui) i=1..N rows for that table.
2729 So this function will be called SUM(Ui) times where SUM(Ui) is of magnitude
2730 N*AVG(Ui). In each call it searches for the currently fetched index into
2731 table->indexes linearly, assuming this list is not sorted. Thus, overall,
2732 fetching all indexes' stats from mysql.innodb_index_stats is O(N^2) where N
2733 is the number of indexes.
2734 This can be improved if we sort table->indexes in a temporary area just once
2735 and then search in that sorted list. Then the complexity will be O(N*log(N)).
2736 We assume a table will not have more than 100 indexes, so we go with the
2737 simpler N^2 algorithm.
2738 @return non-NULL dummy */
2739 static
2740 ibool
dict_stats_fetch_index_stats_step(void * node_void,void * arg_void)2741 dict_stats_fetch_index_stats_step(
2742 /*==============================*/
2743 void* node_void, /*!< in: select node */
2744 void* arg_void) /*!< out: table + a flag that tells if we
2745 modified anything */
2746 {
2747 sel_node_t* node = (sel_node_t*) node_void;
2748 index_fetch_t* arg = (index_fetch_t*) arg_void;
2749 dict_table_t* table = arg->table;
2750 dict_index_t* index = NULL;
2751 que_common_t* cnode;
2752 const char* stat_name = NULL;
2753 ulint stat_name_len = ULINT_UNDEFINED;
2754 ib_uint64_t stat_value = UINT64_UNDEFINED;
2755 ib_uint64_t sample_size = UINT64_UNDEFINED;
2756 int i;
2757
2758 /* this should loop exactly 4 times - for the columns that
2759 were selected: index_name,stat_name,stat_value,sample_size */
2760 for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
2761 cnode != NULL;
2762 cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
2763 i++) {
2764
2765 const byte* data;
2766 dfield_t* dfield = que_node_get_val(cnode);
2767 dtype_t* type = dfield_get_type(dfield);
2768 ulint len = dfield_get_len(dfield);
2769
2770 data = static_cast<const byte*>(dfield_get_data(dfield));
2771
2772 switch (i) {
2773 case 0: /* mysql.innodb_index_stats.index_name */
2774
2775 ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
2776
2777 /* search for index in table's indexes whose name
2778 matches data; the fetched index name is in data,
2779 has no terminating '\0' and has length len */
2780 for (index = dict_table_get_first_index(table);
2781 index != NULL;
2782 index = dict_table_get_next_index(index)) {
2783
2784 if (index->is_committed()
2785 && strlen(index->name) == len
2786 && memcmp(index->name, data, len) == 0) {
2787 /* the corresponding index was found */
2788 break;
2789 }
2790 }
2791
2792 /* if index is NULL here this means that
2793 mysql.innodb_index_stats contains more rows than the
2794 number of indexes in the table; this is ok, we just
2795 return ignoring those extra rows; in other words
2796 dict_stats_fetch_index_stats_step() has been called
2797 for a row from index_stats with unknown index_name
2798 column */
2799 if (index == NULL) {
2800
2801 return(TRUE);
2802 }
2803
2804 break;
2805
2806 case 1: /* mysql.innodb_index_stats.stat_name */
2807
2808 ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
2809
2810 ut_a(index != NULL);
2811
2812 stat_name = (const char*) data;
2813 stat_name_len = len;
2814
2815 break;
2816
2817 case 2: /* mysql.innodb_index_stats.stat_value */
2818
2819 ut_a(dtype_get_mtype(type) == DATA_INT);
2820 ut_a(len == 8);
2821
2822 ut_a(index != NULL);
2823 ut_a(stat_name != NULL);
2824 ut_a(stat_name_len != ULINT_UNDEFINED);
2825
2826 stat_value = mach_read_from_8(data);
2827
2828 break;
2829
2830 case 3: /* mysql.innodb_index_stats.sample_size */
2831
2832 ut_a(dtype_get_mtype(type) == DATA_INT);
2833 ut_a(len == 8 || len == UNIV_SQL_NULL);
2834
2835 ut_a(index != NULL);
2836 ut_a(stat_name != NULL);
2837 ut_a(stat_name_len != ULINT_UNDEFINED);
2838 ut_a(stat_value != UINT64_UNDEFINED);
2839
2840 if (len == UNIV_SQL_NULL) {
2841 break;
2842 }
2843 /* else */
2844
2845 sample_size = mach_read_from_8(data);
2846
2847 break;
2848
2849 default:
2850
2851 /* someone changed
2852 SELECT index_name,stat_name,stat_value,sample_size
2853 to select more columns from innodb_index_stats without
2854 adjusting here */
2855 ut_error;
2856 }
2857 }
2858
2859 /* if i < 4 this means someone changed the
2860 SELECT index_name,stat_name,stat_value,sample_size
2861 to select less columns from innodb_index_stats without adjusting here;
2862 if i > 4 we would have ut_error'ed earlier */
2863 ut_a(i == 4 /* index_name,stat_name,stat_value,sample_size */);
2864
2865 ut_a(index != NULL);
2866 ut_a(stat_name != NULL);
2867 ut_a(stat_name_len != ULINT_UNDEFINED);
2868 ut_a(stat_value != UINT64_UNDEFINED);
2869 /* sample_size could be UINT64_UNDEFINED here, if it is NULL */
2870
2871 #define PFX "n_diff_pfx"
2872 #define PFX_LEN 10
2873
2874 if (stat_name_len == 4 /* strlen("size") */
2875 && strncasecmp("size", stat_name, stat_name_len) == 0) {
2876 index->stat_index_size = (ulint) stat_value;
2877 arg->stats_were_modified = true;
2878 } else if (stat_name_len == 12 /* strlen("n_leaf_pages") */
2879 && strncasecmp("n_leaf_pages", stat_name, stat_name_len)
2880 == 0) {
2881 index->stat_n_leaf_pages = (ulint) stat_value;
2882 arg->stats_were_modified = true;
2883 } else if (stat_name_len == 12 /* strlen("n_page_split") */
2884 && strncasecmp("n_page_split", stat_name, stat_name_len)
2885 == 0) {
2886 index->stat_defrag_n_page_split = (ulint) stat_value;
2887 arg->stats_were_modified = true;
2888 } else if (stat_name_len == 13 /* strlen("n_pages_freed") */
2889 && strncasecmp("n_pages_freed", stat_name, stat_name_len)
2890 == 0) {
2891 index->stat_defrag_n_pages_freed = (ulint) stat_value;
2892 arg->stats_were_modified = true;
2893 } else if (stat_name_len > PFX_LEN /* e.g. stat_name=="n_diff_pfx01" */
2894 && strncasecmp(PFX, stat_name, PFX_LEN) == 0) {
2895
2896 const char* num_ptr;
2897 unsigned long n_pfx;
2898
2899 /* point num_ptr into "1" from "n_diff_pfx12..." */
2900 num_ptr = stat_name + PFX_LEN;
2901
2902 /* stat_name should have exactly 2 chars appended to PFX
2903 and they should be digits */
2904 if (stat_name_len != PFX_LEN + 2
2905 || num_ptr[0] < '0' || num_ptr[0] > '9'
2906 || num_ptr[1] < '0' || num_ptr[1] > '9') {
2907
2908 char db_utf8[MAX_DB_UTF8_LEN];
2909 char table_utf8[MAX_TABLE_UTF8_LEN];
2910
2911 dict_fs2utf8(table->name.m_name,
2912 db_utf8, sizeof(db_utf8),
2913 table_utf8, sizeof(table_utf8));
2914
2915 ib::info out;
2916 out << "Ignoring strange row from "
2917 << INDEX_STATS_NAME_PRINT << " WHERE"
2918 " database_name = '" << db_utf8
2919 << "' AND table_name = '" << table_utf8
2920 << "' AND index_name = '" << index->name()
2921 << "' AND stat_name = '";
2922 out.write(stat_name, stat_name_len);
2923 out << "'; because stat_name is malformed";
2924 return(TRUE);
2925 }
2926 /* else */
2927
2928 /* extract 12 from "n_diff_pfx12..." into n_pfx
2929 note that stat_name does not have a terminating '\0' */
2930 n_pfx = ulong(num_ptr[0] - '0') * 10 + ulong(num_ptr[1] - '0');
2931
2932 ulint n_uniq = index->n_uniq;
2933
2934 if (n_pfx == 0 || n_pfx > n_uniq) {
2935
2936 char db_utf8[MAX_DB_UTF8_LEN];
2937 char table_utf8[MAX_TABLE_UTF8_LEN];
2938
2939 dict_fs2utf8(table->name.m_name,
2940 db_utf8, sizeof(db_utf8),
2941 table_utf8, sizeof(table_utf8));
2942
2943 ib::info out;
2944 out << "Ignoring strange row from "
2945 << INDEX_STATS_NAME_PRINT << " WHERE"
2946 " database_name = '" << db_utf8
2947 << "' AND table_name = '" << table_utf8
2948 << "' AND index_name = '" << index->name()
2949 << "' AND stat_name = '";
2950 out.write(stat_name, stat_name_len);
2951 out << "'; because stat_name is out of range, the index"
2952 " has " << n_uniq << " unique columns";
2953
2954 return(TRUE);
2955 }
2956 /* else */
2957
2958 index->stat_n_diff_key_vals[n_pfx - 1] = stat_value;
2959
2960 if (sample_size != UINT64_UNDEFINED) {
2961 index->stat_n_sample_sizes[n_pfx - 1] = sample_size;
2962 } else {
2963 /* hmm, strange... the user must have UPDATEd the
2964 table manually and SET sample_size = NULL */
2965 index->stat_n_sample_sizes[n_pfx - 1] = 0;
2966 }
2967
2968 index->stat_n_non_null_key_vals[n_pfx - 1] = 0;
2969
2970 arg->stats_were_modified = true;
2971 } else {
2972 /* silently ignore rows with unknown stat_name, the
2973 user may have developed her own stats */
2974 }
2975
2976 /* XXX this is not used but returning non-NULL is necessary */
2977 return(TRUE);
2978 }
2979
2980 /*********************************************************************//**
2981 Read table's statistics from the persistent statistics storage.
2982 @return DB_SUCCESS or error code */
2983 static
2984 dberr_t
dict_stats_fetch_from_ps(dict_table_t * table)2985 dict_stats_fetch_from_ps(
2986 /*=====================*/
2987 dict_table_t* table) /*!< in/out: table */
2988 {
2989 index_fetch_t index_fetch_arg;
2990 trx_t* trx;
2991 pars_info_t* pinfo;
2992 dberr_t ret;
2993 char db_utf8[MAX_DB_UTF8_LEN];
2994 char table_utf8[MAX_TABLE_UTF8_LEN];
2995
2996 ut_ad(!mutex_own(&dict_sys.mutex));
2997
2998 /* Initialize all stats to dummy values before fetching because if
2999 the persistent storage contains incomplete stats (e.g. missing stats
3000 for some index) then we would end up with (partially) uninitialized
3001 stats. */
3002 dict_stats_empty_table(table, true);
3003
3004 trx = trx_create();
3005
3006 /* Use 'read-uncommitted' so that the SELECTs we execute
3007 do not get blocked in case some user has locked the rows we
3008 are SELECTing */
3009
3010 trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
3011
3012 if (srv_read_only_mode) {
3013 trx_start_internal_read_only(trx);
3014 } else {
3015 trx_start_internal(trx);
3016 }
3017
3018 dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8),
3019 table_utf8, sizeof(table_utf8));
3020
3021 pinfo = pars_info_create();
3022
3023 pars_info_add_str_literal(pinfo, "database_name", db_utf8);
3024
3025 pars_info_add_str_literal(pinfo, "table_name", table_utf8);
3026
3027 pars_info_bind_function(pinfo,
3028 "fetch_table_stats_step",
3029 dict_stats_fetch_table_stats_step,
3030 table);
3031
3032 index_fetch_arg.table = table;
3033 index_fetch_arg.stats_were_modified = false;
3034 pars_info_bind_function(pinfo,
3035 "fetch_index_stats_step",
3036 dict_stats_fetch_index_stats_step,
3037 &index_fetch_arg);
3038
3039 ret = que_eval_sql(pinfo,
3040 "PROCEDURE FETCH_STATS () IS\n"
3041 "found INT;\n"
3042 "DECLARE FUNCTION fetch_table_stats_step;\n"
3043 "DECLARE FUNCTION fetch_index_stats_step;\n"
3044 "DECLARE CURSOR table_stats_cur IS\n"
3045 " SELECT\n"
3046 /* if you change the selected fields, be
3047 sure to adjust
3048 dict_stats_fetch_table_stats_step() */
3049 " n_rows,\n"
3050 " clustered_index_size,\n"
3051 " sum_of_other_index_sizes\n"
3052 " FROM \"" TABLE_STATS_NAME "\"\n"
3053 " WHERE\n"
3054 " database_name = :database_name AND\n"
3055 " table_name = :table_name;\n"
3056 "DECLARE CURSOR index_stats_cur IS\n"
3057 " SELECT\n"
3058 /* if you change the selected fields, be
3059 sure to adjust
3060 dict_stats_fetch_index_stats_step() */
3061 " index_name,\n"
3062 " stat_name,\n"
3063 " stat_value,\n"
3064 " sample_size\n"
3065 " FROM \"" INDEX_STATS_NAME "\"\n"
3066 " WHERE\n"
3067 " database_name = :database_name AND\n"
3068 " table_name = :table_name;\n"
3069
3070 "BEGIN\n"
3071
3072 "OPEN table_stats_cur;\n"
3073 "FETCH table_stats_cur INTO\n"
3074 " fetch_table_stats_step();\n"
3075 "IF (SQL % NOTFOUND) THEN\n"
3076 " CLOSE table_stats_cur;\n"
3077 " RETURN;\n"
3078 "END IF;\n"
3079 "CLOSE table_stats_cur;\n"
3080
3081 "OPEN index_stats_cur;\n"
3082 "found := 1;\n"
3083 "WHILE found = 1 LOOP\n"
3084 " FETCH index_stats_cur INTO\n"
3085 " fetch_index_stats_step();\n"
3086 " IF (SQL % NOTFOUND) THEN\n"
3087 " found := 0;\n"
3088 " END IF;\n"
3089 "END LOOP;\n"
3090 "CLOSE index_stats_cur;\n"
3091
3092 "END;",
3093 TRUE, trx);
3094 /* pinfo is freed by que_eval_sql() */
3095
3096 trx_commit_for_mysql(trx);
3097
3098 trx->free();
3099
3100 if (!index_fetch_arg.stats_were_modified) {
3101 return(DB_STATS_DO_NOT_EXIST);
3102 }
3103
3104 return(ret);
3105 }
3106
3107 /*********************************************************************//**
3108 Clear defragmentation stats modified counter for all indices in table. */
3109 static
3110 void
dict_stats_empty_defrag_modified_counter(dict_table_t * table)3111 dict_stats_empty_defrag_modified_counter(
3112 dict_table_t* table) /*!< in: table */
3113 {
3114 dict_index_t* index;
3115 ut_a(table);
3116 for (index = dict_table_get_first_index(table);
3117 index != NULL;
3118 index = dict_table_get_next_index(index)) {
3119 index->stat_defrag_modified_counter = 0;
3120 }
3121 }
3122
3123 /*********************************************************************//**
3124 Fetches or calculates new estimates for index statistics. */
3125 void
dict_stats_update_for_index(dict_index_t * index)3126 dict_stats_update_for_index(
3127 /*========================*/
3128 dict_index_t* index) /*!< in/out: index */
3129 {
3130 DBUG_ENTER("dict_stats_update_for_index");
3131
3132 ut_ad(!mutex_own(&dict_sys.mutex));
3133
3134 if (dict_stats_is_persistent_enabled(index->table)) {
3135
3136 if (dict_stats_persistent_storage_check(false)) {
3137 index_stats_t stats = dict_stats_analyze_index(index);
3138 mutex_enter(&dict_sys.mutex);
3139 index->stat_index_size = stats.index_size;
3140 index->stat_n_leaf_pages = stats.n_leaf_pages;
3141 for (size_t i = 0; i < stats.stats.size(); ++i) {
3142 index->stat_n_diff_key_vals[i]
3143 = stats.stats[i].n_diff_key_vals;
3144 index->stat_n_sample_sizes[i]
3145 = stats.stats[i].n_sample_sizes;
3146 index->stat_n_non_null_key_vals[i]
3147 = stats.stats[i].n_non_null_key_vals;
3148 }
3149 index->table->stat_sum_of_other_index_sizes
3150 += index->stat_index_size;
3151 mutex_exit(&dict_sys.mutex);
3152
3153 dict_stats_save(index->table, &index->id);
3154 DBUG_VOID_RETURN;
3155 }
3156 /* else */
3157
3158 if (innodb_index_stats_not_found == false &&
3159 index->stats_error_printed == false) {
3160 /* Fall back to transient stats since the persistent
3161 storage is not present or is corrupted */
3162
3163 ib::info() << "Recalculation of persistent statistics"
3164 " requested for table " << index->table->name
3165 << " index " << index->name
3166 << " but the required"
3167 " persistent statistics storage is not present or is"
3168 " corrupted. Using transient stats instead.";
3169 index->stats_error_printed = false;
3170 }
3171 }
3172
3173 dict_stats_update_transient_for_index(index);
3174
3175 DBUG_VOID_RETURN;
3176 }
3177
3178 /*********************************************************************//**
3179 Calculates new estimates for table and index statistics. The statistics
3180 are used in query optimization.
3181 @return DB_SUCCESS or error code */
3182 dberr_t
dict_stats_update(dict_table_t * table,dict_stats_upd_option_t stats_upd_option)3183 dict_stats_update(
3184 /*==============*/
3185 dict_table_t* table, /*!< in/out: table */
3186 dict_stats_upd_option_t stats_upd_option)
3187 /*!< in: whether to (re) calc
3188 the stats or to fetch them from
3189 the persistent statistics
3190 storage */
3191 {
3192 ut_ad(!mutex_own(&dict_sys.mutex));
3193
3194 if (!table->is_readable()) {
3195 return (dict_stats_report_error(table));
3196 } else if (srv_force_recovery > SRV_FORCE_NO_IBUF_MERGE) {
3197 /* If we have set a high innodb_force_recovery level, do
3198 not calculate statistics, as a badly corrupted index can
3199 cause a crash in it. */
3200 dict_stats_empty_table(table, false);
3201 return(DB_SUCCESS);
3202 }
3203
3204 switch (stats_upd_option) {
3205 case DICT_STATS_RECALC_PERSISTENT:
3206
3207 if (srv_read_only_mode) {
3208 goto transient;
3209 }
3210
3211 /* Persistent recalculation requested, called from
3212 1) ANALYZE TABLE, or
3213 2) the auto recalculation background thread, or
3214 3) open table if stats do not exist on disk and auto recalc
3215 is enabled */
3216
3217 /* InnoDB internal tables (e.g. SYS_TABLES) cannot have
3218 persistent stats enabled */
3219 ut_a(strchr(table->name.m_name, '/') != NULL);
3220
3221 /* check if the persistent statistics storage exists
3222 before calling the potentially slow function
3223 dict_stats_update_persistent(); that is a
3224 prerequisite for dict_stats_save() succeeding */
3225 if (dict_stats_persistent_storage_check(false)) {
3226
3227 dberr_t err;
3228
3229 err = dict_stats_update_persistent(table);
3230
3231 if (err != DB_SUCCESS) {
3232 return(err);
3233 }
3234
3235 err = dict_stats_save(table, NULL);
3236
3237 return(err);
3238 }
3239
3240 /* Fall back to transient stats since the persistent
3241 storage is not present or is corrupted */
3242
3243 if (innodb_table_stats_not_found == false &&
3244 table->stats_error_printed == false) {
3245 ib::warn() << "Recalculation of persistent statistics"
3246 " requested for table "
3247 << table->name
3248 << " but the required persistent"
3249 " statistics storage is not present or is corrupted."
3250 " Using transient stats instead.";
3251 table->stats_error_printed = true;
3252 }
3253
3254 goto transient;
3255
3256 case DICT_STATS_RECALC_TRANSIENT:
3257
3258 goto transient;
3259
3260 case DICT_STATS_EMPTY_TABLE:
3261
3262 dict_stats_empty_table(table, true);
3263
3264 /* If table is using persistent stats,
3265 then save the stats on disk */
3266
3267 if (dict_stats_is_persistent_enabled(table)) {
3268
3269 if (dict_stats_persistent_storage_check(false)) {
3270
3271 return(dict_stats_save(table, NULL));
3272 }
3273
3274 return(DB_STATS_DO_NOT_EXIST);
3275 }
3276
3277 return(DB_SUCCESS);
3278
3279 case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY:
3280
3281 /* fetch requested, either fetch from persistent statistics
3282 storage or use the old method */
3283
3284 if (table->stat_initialized) {
3285 return(DB_SUCCESS);
3286 }
3287
3288 /* InnoDB internal tables (e.g. SYS_TABLES) cannot have
3289 persistent stats enabled */
3290 ut_a(strchr(table->name.m_name, '/') != NULL);
3291
3292 if (!dict_stats_persistent_storage_check(false)) {
3293 /* persistent statistics storage does not exist
3294 or is corrupted, calculate the transient stats */
3295
3296 if (innodb_table_stats_not_found == false &&
3297 table->stats_error_printed == false) {
3298 ib::error() << "Fetch of persistent statistics"
3299 " requested for table "
3300 << table->name
3301 << " but the required system tables "
3302 << TABLE_STATS_NAME_PRINT
3303 << " and " << INDEX_STATS_NAME_PRINT
3304 << " are not present or have unexpected"
3305 " structure. Using transient stats instead.";
3306 table->stats_error_printed = true;
3307 }
3308
3309 goto transient;
3310 }
3311
3312 dict_table_t* t;
3313
3314 /* Create a dummy table object with the same name and
3315 indexes, suitable for fetching the stats into it. */
3316 t = dict_stats_table_clone_create(table);
3317
3318 dberr_t err = dict_stats_fetch_from_ps(t);
3319
3320 t->stats_last_recalc = table->stats_last_recalc;
3321 t->stat_modified_counter = 0;
3322 dict_stats_empty_defrag_modified_counter(t);
3323
3324 switch (err) {
3325 case DB_SUCCESS:
3326
3327 mutex_enter(&dict_sys.mutex);
3328
3329 /* Pass reset_ignored_indexes=true as parameter
3330 to dict_stats_copy. This will cause statictics
3331 for corrupted indexes to be set to empty values */
3332 dict_stats_copy(table, t, true);
3333
3334 dict_stats_assert_initialized(table);
3335
3336 mutex_exit(&dict_sys.mutex);
3337
3338 dict_stats_table_clone_free(t);
3339
3340 return(DB_SUCCESS);
3341 case DB_STATS_DO_NOT_EXIST:
3342
3343 dict_stats_table_clone_free(t);
3344
3345 if (srv_read_only_mode) {
3346 goto transient;
3347 }
3348
3349 if (dict_stats_auto_recalc_is_enabled(table)) {
3350 return(dict_stats_update(
3351 table,
3352 DICT_STATS_RECALC_PERSISTENT));
3353 }
3354
3355 ib::info() << "Trying to use table " << table->name
3356 << " which has persistent statistics enabled,"
3357 " but auto recalculation turned off and the"
3358 " statistics do not exist in "
3359 TABLE_STATS_NAME_PRINT
3360 " and " INDEX_STATS_NAME_PRINT
3361 ". Please either run \"ANALYZE TABLE "
3362 << table->name << ";\" manually or enable the"
3363 " auto recalculation with \"ALTER TABLE "
3364 << table->name << " STATS_AUTO_RECALC=1;\"."
3365 " InnoDB will now use transient statistics for "
3366 << table->name << ".";
3367
3368 goto transient;
3369 default:
3370
3371 dict_stats_table_clone_free(t);
3372
3373 if (innodb_table_stats_not_found == false &&
3374 table->stats_error_printed == false) {
3375 ib::error() << "Error fetching persistent statistics"
3376 " for table "
3377 << table->name
3378 << " from " TABLE_STATS_NAME_PRINT " and "
3379 INDEX_STATS_NAME_PRINT ": " << err
3380 << ". Using transient stats method instead.";
3381 }
3382
3383 goto transient;
3384 }
3385 /* no "default:" in order to produce a compilation warning
3386 about unhandled enumeration value */
3387 }
3388
3389 transient:
3390 dict_stats_update_transient(table);
3391
3392 return(DB_SUCCESS);
3393 }
3394
3395 /** Remove the information for a particular index's stats from the persistent
3396 storage if it exists and if there is data stored for this index.
3397 This function creates its own trx and commits it.
3398
3399 We must modify system tables in a separate transaction in order to
3400 adhere to the InnoDB design constraint that dict_sys.latch prevents
3401 lock waits on system tables. If we modified system and user tables in
3402 the same transaction, we should exclusively hold dict_sys.latch until
3403 the transaction is committed, and effectively block other transactions
3404 that will attempt to open any InnoDB tables. Because we have no
3405 guarantee that user transactions will be committed fast, we cannot
3406 afford to keep the system tables locked in a user transaction.
3407 @return DB_SUCCESS or error code */
3408 dberr_t
dict_stats_drop_index(const char * db_and_table,const char * iname,char * errstr,ulint errstr_sz)3409 dict_stats_drop_index(
3410 /*==================*/
3411 const char* db_and_table,/*!< in: db and table, e.g. 'db/table' */
3412 const char* iname, /*!< in: index name */
3413 char* errstr, /*!< out: error message if != DB_SUCCESS
3414 is returned */
3415 ulint errstr_sz)/*!< in: size of the errstr buffer */
3416 {
3417 char db_utf8[MAX_DB_UTF8_LEN];
3418 char table_utf8[MAX_TABLE_UTF8_LEN];
3419 pars_info_t* pinfo;
3420 dberr_t ret;
3421
3422 ut_ad(!mutex_own(&dict_sys.mutex));
3423
3424 /* skip indexes whose table names do not contain a database name
3425 e.g. if we are dropping an index from SYS_TABLES */
3426 if (strchr(db_and_table, '/') == NULL) {
3427
3428 return(DB_SUCCESS);
3429 }
3430
3431 dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
3432 table_utf8, sizeof(table_utf8));
3433
3434 pinfo = pars_info_create();
3435
3436 pars_info_add_str_literal(pinfo, "database_name", db_utf8);
3437
3438 pars_info_add_str_literal(pinfo, "table_name", table_utf8);
3439
3440 pars_info_add_str_literal(pinfo, "index_name", iname);
3441
3442 dict_sys_lock();
3443
3444 ret = dict_stats_exec_sql(
3445 pinfo,
3446 "PROCEDURE DROP_INDEX_STATS () IS\n"
3447 "BEGIN\n"
3448 "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
3449 "database_name = :database_name AND\n"
3450 "table_name = :table_name AND\n"
3451 "index_name = :index_name;\n"
3452 "END;\n", NULL);
3453
3454 dict_sys_unlock();
3455
3456 if (ret == DB_STATS_DO_NOT_EXIST) {
3457 ret = DB_SUCCESS;
3458 }
3459
3460 if (ret != DB_SUCCESS) {
3461 snprintf(errstr, errstr_sz,
3462 "Unable to delete statistics for index %s"
3463 " from %s%s: %s. They can be deleted later using"
3464 " DELETE FROM %s WHERE"
3465 " database_name = '%s' AND"
3466 " table_name = '%s' AND"
3467 " index_name = '%s';",
3468 iname,
3469 INDEX_STATS_NAME_PRINT,
3470 (ret == DB_LOCK_WAIT_TIMEOUT
3471 ? " because the rows are locked"
3472 : ""),
3473 ut_strerr(ret),
3474 INDEX_STATS_NAME_PRINT,
3475 db_utf8,
3476 table_utf8,
3477 iname);
3478
3479 ut_print_timestamp(stderr);
3480 fprintf(stderr, " InnoDB: %s\n", errstr);
3481 }
3482
3483 return(ret);
3484 }
3485
3486 /*********************************************************************//**
3487 Executes
3488 DELETE FROM mysql.innodb_table_stats
3489 WHERE database_name = '...' AND table_name = '...';
3490 Creates its own transaction and commits it.
3491 @return DB_SUCCESS or error code */
3492 UNIV_INLINE
3493 dberr_t
dict_stats_delete_from_table_stats(const char * database_name,const char * table_name)3494 dict_stats_delete_from_table_stats(
3495 /*===============================*/
3496 const char* database_name, /*!< in: database name, e.g. 'db' */
3497 const char* table_name) /*!< in: table name, e.g. 'table' */
3498 {
3499 pars_info_t* pinfo;
3500 dberr_t ret;
3501
3502 ut_d(dict_sys.assert_locked());
3503
3504 pinfo = pars_info_create();
3505
3506 pars_info_add_str_literal(pinfo, "database_name", database_name);
3507 pars_info_add_str_literal(pinfo, "table_name", table_name);
3508
3509 ret = dict_stats_exec_sql(
3510 pinfo,
3511 "PROCEDURE DELETE_FROM_TABLE_STATS () IS\n"
3512 "BEGIN\n"
3513 "DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n"
3514 "database_name = :database_name AND\n"
3515 "table_name = :table_name;\n"
3516 "END;\n", NULL);
3517
3518 return(ret);
3519 }
3520
3521 /*********************************************************************//**
3522 Executes
3523 DELETE FROM mysql.innodb_index_stats
3524 WHERE database_name = '...' AND table_name = '...';
3525 Creates its own transaction and commits it.
3526 @return DB_SUCCESS or error code */
3527 UNIV_INLINE
3528 dberr_t
dict_stats_delete_from_index_stats(const char * database_name,const char * table_name)3529 dict_stats_delete_from_index_stats(
3530 /*===============================*/
3531 const char* database_name, /*!< in: database name, e.g. 'db' */
3532 const char* table_name) /*!< in: table name, e.g. 'table' */
3533 {
3534 pars_info_t* pinfo;
3535 dberr_t ret;
3536
3537 ut_d(dict_sys.assert_locked());
3538
3539 pinfo = pars_info_create();
3540
3541 pars_info_add_str_literal(pinfo, "database_name", database_name);
3542 pars_info_add_str_literal(pinfo, "table_name", table_name);
3543
3544 ret = dict_stats_exec_sql(
3545 pinfo,
3546 "PROCEDURE DELETE_FROM_INDEX_STATS () IS\n"
3547 "BEGIN\n"
3548 "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
3549 "database_name = :database_name AND\n"
3550 "table_name = :table_name;\n"
3551 "END;\n", NULL);
3552
3553 return(ret);
3554 }
3555
3556 /*********************************************************************//**
3557 Removes the statistics for a table and all of its indexes from the
3558 persistent statistics storage if it exists and if there is data stored for
3559 the table. This function creates its own transaction and commits it.
3560 @return DB_SUCCESS or error code */
3561 dberr_t
dict_stats_drop_table(const char * db_and_table,char * errstr,ulint errstr_sz)3562 dict_stats_drop_table(
3563 /*==================*/
3564 const char* db_and_table, /*!< in: db and table, e.g. 'db/table' */
3565 char* errstr, /*!< out: error message
3566 if != DB_SUCCESS is returned */
3567 ulint errstr_sz) /*!< in: size of errstr buffer */
3568 {
3569 char db_utf8[MAX_DB_UTF8_LEN];
3570 char table_utf8[MAX_TABLE_UTF8_LEN];
3571 dberr_t ret;
3572
3573 ut_d(dict_sys.assert_locked());
3574
3575 /* skip tables that do not contain a database name
3576 e.g. if we are dropping SYS_TABLES */
3577 if (strchr(db_and_table, '/') == NULL) {
3578
3579 return(DB_SUCCESS);
3580 }
3581
3582 /* skip innodb_table_stats and innodb_index_stats themselves */
3583 if (strcmp(db_and_table, TABLE_STATS_NAME) == 0
3584 || strcmp(db_and_table, INDEX_STATS_NAME) == 0) {
3585
3586 return(DB_SUCCESS);
3587 }
3588
3589 dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
3590 table_utf8, sizeof(table_utf8));
3591
3592 ret = dict_stats_delete_from_table_stats(db_utf8, table_utf8);
3593
3594 if (ret == DB_SUCCESS) {
3595 ret = dict_stats_delete_from_index_stats(db_utf8, table_utf8);
3596 }
3597
3598 if (ret == DB_STATS_DO_NOT_EXIST) {
3599 ret = DB_SUCCESS;
3600 }
3601
3602 if (ret != DB_SUCCESS) {
3603
3604 snprintf(errstr, errstr_sz,
3605 "Unable to delete statistics for table %s.%s: %s."
3606 " They can be deleted later using"
3607
3608 " DELETE FROM %s WHERE"
3609 " database_name = '%s' AND"
3610 " table_name = '%s';"
3611
3612 " DELETE FROM %s WHERE"
3613 " database_name = '%s' AND"
3614 " table_name = '%s';",
3615
3616 db_utf8, table_utf8,
3617 ut_strerr(ret),
3618
3619 INDEX_STATS_NAME_PRINT,
3620 db_utf8, table_utf8,
3621
3622 TABLE_STATS_NAME_PRINT,
3623 db_utf8, table_utf8);
3624 }
3625
3626 return(ret);
3627 }
3628
3629 /*********************************************************************//**
3630 Executes
3631 UPDATE mysql.innodb_table_stats SET
3632 database_name = '...', table_name = '...'
3633 WHERE database_name = '...' AND table_name = '...';
3634 Creates its own transaction and commits it.
3635 @return DB_SUCCESS or error code */
3636 UNIV_INLINE
3637 dberr_t
dict_stats_rename_table_in_table_stats(const char * old_dbname_utf8,const char * old_tablename_utf8,const char * new_dbname_utf8,const char * new_tablename_utf8)3638 dict_stats_rename_table_in_table_stats(
3639 /*===================================*/
3640 const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
3641 const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
3642 const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
3643 const char* new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
3644 {
3645 pars_info_t* pinfo;
3646 dberr_t ret;
3647
3648 ut_d(dict_sys.assert_locked());
3649
3650 pinfo = pars_info_create();
3651
3652 pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
3653 pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
3654 pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
3655 pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
3656
3657 ret = dict_stats_exec_sql(
3658 pinfo,
3659 "PROCEDURE RENAME_TABLE_IN_TABLE_STATS () IS\n"
3660 "BEGIN\n"
3661 "UPDATE \"" TABLE_STATS_NAME "\" SET\n"
3662 "database_name = :new_dbname_utf8,\n"
3663 "table_name = :new_tablename_utf8\n"
3664 "WHERE\n"
3665 "database_name = :old_dbname_utf8 AND\n"
3666 "table_name = :old_tablename_utf8;\n"
3667 "END;\n", NULL);
3668
3669 return(ret);
3670 }
3671
3672 /*********************************************************************//**
3673 Executes
3674 UPDATE mysql.innodb_index_stats SET
3675 database_name = '...', table_name = '...'
3676 WHERE database_name = '...' AND table_name = '...';
3677 Creates its own transaction and commits it.
3678 @return DB_SUCCESS or error code */
3679 UNIV_INLINE
3680 dberr_t
dict_stats_rename_table_in_index_stats(const char * old_dbname_utf8,const char * old_tablename_utf8,const char * new_dbname_utf8,const char * new_tablename_utf8)3681 dict_stats_rename_table_in_index_stats(
3682 /*===================================*/
3683 const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
3684 const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
3685 const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
3686 const char* new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
3687 {
3688 pars_info_t* pinfo;
3689 dberr_t ret;
3690
3691 ut_d(dict_sys.assert_locked());
3692
3693 pinfo = pars_info_create();
3694
3695 pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
3696 pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
3697 pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
3698 pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
3699
3700 ret = dict_stats_exec_sql(
3701 pinfo,
3702 "PROCEDURE RENAME_TABLE_IN_INDEX_STATS () IS\n"
3703 "BEGIN\n"
3704 "UPDATE \"" INDEX_STATS_NAME "\" SET\n"
3705 "database_name = :new_dbname_utf8,\n"
3706 "table_name = :new_tablename_utf8\n"
3707 "WHERE\n"
3708 "database_name = :old_dbname_utf8 AND\n"
3709 "table_name = :old_tablename_utf8;\n"
3710 "END;\n", NULL);
3711
3712 return(ret);
3713 }
3714
3715 /*********************************************************************//**
3716 Renames a table in InnoDB persistent stats storage.
3717 This function creates its own transaction and commits it.
3718 @return DB_SUCCESS or error code */
3719 dberr_t
dict_stats_rename_table(const char * old_name,const char * new_name,char * errstr,size_t errstr_sz)3720 dict_stats_rename_table(
3721 /*====================*/
3722 const char* old_name, /*!< in: old name, e.g. 'db/table' */
3723 const char* new_name, /*!< in: new name, e.g. 'db/table' */
3724 char* errstr, /*!< out: error string if != DB_SUCCESS
3725 is returned */
3726 size_t errstr_sz) /*!< in: errstr size */
3727 {
3728 char old_db_utf8[MAX_DB_UTF8_LEN];
3729 char new_db_utf8[MAX_DB_UTF8_LEN];
3730 char old_table_utf8[MAX_TABLE_UTF8_LEN];
3731 char new_table_utf8[MAX_TABLE_UTF8_LEN];
3732 dberr_t ret;
3733
3734 /* skip innodb_table_stats and innodb_index_stats themselves */
3735 if (strcmp(old_name, TABLE_STATS_NAME) == 0
3736 || strcmp(old_name, INDEX_STATS_NAME) == 0
3737 || strcmp(new_name, TABLE_STATS_NAME) == 0
3738 || strcmp(new_name, INDEX_STATS_NAME) == 0) {
3739
3740 return(DB_SUCCESS);
3741 }
3742
3743 dict_fs2utf8(old_name, old_db_utf8, sizeof(old_db_utf8),
3744 old_table_utf8, sizeof(old_table_utf8));
3745
3746 dict_fs2utf8(new_name, new_db_utf8, sizeof(new_db_utf8),
3747 new_table_utf8, sizeof(new_table_utf8));
3748
3749 dict_sys_lock();
3750
3751 ulint n_attempts = 0;
3752 do {
3753 n_attempts++;
3754
3755 ret = dict_stats_rename_table_in_table_stats(
3756 old_db_utf8, old_table_utf8,
3757 new_db_utf8, new_table_utf8);
3758
3759 if (ret == DB_DUPLICATE_KEY) {
3760 dict_stats_delete_from_table_stats(
3761 new_db_utf8, new_table_utf8);
3762 }
3763
3764 if (ret == DB_STATS_DO_NOT_EXIST) {
3765 ret = DB_SUCCESS;
3766 }
3767
3768 if (ret != DB_SUCCESS) {
3769 dict_sys_unlock();
3770 os_thread_sleep(200000 /* 0.2 sec */);
3771 dict_sys_lock();
3772 }
3773 } while ((ret == DB_DEADLOCK
3774 || ret == DB_DUPLICATE_KEY
3775 || ret == DB_LOCK_WAIT_TIMEOUT)
3776 && n_attempts < 5);
3777
3778 if (ret != DB_SUCCESS) {
3779 snprintf(errstr, errstr_sz,
3780 "Unable to rename statistics from"
3781 " %s.%s to %s.%s in %s: %s."
3782 " They can be renamed later using"
3783
3784 " UPDATE %s SET"
3785 " database_name = '%s',"
3786 " table_name = '%s'"
3787 " WHERE"
3788 " database_name = '%s' AND"
3789 " table_name = '%s';",
3790
3791 old_db_utf8, old_table_utf8,
3792 new_db_utf8, new_table_utf8,
3793 TABLE_STATS_NAME_PRINT,
3794 ut_strerr(ret),
3795
3796 TABLE_STATS_NAME_PRINT,
3797 new_db_utf8, new_table_utf8,
3798 old_db_utf8, old_table_utf8);
3799 dict_sys_unlock();
3800 return(ret);
3801 }
3802 /* else */
3803
3804 n_attempts = 0;
3805 do {
3806 n_attempts++;
3807
3808 ret = dict_stats_rename_table_in_index_stats(
3809 old_db_utf8, old_table_utf8,
3810 new_db_utf8, new_table_utf8);
3811
3812 if (ret == DB_DUPLICATE_KEY) {
3813 dict_stats_delete_from_index_stats(
3814 new_db_utf8, new_table_utf8);
3815 }
3816
3817 if (ret == DB_STATS_DO_NOT_EXIST) {
3818 ret = DB_SUCCESS;
3819 }
3820
3821 if (ret != DB_SUCCESS) {
3822 dict_sys_unlock();
3823 os_thread_sleep(200000 /* 0.2 sec */);
3824 dict_sys_lock();
3825 }
3826 } while ((ret == DB_DEADLOCK
3827 || ret == DB_DUPLICATE_KEY
3828 || ret == DB_LOCK_WAIT_TIMEOUT)
3829 && n_attempts < 5);
3830
3831 dict_sys_unlock();
3832
3833 if (ret != DB_SUCCESS) {
3834 snprintf(errstr, errstr_sz,
3835 "Unable to rename statistics from"
3836 " %s.%s to %s.%s in %s: %s."
3837 " They can be renamed later using"
3838
3839 " UPDATE %s SET"
3840 " database_name = '%s',"
3841 " table_name = '%s'"
3842 " WHERE"
3843 " database_name = '%s' AND"
3844 " table_name = '%s';",
3845
3846 old_db_utf8, old_table_utf8,
3847 new_db_utf8, new_table_utf8,
3848 INDEX_STATS_NAME_PRINT,
3849 ut_strerr(ret),
3850
3851 INDEX_STATS_NAME_PRINT,
3852 new_db_utf8, new_table_utf8,
3853 old_db_utf8, old_table_utf8);
3854 }
3855
3856 return(ret);
3857 }
3858
3859 /*********************************************************************//**
3860 Renames an index in InnoDB persistent stats storage.
3861 This function creates its own transaction and commits it.
3862 @return DB_SUCCESS or error code. DB_STATS_DO_NOT_EXIST will be returned
3863 if the persistent stats do not exist. */
3864 dberr_t
dict_stats_rename_index(const dict_table_t * table,const char * old_index_name,const char * new_index_name)3865 dict_stats_rename_index(
3866 /*====================*/
3867 const dict_table_t* table, /*!< in: table whose index
3868 is renamed */
3869 const char* old_index_name, /*!< in: old index name */
3870 const char* new_index_name) /*!< in: new index name */
3871 {
3872 dict_sys_lock();
3873
3874 if (!dict_stats_persistent_storage_check(true)) {
3875 dict_sys_unlock();
3876 return(DB_STATS_DO_NOT_EXIST);
3877 }
3878
3879 char dbname_utf8[MAX_DB_UTF8_LEN];
3880 char tablename_utf8[MAX_TABLE_UTF8_LEN];
3881
3882 dict_fs2utf8(table->name.m_name, dbname_utf8, sizeof(dbname_utf8),
3883 tablename_utf8, sizeof(tablename_utf8));
3884
3885 pars_info_t* pinfo;
3886
3887 pinfo = pars_info_create();
3888
3889 pars_info_add_str_literal(pinfo, "dbname_utf8", dbname_utf8);
3890 pars_info_add_str_literal(pinfo, "tablename_utf8", tablename_utf8);
3891 pars_info_add_str_literal(pinfo, "new_index_name", new_index_name);
3892 pars_info_add_str_literal(pinfo, "old_index_name", old_index_name);
3893
3894 dberr_t ret;
3895
3896 ret = dict_stats_exec_sql(
3897 pinfo,
3898 "PROCEDURE RENAME_INDEX_IN_INDEX_STATS () IS\n"
3899 "BEGIN\n"
3900 "UPDATE \"" INDEX_STATS_NAME "\" SET\n"
3901 "index_name = :new_index_name\n"
3902 "WHERE\n"
3903 "database_name = :dbname_utf8 AND\n"
3904 "table_name = :tablename_utf8 AND\n"
3905 "index_name = :old_index_name;\n"
3906 "END;\n", NULL);
3907
3908 dict_sys_unlock();
3909
3910 return(ret);
3911 }
3912
3913 /* tests @{ */
3914 #ifdef UNIV_ENABLE_UNIT_TEST_DICT_STATS
3915
3916 /* The following unit tests test some of the functions in this file
3917 individually, such testing cannot be performed by the mysql-test framework
3918 via SQL. */
3919
3920 /* test_dict_table_schema_check() @{ */
3921 void
test_dict_table_schema_check()3922 test_dict_table_schema_check()
3923 {
3924 /*
3925 CREATE TABLE tcheck (
3926 c01 VARCHAR(123),
3927 c02 INT,
3928 c03 INT NOT NULL,
3929 c04 INT UNSIGNED,
3930 c05 BIGINT,
3931 c06 BIGINT UNSIGNED NOT NULL,
3932 c07 TIMESTAMP
3933 ) ENGINE=INNODB;
3934 */
3935 /* definition for the table 'test/tcheck' */
3936 dict_col_meta_t columns[] = {
3937 {"c01", DATA_VARCHAR, 0, 123},
3938 {"c02", DATA_INT, 0, 4},
3939 {"c03", DATA_INT, DATA_NOT_NULL, 4},
3940 {"c04", DATA_INT, DATA_UNSIGNED, 4},
3941 {"c05", DATA_INT, 0, 8},
3942 {"c06", DATA_INT, DATA_NOT_NULL | DATA_UNSIGNED, 8},
3943 {"c07", DATA_INT, 0, 4},
3944 {"c_extra", DATA_INT, 0, 4}
3945 };
3946 dict_table_schema_t schema = {
3947 "test/tcheck",
3948 0 /* will be set individually for each test below */,
3949 columns
3950 };
3951 char errstr[512];
3952
3953 snprintf(errstr, sizeof(errstr), "Table not found");
3954
3955 /* prevent any data dictionary modifications while we are checking
3956 the tables' structure */
3957
3958 mutex_enter(&dict_sys.mutex);
3959
3960 /* check that a valid table is reported as valid */
3961 schema.n_cols = 7;
3962 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3963 == DB_SUCCESS) {
3964 printf("OK: test.tcheck ok\n");
3965 } else {
3966 printf("ERROR: %s\n", errstr);
3967 printf("ERROR: test.tcheck not present or corrupted\n");
3968 goto test_dict_table_schema_check_end;
3969 }
3970
3971 /* check columns with wrong length */
3972 schema.columns[1].len = 8;
3973 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3974 != DB_SUCCESS) {
3975 printf("OK: test.tcheck.c02 has different length and is"
3976 " reported as corrupted\n");
3977 } else {
3978 printf("OK: test.tcheck.c02 has different length but is"
3979 " reported as ok\n");
3980 goto test_dict_table_schema_check_end;
3981 }
3982 schema.columns[1].len = 4;
3983
3984 /* request that c02 is NOT NULL while actually it does not have
3985 this flag set */
3986 schema.columns[1].prtype_mask |= DATA_NOT_NULL;
3987 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3988 != DB_SUCCESS) {
3989 printf("OK: test.tcheck.c02 does not have NOT NULL while"
3990 " it should and is reported as corrupted\n");
3991 } else {
3992 printf("ERROR: test.tcheck.c02 does not have NOT NULL while"
3993 " it should and is not reported as corrupted\n");
3994 goto test_dict_table_schema_check_end;
3995 }
3996 schema.columns[1].prtype_mask &= ~DATA_NOT_NULL;
3997
3998 /* check a table that contains some extra columns */
3999 schema.n_cols = 6;
4000 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
4001 == DB_SUCCESS) {
4002 printf("ERROR: test.tcheck has more columns but is not"
4003 " reported as corrupted\n");
4004 goto test_dict_table_schema_check_end;
4005 } else {
4006 printf("OK: test.tcheck has more columns and is"
4007 " reported as corrupted\n");
4008 }
4009
4010 /* check a table that has some columns missing */
4011 schema.n_cols = 8;
4012 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
4013 != DB_SUCCESS) {
4014 printf("OK: test.tcheck has missing columns and is"
4015 " reported as corrupted\n");
4016 } else {
4017 printf("ERROR: test.tcheck has missing columns but is"
4018 " reported as ok\n");
4019 goto test_dict_table_schema_check_end;
4020 }
4021
4022 /* check non-existent table */
4023 schema.table_name = "test/tcheck_nonexistent";
4024 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
4025 != DB_SUCCESS) {
4026 printf("OK: test.tcheck_nonexistent is not present\n");
4027 } else {
4028 printf("ERROR: test.tcheck_nonexistent is present!?\n");
4029 goto test_dict_table_schema_check_end;
4030 }
4031
4032 test_dict_table_schema_check_end:
4033
4034 mutex_exit(&dict_sys.mutex);
4035 }
4036 /* @} */
4037
/* save/fetch aux macros @{ */
/* Arbitrary fixture values shared by the save/fetch round-trip tests:
test_dict_stats_save() writes them into the persistent stats tables and
test_dict_stats_fetch_from_ps() asserts that exactly these values are
read back. The numbers themselves are meaningless; they only need to be
distinct so mix-ups are detectable. */
#define TEST_DATABASE_NAME		"foobardb"
#define TEST_TABLE_NAME			"test_dict_stats"

#define TEST_N_ROWS			111
#define TEST_CLUSTERED_INDEX_SIZE	222
#define TEST_SUM_OF_OTHER_INDEX_SIZES	333

/* tidx1: a single-column index */
#define TEST_IDX1_NAME			"tidx1"
#define TEST_IDX1_COL1_NAME		"tidx1_col1"
#define TEST_IDX1_INDEX_SIZE		123
#define TEST_IDX1_N_LEAF_PAGES		234
#define TEST_IDX1_N_DIFF1		50
#define TEST_IDX1_N_DIFF1_SAMPLE_SIZE	500

/* tidx2: a four-column index, with n_diff/sample_size stats for each
of its 1..4 column prefixes */
#define TEST_IDX2_NAME			"tidx2"
#define TEST_IDX2_COL1_NAME		"tidx2_col1"
#define TEST_IDX2_COL2_NAME		"tidx2_col2"
#define TEST_IDX2_COL3_NAME		"tidx2_col3"
#define TEST_IDX2_COL4_NAME		"tidx2_col4"
#define TEST_IDX2_INDEX_SIZE		321
#define TEST_IDX2_N_LEAF_PAGES		432
#define TEST_IDX2_N_DIFF1		60
#define TEST_IDX2_N_DIFF1_SAMPLE_SIZE	600
#define TEST_IDX2_N_DIFF2		61
#define TEST_IDX2_N_DIFF2_SAMPLE_SIZE	610
#define TEST_IDX2_N_DIFF3		62
#define TEST_IDX2_N_DIFF3_SAMPLE_SIZE	620
#define TEST_IDX2_N_DIFF4		63
#define TEST_IDX2_N_DIFF4_SAMPLE_SIZE	630
/* @} */
4069
/* test_dict_stats_save() @{ */
/** Test dict_stats_save(): craft a dummy, stack-allocated table with two
indexes (tidx1 with 1 unique column, tidx2 with 4), fill in the TEST_*
statistics values, persist them with dict_stats_save(), and then print
SELECT statements that can be run manually against the stats tables to
verify the rows written. The saved values are also read back and checked
programmatically by test_dict_stats_fetch_from_ps(). */
void
test_dict_stats_save()
{
	dict_table_t	table;
	dict_index_t	index1;
	dict_field_t	index1_fields[1];
	ib_uint64_t	index1_stat_n_diff_key_vals[1];
	ib_uint64_t	index1_stat_n_sample_sizes[1];
	dict_index_t	index2;
	dict_field_t	index2_fields[4];
	ib_uint64_t	index2_stat_n_diff_key_vals[4];
	ib_uint64_t	index2_stat_n_sample_sizes[4];
	dberr_t		ret;

	/* craft a dummy dict_table_t; it is never registered in the
	dictionary cache — only the fields that dict_stats_save() reads
	are initialized */
	table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
	table.stat_n_rows = TEST_N_ROWS;
	table.stat_clustered_index_size = TEST_CLUSTERED_INDEX_SIZE;
	table.stat_sum_of_other_index_sizes = TEST_SUM_OF_OTHER_INDEX_SIZES;
	UT_LIST_INIT(table.indexes, &dict_index_t::indexes);
#ifdef BTR_CUR_HASH_ADAPT
	UT_LIST_INIT(table.freed_indexes, &dict_index_t::indexes);
#endif /* BTR_CUR_HASH_ADAPT */
	UT_LIST_ADD_LAST(table.indexes, &index1);
	UT_LIST_ADD_LAST(table.indexes, &index2);
	/* magic_n is only present (and only checked) in debug builds */
	ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
	ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);

	/* index1 = tidx1: one unique column, so one n_diff/sample slot */
	index1.name = TEST_IDX1_NAME;
	index1.table = &table;
	index1.cached = 1;
	index1.n_uniq = 1;
	index1.fields = index1_fields;
	index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
	index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
	index1.stat_index_size = TEST_IDX1_INDEX_SIZE;
	index1.stat_n_leaf_pages = TEST_IDX1_N_LEAF_PAGES;
	index1_fields[0].name = TEST_IDX1_COL1_NAME;
	index1_stat_n_diff_key_vals[0] = TEST_IDX1_N_DIFF1;
	index1_stat_n_sample_sizes[0] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE;

	/* index2 = tidx2: four unique columns, so four prefix slots */
	ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
	index2.name = TEST_IDX2_NAME;
	index2.table = &table;
	index2.cached = 1;
	index2.n_uniq = 4;
	index2.fields = index2_fields;
	index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
	index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
	index2.stat_index_size = TEST_IDX2_INDEX_SIZE;
	index2.stat_n_leaf_pages = TEST_IDX2_N_LEAF_PAGES;
	index2_fields[0].name = TEST_IDX2_COL1_NAME;
	index2_fields[1].name = TEST_IDX2_COL2_NAME;
	index2_fields[2].name = TEST_IDX2_COL3_NAME;
	index2_fields[3].name = TEST_IDX2_COL4_NAME;
	index2_stat_n_diff_key_vals[0] = TEST_IDX2_N_DIFF1;
	index2_stat_n_diff_key_vals[1] = TEST_IDX2_N_DIFF2;
	index2_stat_n_diff_key_vals[2] = TEST_IDX2_N_DIFF3;
	index2_stat_n_diff_key_vals[3] = TEST_IDX2_N_DIFF4;
	index2_stat_n_sample_sizes[0] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE;
	index2_stat_n_sample_sizes[1] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE;
	index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE;
	index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE;

	ret = dict_stats_save(&table, NULL);

	ut_a(ret == DB_SUCCESS);

	/* print verification queries for a human to run by hand; the
	expected row counts are 1 for the table row, 3 for tidx1 (size,
	n_leaf_pages, n_diff_pfx01) and 6 for tidx2 (size, n_leaf_pages,
	n_diff_pfx01..04) */
	printf("\nOK: stats saved successfully, now go ahead and read"
	       " what's inside %s and %s:\n\n",
	       TABLE_STATS_NAME_PRINT,
	       INDEX_STATS_NAME_PRINT);

	printf("SELECT COUNT(*) = 1 AS table_stats_saved_successfully\n"
	       "FROM %s\n"
	       "WHERE\n"
	       "database_name = '%s' AND\n"
	       "table_name = '%s' AND\n"
	       "n_rows = %d AND\n"
	       "clustered_index_size = %d AND\n"
	       "sum_of_other_index_sizes = %d;\n"
	       "\n",
	       TABLE_STATS_NAME_PRINT,
	       TEST_DATABASE_NAME,
	       TEST_TABLE_NAME,
	       TEST_N_ROWS,
	       TEST_CLUSTERED_INDEX_SIZE,
	       TEST_SUM_OF_OTHER_INDEX_SIZES);

	printf("SELECT COUNT(*) = 3 AS tidx1_stats_saved_successfully\n"
	       "FROM %s\n"
	       "WHERE\n"
	       "database_name = '%s' AND\n"
	       "table_name = '%s' AND\n"
	       "index_name = '%s' AND\n"
	       "(\n"
	       " (stat_name = 'size' AND stat_value = %d AND"
	       "  sample_size IS NULL) OR\n"
	       " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
	       "  sample_size IS NULL) OR\n"
	       " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s')\n"
	       ");\n"
	       "\n",
	       INDEX_STATS_NAME_PRINT,
	       TEST_DATABASE_NAME,
	       TEST_TABLE_NAME,
	       TEST_IDX1_NAME,
	       TEST_IDX1_INDEX_SIZE,
	       TEST_IDX1_N_LEAF_PAGES,
	       TEST_IDX1_N_DIFF1,
	       TEST_IDX1_N_DIFF1_SAMPLE_SIZE,
	       TEST_IDX1_COL1_NAME);

	printf("SELECT COUNT(*) = 6 AS tidx2_stats_saved_successfully\n"
	       "FROM %s\n"
	       "WHERE\n"
	       "database_name = '%s' AND\n"
	       "table_name = '%s' AND\n"
	       "index_name = '%s' AND\n"
	       "(\n"
	       " (stat_name = 'size' AND stat_value = %d AND"
	       "  sample_size IS NULL) OR\n"
	       " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
	       "  sample_size IS NULL) OR\n"
	       " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s') OR\n"
	       " (stat_name = 'n_diff_pfx02' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s,%s') OR\n"
	       " (stat_name = 'n_diff_pfx03' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s,%s,%s') OR\n"
	       " (stat_name = 'n_diff_pfx04' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s,%s,%s,%s')\n"
	       ");\n"
	       "\n",
	       INDEX_STATS_NAME_PRINT,
	       TEST_DATABASE_NAME,
	       TEST_TABLE_NAME,
	       TEST_IDX2_NAME,
	       TEST_IDX2_INDEX_SIZE,
	       TEST_IDX2_N_LEAF_PAGES,
	       TEST_IDX2_N_DIFF1,
	       TEST_IDX2_N_DIFF1_SAMPLE_SIZE, TEST_IDX2_COL1_NAME,
	       TEST_IDX2_N_DIFF2,
	       TEST_IDX2_N_DIFF2_SAMPLE_SIZE,
	       TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME,
	       TEST_IDX2_N_DIFF3,
	       TEST_IDX2_N_DIFF3_SAMPLE_SIZE,
	       TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
	       TEST_IDX2_N_DIFF4,
	       TEST_IDX2_N_DIFF4_SAMPLE_SIZE,
	       TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
	       TEST_IDX2_COL4_NAME);
}
/* @} */
4226
/* test_dict_stats_fetch_from_ps() @{ */
/** Test dict_stats_fetch_from_ps(): build the same dummy two-index table
skeleton as test_dict_stats_save() (statistics fields left unset this
time), fetch the persisted statistics into it, and assert that every
fetched value equals the TEST_* constant previously saved. Must run
after test_dict_stats_save() has populated the stats tables. */
void
test_dict_stats_fetch_from_ps()
{
	dict_table_t	table;
	dict_index_t	index1;
	ib_uint64_t	index1_stat_n_diff_key_vals[1];
	ib_uint64_t	index1_stat_n_sample_sizes[1];
	dict_index_t	index2;
	ib_uint64_t	index2_stat_n_diff_key_vals[4];
	ib_uint64_t	index2_stat_n_sample_sizes[4];
	dberr_t		ret;

	/* craft a dummy dict_table_t; the stat_* members are outputs,
	filled in by the fetch below */
	table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
	UT_LIST_INIT(table.indexes, &dict_index_t::indexes);
#ifdef BTR_CUR_HASH_ADAPT
	UT_LIST_INIT(table.freed_indexes, &dict_index_t::indexes);
#endif /* BTR_CUR_HASH_ADAPT */
	UT_LIST_ADD_LAST(table.indexes, &index1);
	UT_LIST_ADD_LAST(table.indexes, &index2);
	ut_d(table.magic_n = DICT_TABLE_MAGIC_N);

	/* index1 = tidx1: one unique column */
	index1.name = TEST_IDX1_NAME;
	ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
	index1.cached = 1;
	index1.n_uniq = 1;
	index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
	index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;

	/* index2 = tidx2: four unique columns */
	index2.name = TEST_IDX2_NAME;
	ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
	index2.cached = 1;
	index2.n_uniq = 4;
	index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
	index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;

	ret = dict_stats_fetch_from_ps(&table);

	ut_a(ret == DB_SUCCESS);

	/* every value read back must match what test_dict_stats_save()
	wrote */
	ut_a(table.stat_n_rows == TEST_N_ROWS);
	ut_a(table.stat_clustered_index_size == TEST_CLUSTERED_INDEX_SIZE);
	ut_a(table.stat_sum_of_other_index_sizes
	     == TEST_SUM_OF_OTHER_INDEX_SIZES);

	ut_a(index1.stat_index_size == TEST_IDX1_INDEX_SIZE);
	ut_a(index1.stat_n_leaf_pages == TEST_IDX1_N_LEAF_PAGES);
	ut_a(index1_stat_n_diff_key_vals[0] == TEST_IDX1_N_DIFF1);
	ut_a(index1_stat_n_sample_sizes[0] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE);

	ut_a(index2.stat_index_size == TEST_IDX2_INDEX_SIZE);
	ut_a(index2.stat_n_leaf_pages == TEST_IDX2_N_LEAF_PAGES);
	ut_a(index2_stat_n_diff_key_vals[0] == TEST_IDX2_N_DIFF1);
	ut_a(index2_stat_n_sample_sizes[0] == TEST_IDX2_N_DIFF1_SAMPLE_SIZE);
	ut_a(index2_stat_n_diff_key_vals[1] == TEST_IDX2_N_DIFF2);
	ut_a(index2_stat_n_sample_sizes[1] == TEST_IDX2_N_DIFF2_SAMPLE_SIZE);
	ut_a(index2_stat_n_diff_key_vals[2] == TEST_IDX2_N_DIFF3);
	ut_a(index2_stat_n_sample_sizes[2] == TEST_IDX2_N_DIFF3_SAMPLE_SIZE);
	ut_a(index2_stat_n_diff_key_vals[3] == TEST_IDX2_N_DIFF4);
	ut_a(index2_stat_n_sample_sizes[3] == TEST_IDX2_N_DIFF4_SAMPLE_SIZE);

	printf("OK: fetch successful\n");
}
/* @} */
4292
/* test_dict_stats_all() @{ */
/** Run all unit tests in this file. The order matters:
test_dict_stats_fetch_from_ps() asserts the exact values that
test_dict_stats_save() wrote, so save must run first. */
void
test_dict_stats_all()
{
	test_dict_table_schema_check();

	test_dict_stats_save();

	test_dict_stats_fetch_from_ps();
}
/* @} */
4304
4305 #endif /* UNIV_ENABLE_UNIT_TEST_DICT_STATS */
4306 /* @} */
4307