1 /*****************************************************************************
2
3 Copyright (c) 2009, 2019, Oracle and/or its affiliates. All Rights Reserved.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file dict/dict0stats.cc
29 Code used for calculating and manipulating table statistics.
30
31 Created Jan 06, 2010 Vasil Dimov
32 *******************************************************/
33
34 #ifndef UNIV_HOTBACKUP
35
36 #include "univ.i"
37
38 #include "btr0btr.h" /* btr_get_size() */
39 #include "btr0cur.h" /* btr_estimate_number_of_different_key_vals() */
40 #include "dict0dict.h" /* dict_table_get_first_index(), dict_fs2utf8() */
41 #include "dict0mem.h" /* DICT_TABLE_MAGIC_N */
42 #include "dict0stats.h"
43 #include "data0type.h" /* dtype_t */
44 #include "db0err.h" /* dberr_t */
45 #include "page0page.h" /* page_align() */
46 #include "pars0pars.h" /* pars_info_create() */
47 #include "pars0types.h" /* pars_info_t */
48 #include "que0que.h" /* que_eval_sql() */
49 #include "rem0cmp.h" /* REC_MAX_N_FIELDS,cmp_rec_rec_with_match() */
50 #include "row0sel.h" /* sel_node_t */
51 #include "row0types.h" /* sel_node_t */
52 #include "trx0trx.h" /* trx_create() */
53 #include "trx0roll.h" /* trx_rollback_to_savepoint() */
54 #include "ut0rnd.h" /* ut_rnd_interval() */
55 #include "ut0ut.h" /* ut_format_name(), ut_time() */
56
57 #include <algorithm>
58 #include <map>
59 #include <vector>
60
61 /* Sampling algorithm description @{
62
63 The algorithm is controlled by one number - N_SAMPLE_PAGES(index),
64 let it be A, which is the number of leaf pages to analyze for a given index
65 for each n-prefix (if the index is on 3 columns, then 3*A leaf pages will be
66 analyzed).
67
68 Let the total number of leaf pages in the table be T.
69 Level 0 - leaf pages, level H - root.
70
71 Definition: N-prefix-boring record is a record on a non-leaf page that equals
72 the next (to the right, cross page boundaries, skipping the supremum and
infimum) record on the same level when looking at the first n-prefix columns.
74 The last (user) record on a level is not boring (it does not match the
75 non-existent user record to the right). We call the records boring because all
76 the records on the page below a boring record are equal to that boring record.
77
78 We avoid diving below boring records when searching for a leaf page to
79 estimate the number of distinct records because we know that such a leaf
80 page will have number of distinct records == 1.
81
82 For each n-prefix: start from the root level and full scan subsequent lower
83 levels until a level that contains at least A*10 distinct records is found.
84 Lets call this level LA.
85 As an optimization the search is canceled if it has reached level 1 (never
86 descend to the level 0 (leaf)) and also if the next level to be scanned
87 would contain more than A pages. The latter is because the user has asked
88 to analyze A leaf pages and it does not make sense to scan much more than
89 A non-leaf pages with the sole purpose of finding a good sample of A leaf
90 pages.
91
92 After finding the appropriate level LA with >A*10 distinct records (or less in
93 the exceptions described above), divide it into groups of equal records and
94 pick A such groups. Then pick the last record from each group. For example,
95 let the level be:
96
97 index: 0,1,2,3,4,5,6,7,8,9,10
98 record: 1,1,1,2,2,7,7,7,7,7,9
99
100 There are 4 groups of distinct records and if A=2 random ones are selected,
101 e.g. 1,1,1 and 7,7,7,7,7, then records with indexes 2 and 9 will be selected.
102
103 After selecting A records as described above, dive below them to find A leaf
104 pages and analyze them, finding the total number of distinct records. The
105 dive to the leaf level is performed by selecting a non-boring record from
106 each page and diving below it.
107
108 This way, a total of A leaf pages are analyzed for the given n-prefix.
109
110 Let the number of different key values found in each leaf page i be Pi (i=1..A).
111 Let N_DIFF_AVG_LEAF be (P1 + P2 + ... + PA) / A.
112 Let the number of different key values on level LA be N_DIFF_LA.
113 Let the total number of records on level LA be TOTAL_LA.
114 Let R be N_DIFF_LA / TOTAL_LA, we assume this ratio is the same on the
115 leaf level.
116 Let the number of leaf pages be N.
117 Then the total number of different key values on the leaf level is:
118 N * R * N_DIFF_AVG_LEAF.
119 See REF01 for the implementation.
120
121 The above describes how to calculate the cardinality of an index.
122 This algorithm is executed for each n-prefix of a multi-column index
123 where n=1..n_uniq.
124 @} */
125
126 /* names of the tables from the persistent statistics storage */
127 #define TABLE_STATS_NAME "mysql/innodb_table_stats"
128 #define TABLE_STATS_NAME_PRINT "mysql.innodb_table_stats"
129 #define INDEX_STATS_NAME "mysql/innodb_index_stats"
130 #define INDEX_STATS_NAME_PRINT "mysql.innodb_index_stats"
131
132 #ifdef UNIV_STATS_DEBUG
133 #define DEBUG_PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__)
134 #else /* UNIV_STATS_DEBUG */
135 #define DEBUG_PRINTF(fmt, ...) /* noop */
136 #endif /* UNIV_STATS_DEBUG */
137
138 /* Gets the number of leaf pages to sample in persistent stats estimation */
139 #define N_SAMPLE_PAGES(index) \
140 static_cast<ib_uint64_t>( \
141 (index)->table->stats_sample_pages != 0 \
142 ? (index)->table->stats_sample_pages \
143 : srv_stats_persistent_sample_pages)
144
145 /* number of distinct records on a given level that are required to stop
146 descending to lower levels and fetch N_SAMPLE_PAGES(index) records
147 from that level */
148 #define N_DIFF_REQUIRED(index) (N_SAMPLE_PAGES(index) * 10)
149
150 /* A dynamic array where we store the boundaries of each distinct group
151 of keys. For example if a btree level is:
152 index: 0,1,2,3,4,5,6,7,8,9,10,11,12
153 data: b,b,b,b,b,b,g,g,j,j,j, x, y
154 then we would store 5,7,10,11,12 in the array. */
155 typedef std::vector<ib_uint64_t> boundaries_t;
156
157 /* This is used to arrange the index based on the index name.
158 @return true if index_name1 is smaller than index_name2. */
struct index_cmp
{
	/** Strict weak ordering on index names, used as the comparator
	of index_map_t.
	@return true if lhs sorts before rhs (byte-wise strcmp order) */
	bool operator()(const char* lhs, const char* rhs) const
	{
		return(strcmp(lhs, rhs) < 0);
	}
};
165
166 typedef std::map<const char*, dict_index_t*, index_cmp> index_map_t;
167
168 /*********************************************************************//**
169 Checks whether an index should be ignored in stats manipulations:
170 * stats fetch
171 * stats recalc
172 * stats save
173 @return true if exists and all tables are ok */
174 UNIV_INLINE
175 bool
dict_stats_should_ignore_index(const dict_index_t * index)176 dict_stats_should_ignore_index(
177 /*===========================*/
178 const dict_index_t* index) /*!< in: index */
179 {
180 return((index->type & DICT_FTS)
181 || dict_index_is_corrupted(index)
182 || index->to_be_dropped
183 || *index->name == TEMP_INDEX_PREFIX);
184 }
185
186 /*********************************************************************//**
187 Checks whether the persistent statistics storage exists and that all
188 tables have the proper structure.
189 @return true if exists and all tables are ok */
static
bool
dict_stats_persistent_storage_check(
/*================================*/
	bool	caller_has_dict_sys_mutex)	/*!< in: true if the caller
						owns dict_sys->mutex */
{
	/* definition for the table TABLE_STATS_NAME */
	/* NOTE(review): column lengths are in bytes; 192 presumably is
	64 characters * 3 bytes/char (utf8) — confirm against the DDL of
	mysql.innodb_table_stats. */
	dict_col_meta_t	table_stats_columns[] = {
		{"database_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"table_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"last_update", DATA_FIXBINARY,
			DATA_NOT_NULL, 4},

		{"n_rows", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8},

		{"clustered_index_size", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8},

		{"sum_of_other_index_sizes", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8}
	};
	dict_table_schema_t	table_stats_schema = {
		TABLE_STATS_NAME,
		UT_ARR_SIZE(table_stats_columns),
		table_stats_columns,
		0 /* n_foreign */,
		0 /* n_referenced */
	};

	/* definition for the table INDEX_STATS_NAME */
	dict_col_meta_t	index_stats_columns[] = {
		{"database_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"table_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"index_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"last_update", DATA_FIXBINARY,
			DATA_NOT_NULL, 4},

		{"stat_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 64*3},

		{"stat_value", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8},

		/* sample_size is the only nullable column */
		{"sample_size", DATA_INT,
			DATA_UNSIGNED, 8},

		{"stat_description", DATA_VARMYSQL,
			DATA_NOT_NULL, 1024*3}
	};
	dict_table_schema_t	index_stats_schema = {
		INDEX_STATS_NAME,
		UT_ARR_SIZE(index_stats_columns),
		index_stats_columns,
		0 /* n_foreign */,
		0 /* n_referenced */
	};

	char		errstr[512];
	dberr_t		ret;

	/* The schema check requires dict_sys->mutex; take it only if the
	caller does not already own it. */
	if (!caller_has_dict_sys_mutex) {
		mutex_enter(&(dict_sys->mutex));
	}

	ut_ad(mutex_own(&dict_sys->mutex));

	/* first check table_stats */
	ret = dict_table_schema_check(&table_stats_schema, errstr,
				      sizeof(errstr));
	if (ret == DB_SUCCESS) {
		/* if it is ok, then check index_stats */
		ret = dict_table_schema_check(&index_stats_schema, errstr,
					      sizeof(errstr));
	}

	if (!caller_has_dict_sys_mutex) {
		mutex_exit(&(dict_sys->mutex));
	}

	/* On failure report the detailed reason produced by
	dict_table_schema_check() and signal the caller to fall back. */
	if (ret != DB_SUCCESS) {
		ut_print_timestamp(stderr);
		fprintf(stderr, " InnoDB: Error: %s\n", errstr);
		return(false);
	}
	/* else */

	return(true);
}
290
291 /** Executes a given SQL statement using the InnoDB internal SQL parser.
292 This function will free the pinfo object.
293 @param[in,out] pinfo pinfo to pass to que_eval_sql() must already
294 have any literals bound to it
295 @param[in] sql SQL string to execute
296 @param[in,out] trx in case of NULL the function will allocate and
297 free the trx object. If it is not NULL then it will be rolled back
298 only in the case of error, but not freed.
299 @return DB_SUCCESS or error code */
static
dberr_t
dict_stats_exec_sql(
	pars_info_t*	pinfo,
	const char*	sql,
	trx_t*		trx)
{
	dberr_t	err;
	bool	trx_started = false;
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(mutex_own(&dict_sys->mutex));

	/* Bail out if the stats tables are missing or malformed. We
	still free pinfo here because this function's contract is to
	consume it on every path. */
	if (!dict_stats_persistent_storage_check(true)) {
		pars_info_free(pinfo);
		return(DB_STATS_DO_NOT_EXIST);
	}

	/* No caller-supplied trx: run inside a private background trx
	that we commit (or roll back) and free ourselves. */
	if (trx == NULL) {
		trx = trx_allocate_for_background();
		trx_start_if_not_started(trx);
		trx_started = true;
	}

	err = que_eval_sql(pinfo, sql, FALSE, trx); /* pinfo is freed here */

	/* Debug-only fault injection; only simulates a failure on a
	caller-owned trx. */
	DBUG_EXECUTE_IF("stats_index_error",
		if (!trx_started) {
			err = DB_STATS_DO_NOT_EXIST;
			trx->error_state = DB_STATS_DO_NOT_EXIST;
		});

	/* A caller-owned trx is deliberately left uncommitted on
	success — the caller decides when to commit it. */
	if (!trx_started && err == DB_SUCCESS) {
		return(DB_SUCCESS);
	}

	if (err == DB_SUCCESS) {
		trx_commit_for_mysql(trx);
	} else {
		/* On error roll back; a caller-owned trx is rolled back
		but not freed (see function header comment). */
		trx->op_info = "rollback of internal trx on stats tables";
		trx->dict_operation_lock_mode = RW_X_LATCH;
		trx_rollback_to_savepoint(trx, NULL);
		trx->dict_operation_lock_mode = 0;
		trx->op_info = "";
		/* The rollback must have cleared the error state. */
		ut_a(trx->error_state == DB_SUCCESS);
	}

	if (trx_started) {
		trx_free_for_background(trx);
	}

	return(err);
}
354
355 /*********************************************************************//**
356 Duplicate a table object and its indexes.
357 This function creates a dummy dict_table_t object and initializes the
358 following table and index members:
359 dict_table_t::id (copied)
360 dict_table_t::heap (newly created)
361 dict_table_t::name (copied)
362 dict_table_t::corrupted (copied)
363 dict_table_t::indexes<> (newly created)
364 dict_table_t::magic_n
365 for each entry in dict_table_t::indexes, the following are initialized:
366 (indexes that have DICT_FTS set in index->type are skipped)
367 dict_index_t::id (copied)
368 dict_index_t::name (copied)
369 dict_index_t::table_name (points to the copied table name)
370 dict_index_t::table (points to the above semi-initialized object)
371 dict_index_t::type (copied)
372 dict_index_t::to_be_dropped (copied)
373 dict_index_t::online_status (copied)
374 dict_index_t::n_uniq (copied)
375 dict_index_t::fields[] (newly created, only first n_uniq, only fields[i].name)
376 dict_index_t::indexes<> (newly created)
377 dict_index_t::stat_n_diff_key_vals[] (only allocated, left uninitialized)
378 dict_index_t::stat_n_sample_sizes[] (only allocated, left uninitialized)
379 dict_index_t::stat_n_non_null_key_vals[] (only allocated, left uninitialized)
380 dict_index_t::magic_n
381 The returned object should be freed with dict_stats_table_clone_free()
382 when no longer needed.
383 @return incomplete table object */
static
dict_table_t*
dict_stats_table_clone_create(
/*==========================*/
	const dict_table_t*	table)	/*!< in: table whose stats to copy */
{
	size_t		heap_size;
	dict_index_t*	index;

	/* Estimate the size needed for the table and all of its indexes.
	This first pass must account for exactly the same objects that the
	copy pass below allocates, so that a single heap block of heap_size
	bytes suffices. */

	heap_size = 0;
	heap_size += sizeof(dict_table_t);
	heap_size += strlen(table->name) + 1;

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		/* Skip the same indexes the copy pass skips, otherwise
		the size estimate would diverge from actual usage. */
		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_univ(index));

		ulint	n_uniq = dict_index_get_n_unique(index);

		heap_size += sizeof(dict_index_t);
		heap_size += strlen(index->name) + 1;
		heap_size += n_uniq * sizeof(index->fields[0]);
		for (ulint i = 0; i < n_uniq; i++) {
			heap_size += strlen(index->fields[i].name) + 1;
		}
		heap_size += n_uniq * sizeof(index->stat_n_diff_key_vals[0]);
		heap_size += n_uniq * sizeof(index->stat_n_sample_sizes[0]);
		heap_size += n_uniq * sizeof(index->stat_n_non_null_key_vals[0]);
	}

	/* Allocate the memory and copy the members */

	mem_heap_t*	heap;

	heap = mem_heap_create(heap_size);

	dict_table_t*	t;

	t = (dict_table_t*) mem_heap_alloc(heap, sizeof(*t));

	UNIV_MEM_ASSERT_RW_ABORT(&table->id, sizeof(table->id));
	t->id = table->id;

	t->heap = heap;

	UNIV_MEM_ASSERT_RW_ABORT(table->name, strlen(table->name) + 1);
	t->name = (char*) mem_heap_strdup(heap, table->name);

	t->corrupted = table->corrupted;

	/* This private object "t" is not shared with other threads, so
	we do not need the stats_latch (thus we pass false below). The
	dict_table_stats_lock()/unlock() routines will do nothing. */
	dict_table_stats_latch_create(t, false);

	UT_LIST_INIT(t->indexes);

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_univ(index));

		dict_index_t*	idx;

		idx = (dict_index_t*) mem_heap_alloc(heap, sizeof(*idx));

		UNIV_MEM_ASSERT_RW_ABORT(&index->id, sizeof(index->id));
		idx->id = index->id;

		UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name) + 1);
		idx->name = (char*) mem_heap_strdup(heap, index->name);

		/* Point at the clone's own copy of the table name, not at
		the original table's memory. */
		idx->table_name = t->name;

		idx->table = t;

		idx->type = index->type;

		/* NOTE(review): the function header says to_be_dropped is
		"(copied)", but it is reset here. Any to-be-dropped index
		was already skipped above, so 0 is the only possible value
		anyway. */
		idx->to_be_dropped = 0;

		idx->online_status = ONLINE_INDEX_COMPLETE;

		idx->n_uniq = index->n_uniq;

		/* Only the first n_uniq fields are cloned, and of each
		field only the name (see function header comment). */
		idx->fields = (dict_field_t*) mem_heap_alloc(
			heap, idx->n_uniq * sizeof(idx->fields[0]));

		for (ulint i = 0; i < idx->n_uniq; i++) {
			UNIV_MEM_ASSERT_RW_ABORT(index->fields[i].name, strlen(index->fields[i].name) + 1);
			idx->fields[i].name = (char*) mem_heap_strdup(
				heap, index->fields[i].name);
		}

		/* hook idx into t->indexes */
		UT_LIST_ADD_LAST(indexes, t->indexes, idx);

		/* Stats arrays are only allocated here; they are filled in
		later, e.g. by dict_stats_copy(). */
		idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_diff_key_vals[0]));

		idx->stat_n_sample_sizes = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_sample_sizes[0]));

		idx->stat_n_non_null_key_vals = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_non_null_key_vals[0]));
		ut_d(idx->magic_n = DICT_INDEX_MAGIC_N);
	}

	ut_d(t->magic_n = DICT_TABLE_MAGIC_N);

	return(t);
}
511
512 /*********************************************************************//**
513 Free the resources occupied by an object returned by
514 dict_stats_table_clone_create(). */
static
void
dict_stats_table_clone_free(
/*========================*/
	dict_table_t*	t)	/*!< in: dummy table object to free */
{
	/* Destroy the latch before freeing the heap: t itself lives
	inside t->heap, so the heap free must come last. */
	dict_table_stats_latch_destroy(t);
	mem_heap_free(t->heap);
}
524
525 /*********************************************************************//**
526 Write all zeros (or 1 where it makes sense) into an index
527 statistics members. The resulting stats correspond to an empty index.
528 The caller must own index's table stats latch in X mode
529 (dict_table_stats_lock(table, RW_X_LATCH)) */
530 static
531 void
dict_stats_empty_index(dict_index_t * index)532 dict_stats_empty_index(
533 /*===================*/
534 dict_index_t* index) /*!< in/out: index */
535 {
536 ut_ad(!(index->type & DICT_FTS));
537 ut_ad(!dict_index_is_univ(index));
538
539 ulint n_uniq = index->n_uniq;
540
541 for (ulint i = 0; i < n_uniq; i++) {
542 index->stat_n_diff_key_vals[i] = 0;
543 index->stat_n_sample_sizes[i] = 1;
544 index->stat_n_non_null_key_vals[i] = 0;
545 }
546
547 index->stat_index_size = 1;
548 index->stat_n_leaf_pages = 1;
549 }
550
551 /*********************************************************************//**
552 Write all zeros (or 1 where it makes sense) into a table and its indexes'
553 statistics members. The resulting stats correspond to an empty table. */
554 static
555 void
dict_stats_empty_table(dict_table_t * table)556 dict_stats_empty_table(
557 /*===================*/
558 dict_table_t* table) /*!< in/out: table */
559 {
560 /* Zero the stats members */
561
562 dict_table_stats_lock(table, RW_X_LATCH);
563
564 table->stat_n_rows = 0;
565 table->stat_clustered_index_size = 1;
566 /* 1 page for each index, not counting the clustered */
567 table->stat_sum_of_other_index_sizes
568 = UT_LIST_GET_LEN(table->indexes) - 1;
569 table->stat_modified_counter = 0;
570
571 dict_index_t* index;
572
573 for (index = dict_table_get_first_index(table);
574 index != NULL;
575 index = dict_table_get_next_index(index)) {
576
577 if (index->type & DICT_FTS) {
578 continue;
579 }
580
581 ut_ad(!dict_index_is_univ(index));
582
583 dict_stats_empty_index(index);
584 }
585
586 table->stat_initialized = TRUE;
587
588 dict_table_stats_unlock(table, RW_X_LATCH);
589 }
590
591 /*********************************************************************//**
592 Check whether index's stats are initialized (assert if they are not). */
static
void
dict_stats_assert_initialized_index(
/*================================*/
	const dict_index_t*	index)	/*!< in: index */
{
	/* Assert that every stats member of the index holds defined
	(written) memory. NOTE(review): UNIV_MEM_ASSERT_RW_ABORT is
	presumably a no-op unless built with memory-checking support —
	confirm in univ.i. */
	UNIV_MEM_ASSERT_RW_ABORT(
		index->stat_n_diff_key_vals,
		index->n_uniq * sizeof(index->stat_n_diff_key_vals[0]));

	UNIV_MEM_ASSERT_RW_ABORT(
		index->stat_n_sample_sizes,
		index->n_uniq * sizeof(index->stat_n_sample_sizes[0]));

	UNIV_MEM_ASSERT_RW_ABORT(
		index->stat_n_non_null_key_vals,
		index->n_uniq * sizeof(index->stat_n_non_null_key_vals[0]));

	UNIV_MEM_ASSERT_RW_ABORT(
		&index->stat_index_size,
		sizeof(index->stat_index_size));

	UNIV_MEM_ASSERT_RW_ABORT(
		&index->stat_n_leaf_pages,
		sizeof(index->stat_n_leaf_pages));
}
619
620 /*********************************************************************//**
621 Check whether table's stats are initialized (assert if they are not). */
static
void
dict_stats_assert_initialized(
/*==========================*/
	const dict_table_t*	table)	/*!< in: table */
{
	ut_a(table->stat_initialized);

	/* Assert that every table-level stats member holds defined
	(written) memory. */
	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_last_recalc,
			   sizeof(table->stats_last_recalc));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_persistent,
			   sizeof(table->stat_persistent));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_auto_recalc,
			   sizeof(table->stats_auto_recalc));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_sample_pages,
			   sizeof(table->stats_sample_pages));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_n_rows,
			   sizeof(table->stat_n_rows));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_clustered_index_size,
			   sizeof(table->stat_clustered_index_size));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_sum_of_other_index_sizes,
			   sizeof(table->stat_sum_of_other_index_sizes));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_modified_counter,
			   sizeof(table->stat_modified_counter));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_bg_flag,
			   sizeof(table->stats_bg_flag));

	/* Then check each index that participates in stats handling. */
	for (dict_index_t* index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		if (!dict_stats_should_ignore_index(index)) {
			dict_stats_assert_initialized_index(index);
		}
	}
}
666
667 #define INDEX_EQ(i1, i2) \
668 ((i1) != NULL \
669 && (i2) != NULL \
670 && (i1)->id == (i2)->id \
671 && strcmp((i1)->name, (i2)->name) == 0)
672
673 /*********************************************************************//**
674 Copy table and index statistics from one table to another, including index
675 stats. Extra indexes in src are ignored and extra indexes in dst are
676 initialized to correspond to an empty index. */
static
void
dict_stats_copy(
/*============*/
	dict_table_t*	dst,	/*!< in/out: destination table */
	const dict_table_t*	src)	/*!< in: source table */
{
	/* Copy the table-level stats members first. */
	dst->stats_last_recalc = src->stats_last_recalc;
	dst->stat_n_rows = src->stat_n_rows;
	dst->stat_clustered_index_size = src->stat_clustered_index_size;
	dst->stat_sum_of_other_index_sizes = src->stat_sum_of_other_index_sizes;
	dst->stat_modified_counter = src->stat_modified_counter;

	dict_index_t*	dst_idx;
	dict_index_t*	src_idx;

	/* Walk both index lists in lock-step. The comma expression in
	the update clause only advances src_idx while it is non-NULL,
	so src_idx stays NULL once src's list is exhausted (the dst
	list may be longer than src's). */
	for (dst_idx = dict_table_get_first_index(dst),
	     src_idx = dict_table_get_first_index(src);
	     dst_idx != NULL;
	     dst_idx = dict_table_get_next_index(dst_idx),
	     (src_idx != NULL
	      && (src_idx = dict_table_get_next_index(src_idx)))) {

		if (dict_stats_should_ignore_index(dst_idx)) {
			if (!(dst_idx->type & DICT_FTS)) {
				dict_stats_empty_index(dst_idx);
			}
			continue;
		}

		ut_ad(!dict_index_is_univ(dst_idx));

		/* If the lock-step positions do not match (same id and
		name), rescan src's list for the matching index. */
		if (!INDEX_EQ(src_idx, dst_idx)) {
			for (src_idx = dict_table_get_first_index(src);
			     src_idx != NULL;
			     src_idx = dict_table_get_next_index(src_idx)) {

				if (INDEX_EQ(src_idx, dst_idx)) {
					break;
				}
			}
		}

		/* Extra index in dst with no counterpart in src: init
		it to empty-index stats. */
		if (!INDEX_EQ(src_idx, dst_idx)) {
			dict_stats_empty_index(dst_idx);
			continue;
		}

		ulint	n_copy_el;

		if (dst_idx->n_uniq > src_idx->n_uniq) {
			n_copy_el = src_idx->n_uniq;
			/* Since src is smaller some elements in dst
			will remain untouched by the following memmove(),
			thus we init all of them here. */
			dict_stats_empty_index(dst_idx);
		} else {
			n_copy_el = dst_idx->n_uniq;
		}

		memmove(dst_idx->stat_n_diff_key_vals,
			src_idx->stat_n_diff_key_vals,
			n_copy_el * sizeof(dst_idx->stat_n_diff_key_vals[0]));

		memmove(dst_idx->stat_n_sample_sizes,
			src_idx->stat_n_sample_sizes,
			n_copy_el * sizeof(dst_idx->stat_n_sample_sizes[0]));

		memmove(dst_idx->stat_n_non_null_key_vals,
			src_idx->stat_n_non_null_key_vals,
			n_copy_el * sizeof(dst_idx->stat_n_non_null_key_vals[0]));

		dst_idx->stat_index_size = src_idx->stat_index_size;

		dst_idx->stat_n_leaf_pages = src_idx->stat_n_leaf_pages;
	}

	dst->stat_initialized = TRUE;
}
756
757 /*********************************************************************//**
758 Duplicate the stats of a table and its indexes.
759 This function creates a dummy dict_table_t object and copies the input
760 table's stats into it. The returned table object is not in the dictionary
761 cache and cannot be accessed by any other threads. In addition to the
762 members copied in dict_stats_table_clone_create() this function initializes
763 the following:
764 dict_table_t::stat_initialized
765 dict_table_t::stat_persistent
766 dict_table_t::stat_n_rows
767 dict_table_t::stat_clustered_index_size
768 dict_table_t::stat_sum_of_other_index_sizes
769 dict_table_t::stat_modified_counter
770 dict_index_t::stat_n_diff_key_vals[]
771 dict_index_t::stat_n_sample_sizes[]
772 dict_index_t::stat_n_non_null_key_vals[]
773 dict_index_t::stat_index_size
774 dict_index_t::stat_n_leaf_pages
775 The returned object should be freed with dict_stats_snapshot_free()
776 when no longer needed.
777 @return incomplete table object */
static
dict_table_t*
dict_stats_snapshot_create(
/*=======================*/
	dict_table_t*	table)	/*!< in: table whose stats to copy */
{
	/* Lock order: dict_sys->mutex first, then the table stats
	S-latch — released in reverse order below. */
	mutex_enter(&dict_sys->mutex);

	dict_table_stats_lock(table, RW_S_LATCH);

	dict_stats_assert_initialized(table);

	dict_table_t*	t;

	t = dict_stats_table_clone_create(table);

	dict_stats_copy(t, table);

	/* These members are not handled by dict_stats_copy(); copy
	them explicitly (see the function header comment). */
	t->stat_persistent = table->stat_persistent;
	t->stats_auto_recalc = table->stats_auto_recalc;
	t->stats_sample_pages = table->stats_sample_pages;
	t->stats_bg_flag = table->stats_bg_flag;

	dict_table_stats_unlock(table, RW_S_LATCH);

	mutex_exit(&dict_sys->mutex);

	return(t);
}
807
808 /*********************************************************************//**
809 Free the resources occupied by an object returned by
810 dict_stats_snapshot_create(). */
static
void
dict_stats_snapshot_free(
/*=====================*/
	dict_table_t*	t)	/*!< in: dummy table object to free */
{
	/* A snapshot is just a clone, so it is freed the same way. */
	dict_stats_table_clone_free(t);
}
819
820 /*********************************************************************//**
821 Calculates new estimates for index statistics. This function is
822 relatively quick and is used to calculate transient statistics that
823 are not saved on disk. This was the only way to calculate statistics
824 before the Persistent Statistics feature was introduced. */
static
void
dict_stats_update_transient_for_index(
/*==================================*/
	dict_index_t*	index)	/*!< in/out: index */
{
	if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
	    && (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO
		|| !dict_index_is_clust(index))) {
		/* If we have set a high innodb_force_recovery
		level, do not calculate statistics, as a badly
		corrupted index can cause a crash in it.
		Initialize some bogus index cardinality
		statistics, so that the data can be queried in
		various means, also via secondary indexes. */
		dict_stats_empty_index(index);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
	} else if (ibuf_debug && !dict_index_is_clust(index)) {
		/* Debug builds: with change-buffer debugging enabled,
		skip secondary indexes and use empty stats instead. */
		dict_stats_empty_index(index);
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
	} else {
		mtr_t	mtr;
		ulint	size;
		mtr_start(&mtr);
		/* S-lock the index tree while measuring its size. */
		mtr_s_lock(dict_index_get_lock(index), &mtr);

		size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);

		/* Only query the leaf-page count if the total size was
		obtainable; otherwise keep size == ULINT_UNDEFINED. */
		if (size != ULINT_UNDEFINED) {
			index->stat_index_size = size;

			size = btr_get_size(
				index, BTR_N_LEAF_PAGES, &mtr);
		}

		mtr_commit(&mtr);

		switch (size) {
		case ULINT_UNDEFINED:
			/* Size could not be determined: fall back to
			empty-index stats and bail out. */
			dict_stats_empty_index(index);
			return;
		case 0:
			/* The root node of the tree is a leaf */
			size = 1;
		}

		index->stat_n_leaf_pages = size;

		/* Sample leaf pages to estimate the cardinality of
		each key prefix (fills stat_n_diff_key_vals[] etc.). */
		btr_estimate_number_of_different_key_vals(index);
	}
}
876
877 /*********************************************************************//**
878 Calculates new estimates for table and index statistics. This function
879 is relatively quick and is used to calculate transient statistics that
880 are not saved on disk.
881 This was the only way to calculate statistics before the
882 Persistent Statistics feature was introduced. */
UNIV_INTERN
void
dict_stats_update_transient(
/*========================*/
	dict_table_t*	table)	/*!< in/out: table */
{
	dict_index_t*	index;
	ulint		sum_of_index_sizes	= 0;

	/* Find out the sizes of the indexes and how many different values
	for the key they approximately have */

	index = dict_table_get_first_index(table);

	if (dict_table_is_discarded(table)) {
		/* Nothing to do. */
		dict_stats_empty_table(table);
		return;
	} else if (index == NULL) {
		/* Table definition is corrupt */

		char	buf[MAX_FULL_NAME_LEN];
		ut_print_timestamp(stderr);
		fprintf(stderr, " InnoDB: table %s has no indexes. "
			"Cannot calculate statistics.\n",
			ut_format_name(table->name, TRUE, buf, sizeof(buf)));
		dict_stats_empty_table(table);
		return;
	}

	for (; index != NULL; index = dict_table_get_next_index(index)) {

		ut_ad(!dict_index_is_univ(index));

		if (index->type & DICT_FTS) {
			continue;
		}

		/* Reset first so that even indexes skipped below (e.g.
		corrupted ones) end up with defined, empty stats. */
		dict_stats_empty_index(index);

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		dict_stats_update_transient_for_index(index);

		sum_of_index_sizes += index->stat_index_size;
	}

	index = dict_table_get_first_index(table);

	/* Estimated row count = number of distinct values of the full
	unique key prefix of the clustered index. */
	table->stat_n_rows = index->stat_n_diff_key_vals[
		dict_index_get_n_unique(index) - 1];

	table->stat_clustered_index_size = index->stat_index_size;

	/* The sum above included the clustered index; subtract it to
	get the secondary-index total. */
	table->stat_sum_of_other_index_sizes = sum_of_index_sizes
		- index->stat_index_size;

	table->stats_last_recalc = ut_time();

	table->stat_modified_counter = 0;

	table->stat_initialized = TRUE;
}
948
949 /* @{ Pseudo code about the relation between the following functions
950
951 let N = N_SAMPLE_PAGES(index)
952
953 dict_stats_analyze_index()
954 for each n_prefix
955 search for good enough level:
956 dict_stats_analyze_index_level() // only called if level has <= N pages
957 // full scan of the level in one mtr
958 collect statistics about the given level
959 if we are not satisfied with the level, search next lower level
960 we have found a good enough level here
961 dict_stats_analyze_index_for_n_prefix(that level, stats collected above)
962 // full scan of the level in one mtr
963 dive below some records and analyze the leaf page there:
964 dict_stats_analyze_index_below_cur()
965 @} */
966
967 /*********************************************************************//**
968 Find the total number and the number of distinct keys on a given level in
969 an index. Each of the 1..n_uniq prefixes are looked up and the results are
970 saved in the array n_diff[0] .. n_diff[n_uniq - 1]. The total number of
971 records on the level is saved in total_recs.
972 Also, the index of the last record in each group of equal records is saved
973 in n_diff_boundaries[0..n_uniq - 1], records indexing starts from the leftmost
974 record on the level and continues cross pages boundaries, counting from 0. */
static
void
dict_stats_analyze_index_level(
/*===========================*/
	dict_index_t*	index,		/*!< in: index */
	ulint		level,		/*!< in: level */
	ib_uint64_t*	n_diff,		/*!< out: array for number of
					distinct keys for all prefixes */
	ib_uint64_t*	total_recs,	/*!< out: total number of records */
	ib_uint64_t*	total_pages,	/*!< out: total number of pages */
	boundaries_t*	n_diff_boundaries,/*!< out: boundaries of the groups
					of distinct keys */
	mtr_t*		mtr)	/*!< in/out: mini-transaction */
{
	ulint		n_uniq;
	mem_heap_t*	heap;
	btr_pcur_t	pcur;
	const page_t*	page;
	const rec_t*	rec;
	const rec_t*	prev_rec;
	bool		prev_rec_is_copied;
	byte*		prev_rec_buf = NULL;
	ulint		prev_rec_buf_size = 0;
	ulint*		rec_offsets;
	ulint*		prev_rec_offsets;
	ulint		i;

	DEBUG_PRINTF("    %s(table=%s, index=%s, level=%lu)\n", __func__,
		     index->table->name, index->name, level);

	/* The caller must hold an S-latch on the whole index; we only
	assert it here. */
	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_S_LOCK));

	n_uniq = dict_index_get_n_unique(index);

	/* elements in the n_diff array are 0..n_uniq-1 (inclusive) */
	memset(n_diff, 0x0, n_uniq * sizeof(n_diff[0]));

	/* Allocate space for the offsets header (the allocation size at
	offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
	so that this will never be less than the size calculated in
	rec_get_offsets_func(). */
	i = (REC_OFFS_HEADER_SIZE + 1 + 1) + index->n_fields;

	heap = mem_heap_create((2 * sizeof *rec_offsets) * i);
	rec_offsets = static_cast<ulint*>(
		mem_heap_alloc(heap, i * sizeof *rec_offsets));
	prev_rec_offsets = static_cast<ulint*>(
		mem_heap_alloc(heap, i * sizeof *prev_rec_offsets));
	rec_offs_set_n_alloc(rec_offsets, i);
	rec_offs_set_n_alloc(prev_rec_offsets, i);

	/* reset the dynamic arrays n_diff_boundaries[0..n_uniq-1] */
	if (n_diff_boundaries != NULL) {
		for (i = 0; i < n_uniq; i++) {
			n_diff_boundaries[i].erase(
				n_diff_boundaries[i].begin(),
				n_diff_boundaries[i].end());
		}
	}

	/* Position pcur on the leftmost record on the leftmost page
	on the desired level. */

	btr_pcur_open_at_index_side(
		true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED,
		&pcur, true, level, mtr);
	btr_pcur_move_to_next_on_page(&pcur);

	page = btr_pcur_get_page(&pcur);

	/* The page must not be empty, except when
	it is the root page (and the whole index is empty). */
	ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page));
	ut_ad(btr_pcur_get_rec(&pcur)
	      == page_rec_get_next_const(page_get_infimum_rec(page)));

	/* check that we are indeed on the desired level */
	ut_a(btr_page_get_level(page, mtr) == level);

	/* there should not be any pages on the left */
	ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);

	/* check whether the first record on the leftmost page is marked
	as such, if we are on a non-leaf level */
	ut_a((level == 0)
	     == !(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
			  btr_pcur_get_rec(&pcur), page_is_comp(page))));

	prev_rec = NULL;
	prev_rec_is_copied = false;

	/* no records by default */
	*total_recs = 0;

	*total_pages = 0;

	/* iterate over all user records on this level
	and compare each two adjacent ones, even the last on page
	X and the fist on page X+1 */
	for (;
	     btr_pcur_is_on_user_rec(&pcur);
	     btr_pcur_move_to_next_user_rec(&pcur, mtr)) {

		ulint	matched_fields = 0;
		ulint	matched_bytes = 0;
		bool	rec_is_last_on_page;

		rec = btr_pcur_get_rec(&pcur);

		/* If rec and prev_rec are on different pages, then prev_rec
		must have been copied, because we hold latch only on the page
		where rec resides. */
		if (prev_rec != NULL
		    && page_align(rec) != page_align(prev_rec)) {

			ut_a(prev_rec_is_copied);
		}

		rec_is_last_on_page =
			page_rec_is_supremum(page_rec_get_next_const(rec));

		/* increment the pages counter at the end of each page */
		if (rec_is_last_on_page) {

			(*total_pages)++;
		}

		/* Skip delete-marked records on the leaf level. If we
		do not skip them, then ANALYZE quickly after DELETE
		could count them or not (purge may have already wiped
		them away) which brings non-determinism. We skip only
		leaf-level delete marks because delete marks on
		non-leaf level do not make sense. The skipping is
		disabled when srv_stats_include_delete_marked is set. */

		if (level == 0 && (srv_stats_include_delete_marked ? 0:
				   rec_get_deleted_flag(
					   rec,
					   page_is_comp(btr_pcur_get_page(&pcur))))) {

			if (rec_is_last_on_page
			    && !prev_rec_is_copied
			    && prev_rec != NULL) {
				/* copy prev_rec; a plain pointer would
				become stale once the cursor moves to
				the next page and releases the latch on
				the page prev_rec is on */

				prev_rec_offsets = rec_get_offsets(
					prev_rec, index, prev_rec_offsets,
					n_uniq, &heap);

				prev_rec = rec_copy_prefix_to_buf(
					prev_rec, index,
					rec_offs_n_fields(prev_rec_offsets),
					&prev_rec_buf, &prev_rec_buf_size);

				prev_rec_is_copied = true;
			}

			continue;
		}

		rec_offsets = rec_get_offsets(
			rec, index, rec_offsets, n_uniq, &heap);

		(*total_recs)++;

		if (prev_rec != NULL) {
			prev_rec_offsets = rec_get_offsets(
				prev_rec, index, prev_rec_offsets,
				n_uniq, &heap);

			/* find how many leading fields rec and prev_rec
			have in common */
			cmp_rec_rec_with_match(rec,
					       prev_rec,
					       rec_offsets,
					       prev_rec_offsets,
					       index,
					       FALSE,
					       &matched_fields,
					       &matched_bytes);

			for (i = matched_fields; i < n_uniq; i++) {

				if (n_diff_boundaries != NULL) {
					/* push the index of the previous
					record, that is - the last one from
					a group of equal keys */

					ib_uint64_t	idx;

					/* the index of the current record
					is total_recs - 1, the index of the
					previous record is total_recs - 2;
					we know that idx is not going to
					become negative here because if we
					are in this branch then there is a
					previous record and thus
					total_recs >= 2 */
					idx = *total_recs - 2;

					n_diff_boundaries[i].push_back(idx);
				}

				/* increment the number of different keys
				for n_prefix=i+1 (e.g. if i=0 then we increment
				for n_prefix=1 which is stored in n_diff[0]) */
				n_diff[i]++;
			}
		} else {
			/* this is the first non-delete marked record */
			for (i = 0; i < n_uniq; i++) {
				n_diff[i] = 1;
			}
		}

		if (rec_is_last_on_page) {
			/* end of a page has been reached */

			/* we need to copy the record instead of assigning
			like prev_rec = rec; because when we traverse the
			records on this level at some point we will jump from
			one page to the next and then rec and prev_rec will
			be on different pages and
			btr_pcur_move_to_next_user_rec() will release the
			latch on the page that prev_rec is on */
			prev_rec = rec_copy_prefix_to_buf(
				rec, index, rec_offs_n_fields(rec_offsets),
				&prev_rec_buf, &prev_rec_buf_size);
			prev_rec_is_copied = true;

		} else {
			/* still on the same page, the next call to
			btr_pcur_move_to_next_user_rec() will not jump
			on the next page, we can simply assign pointers
			instead of copying the records like above */

			prev_rec = rec;
			prev_rec_is_copied = false;
		}
	}

	/* if *total_pages is left untouched then the above loop was not
	entered at all and there is one page in the whole tree which is
	empty or the loop was entered but this is level 0, contains one page
	and all records are delete-marked */
	if (*total_pages == 0) {

		ut_ad(level == 0);
		ut_ad(*total_recs == 0);

		*total_pages = 1;
	}

	/* if there are records on this level and boundaries
	should be saved */
	if (*total_recs > 0 && n_diff_boundaries != NULL) {

		/* remember the index of the last record on the level as the
		last one from the last group of equal keys; this holds for
		all possible prefixes */
		for (i = 0; i < n_uniq; i++) {
			ib_uint64_t	idx;

			idx = *total_recs - 1;

			n_diff_boundaries[i].push_back(idx);
		}
	}

	/* now in n_diff_boundaries[i] there are exactly n_diff[i] integers,
	for i=0..n_uniq-1 */

#ifdef UNIV_STATS_DEBUG
	for (i = 0; i < n_uniq; i++) {

		DEBUG_PRINTF("    %s(): total recs: " UINT64PF
			     ", total pages: " UINT64PF
			     ", n_diff[%lu]: " UINT64PF "\n",
			     __func__, *total_recs,
			     *total_pages,
			     i, n_diff[i]);

#if 0
		if (n_diff_boundaries != NULL) {
			ib_uint64_t	j;

			DEBUG_PRINTF("    %s(): boundaries[%lu]: ",
				     __func__, i);

			for (j = 0; j < n_diff[i]; j++) {
				ib_uint64_t	idx;

				idx = n_diff_boundaries[i][j];

				DEBUG_PRINTF(UINT64PF "=" UINT64PF ", ",
					     j, idx);
			}
			DEBUG_PRINTF("\n");
		}
#endif
	}
#endif /* UNIV_STATS_DEBUG */

	/* Release the latch on the last page, because that is not done by
	btr_pcur_close(). This function works also for non-leaf pages. */
	btr_leaf_page_release(btr_pcur_get_block(&pcur), BTR_SEARCH_LEAF, mtr);

	btr_pcur_close(&pcur);

	if (prev_rec_buf != NULL) {

		mem_free(prev_rec_buf);
	}

	mem_heap_free(heap);
}
1289
/* aux enum for controlling the behavior of dict_stats_scan_page() @{ */
enum page_scan_method_t {
	/* Scan every record on the given page and count the number of
	distinct ones, ignoring records that carry the delete mark. */
	COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED,

	/* Stop scanning as soon as a record is found that differs
	from its right neighbor. */
	QUIT_ON_FIRST_NON_BORING,

	/* Scan every record on the given page and count the number of
	distinct ones, including records that carry the delete mark. */
	COUNT_ALL_NON_BORING_INCLUDE_DEL_MARKED
};
/* @} */
1304
1305 /** Scan a page, reading records from left to right and counting the number
1306 of distinct records (looking only at the first n_prefix
1307 columns) and the number of external pages pointed by records from this page.
1308 If scan_method is QUIT_ON_FIRST_NON_BORING then the function
1309 will return as soon as it finds a record that does not match its neighbor
1310 to the right, which means that in the case of QUIT_ON_FIRST_NON_BORING the
1311 returned n_diff can either be 0 (empty page), 1 (the whole page has all keys
1312 equal) or 2 (the function found a non-boring record and returned).
1313 @param[out] out_rec record, or NULL
1314 @param[out] offsets1 rec_get_offsets() working space (must
1315 be big enough)
1316 @param[out] offsets2 rec_get_offsets() working space (must
1317 be big enough)
1318 @param[in] index index of the page
1319 @param[in] page the page to scan
1320 @param[in] n_prefix look at the first n_prefix columns
1321 @param[in] scan_method scan to the end of the page or not
1322 @param[out] n_diff number of distinct records encountered
1323 @param[out] n_external_pages if this is non-NULL then it will be set
1324 to the number of externally stored pages which were encountered
1325 @return offsets1 or offsets2 (the offsets of *out_rec),
1326 or NULL if the page is empty and does not contain user records. */
1327 UNIV_INLINE
1328 ulint*
dict_stats_scan_page(const rec_t ** out_rec,ulint * offsets1,ulint * offsets2,dict_index_t * index,const page_t * page,ulint n_prefix,page_scan_method_t scan_method,ib_uint64_t * n_diff,ib_uint64_t * n_external_pages)1329 dict_stats_scan_page(
1330 const rec_t** out_rec,
1331 ulint* offsets1,
1332 ulint* offsets2,
1333 dict_index_t* index,
1334 const page_t* page,
1335 ulint n_prefix,
1336 page_scan_method_t scan_method,
1337 ib_uint64_t* n_diff,
1338 ib_uint64_t* n_external_pages)
1339 {
1340 ulint* offsets_rec = offsets1;
1341 ulint* offsets_next_rec = offsets2;
1342 const rec_t* rec;
1343 const rec_t* next_rec;
1344 /* A dummy heap, to be passed to rec_get_offsets().
1345 Because offsets1,offsets2 should be big enough,
1346 this memory heap should never be used. */
1347 mem_heap_t* heap = NULL;
1348 const rec_t* (*get_next)(const rec_t*);
1349
1350 if (scan_method == COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED) {
1351 get_next = page_rec_get_next_non_del_marked;
1352 } else {
1353 get_next = page_rec_get_next_const;
1354 }
1355
1356 const bool should_count_external_pages = n_external_pages != NULL;
1357
1358 if (should_count_external_pages) {
1359 *n_external_pages = 0;
1360 }
1361
1362 rec = get_next(page_get_infimum_rec(page));
1363
1364 if (page_rec_is_supremum(rec)) {
1365 /* the page is empty or contains only delete-marked records */
1366 *n_diff = 0;
1367 *out_rec = NULL;
1368 return(NULL);
1369 }
1370
1371 offsets_rec = rec_get_offsets(rec, index, offsets_rec,
1372 ULINT_UNDEFINED, &heap);
1373
1374 if (should_count_external_pages) {
1375 *n_external_pages += btr_rec_get_externally_stored_len(
1376 rec, offsets_rec);
1377 }
1378
1379 next_rec = get_next(rec);
1380
1381 *n_diff = 1;
1382
1383 while (!page_rec_is_supremum(next_rec)) {
1384
1385 ulint matched_fields = 0;
1386 ulint matched_bytes = 0;
1387
1388 offsets_next_rec = rec_get_offsets(next_rec, index,
1389 offsets_next_rec,
1390 ULINT_UNDEFINED,
1391 &heap);
1392
1393 /* check whether rec != next_rec when looking at
1394 the first n_prefix fields */
1395 cmp_rec_rec_with_match(rec, next_rec,
1396 offsets_rec, offsets_next_rec,
1397 index, FALSE, &matched_fields,
1398 &matched_bytes);
1399
1400 if (matched_fields < n_prefix) {
1401 /* rec != next_rec, => rec is non-boring */
1402
1403 (*n_diff)++;
1404
1405 if (scan_method == QUIT_ON_FIRST_NON_BORING) {
1406 goto func_exit;
1407 }
1408 }
1409
1410 rec = next_rec;
1411 {
1412 /* Assign offsets_rec = offsets_next_rec
1413 so that offsets_rec matches with rec which
1414 was just assigned rec = next_rec above.
1415 Also need to point offsets_next_rec to the
1416 place where offsets_rec was pointing before
1417 because we have just 2 placeholders where
1418 data is actually stored:
1419 offsets_onstack1 and offsets_onstack2 and we
1420 are using them in circular fashion
1421 (offsets[_next]_rec are just pointers to
1422 those placeholders). */
1423 ulint* offsets_tmp;
1424 offsets_tmp = offsets_rec;
1425 offsets_rec = offsets_next_rec;
1426 offsets_next_rec = offsets_tmp;
1427 }
1428
1429 if (should_count_external_pages) {
1430 *n_external_pages += btr_rec_get_externally_stored_len(
1431 rec, offsets_rec);
1432 }
1433
1434 next_rec = get_next(next_rec);
1435 }
1436
1437 func_exit:
1438 /* offsets1,offsets2 should have been big enough */
1439 ut_a(heap == NULL);
1440 *out_rec = rec;
1441 return(offsets_rec);
1442 }
1443
1444 /** Dive below the current position of a cursor and calculate the number of
1445 distinct records on the leaf page, when looking at the fist n_prefix
1446 columns. Also calculate the number of external pages pointed by records
1447 on the leaf page.
1448 @param[in] cur cursor
1449 @param[in] n_prefix look at the first n_prefix columns
1450 when comparing records
1451 @param[out] n_diff number of distinct records
1452 @param[out] n_external_pages number of external pages
1453 @return number of distinct records on the leaf page */
static
void
dict_stats_analyze_index_below_cur(
	const btr_cur_t*	cur,
	ulint			n_prefix,
	ib_uint64_t*		n_diff,
	ib_uint64_t*		n_external_pages)
{
	dict_index_t*	index;
	ulint		space;
	ulint		zip_size;
	buf_block_t*	block;
	ulint		page_no;
	const page_t*	page;
	mem_heap_t*	heap;
	const rec_t*	rec;
	ulint*		offsets1;
	ulint*		offsets2;
	ulint*		offsets_rec;
	ulint		size;
	mtr_t		mtr;

	index = btr_cur_get_index(cur);

	/* Allocate offsets for the record and the node pointer, for
	node pointer records. In a secondary index, the node pointer
	record will consist of all index fields followed by a child
	page number.
	Allocate space for the offsets header (the allocation size at
	offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
	so that this will never be less than the size calculated in
	rec_get_offsets_func(). */
	size = (1 + REC_OFFS_HEADER_SIZE) + 1 + dict_index_get_n_fields(index);

	heap = mem_heap_create(size * (sizeof *offsets1 + sizeof *offsets2));

	offsets1 = static_cast<ulint*>(mem_heap_alloc(
			heap, size * sizeof *offsets1));

	offsets2 = static_cast<ulint*>(mem_heap_alloc(
			heap, size * sizeof *offsets2));

	rec_offs_set_n_alloc(offsets1, size);
	rec_offs_set_n_alloc(offsets2, size);

	space = dict_index_get_space(index);
	zip_size = dict_table_zip_size(index->table);

	/* cur is positioned on a node pointer record; its child page
	number tells us where to start the descent */
	rec = btr_cur_get_rec(cur);

	offsets_rec = rec_get_offsets(rec, index, offsets1,
				      ULINT_UNDEFINED, &heap);

	page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec);

	/* assume no external pages by default - in case we quit from this
	function without analyzing any leaf pages */
	*n_external_pages = 0;

	mtr_start(&mtr);

	/* descend to the leaf level on the B-tree */
	for (;;) {

		block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH,
					 NULL /* no guessed block */,
					 BUF_GET, __FILE__, __LINE__, &mtr);

		page = buf_block_get_frame(block);

		if (btr_page_get_level(page, mtr) == 0) {
			/* leaf level */
			break;
		}
		/* else */

		/* search for the first non-boring record on the page */
		offsets_rec = dict_stats_scan_page(
			&rec, offsets1, offsets2, index, page, n_prefix,
			QUIT_ON_FIRST_NON_BORING, n_diff, NULL);

		/* pages on level > 0 are not allowed to be empty */
		ut_a(offsets_rec != NULL);
		/* if page is not empty (offsets_rec != NULL) then n_diff must
		be > 0, otherwise there is a bug in dict_stats_scan_page() */
		ut_a(*n_diff > 0);

		if (*n_diff == 1) {
			mtr_commit(&mtr);

			/* page has all keys equal and the end of the page
			was reached by dict_stats_scan_page(), no need to
			descend to the leaf level */
			mem_heap_free(heap);
			/* can't get an estimate for n_external_pages here
			because we do not dive to the leaf level, assume no
			external pages (*n_external_pages was assigned to 0
			above). */
			return;
		}
		/* else */

		/* when we instruct dict_stats_scan_page() to quit on the
		first non-boring record it finds, then the returned n_diff
		can either be 0 (empty page), 1 (page has all keys equal) or
		2 (non-boring record was found) */
		ut_a(*n_diff == 2);

		/* we have a non-boring record in rec, descend below it */

		page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec);
	}

	/* make sure we got a leaf page as a result from the above loop */
	ut_ad(btr_page_get_level(page, &mtr) == 0);

	/* scan the leaf page and find the number of distinct keys,
	when looking only at the first n_prefix columns; also estimate
	the number of externally stored pages pointed by records on this
	page */

	offsets_rec = dict_stats_scan_page(
		&rec, offsets1, offsets2, index, page, n_prefix,
		srv_stats_include_delete_marked ?
		COUNT_ALL_NON_BORING_INCLUDE_DEL_MARKED:
		COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED, n_diff,
		n_external_pages);

#if 0
	DEBUG_PRINTF("      %s(): n_diff below page_no=%lu: " UINT64PF "\n",
		     __func__, page_no, n_diff);
#endif

	mtr_commit(&mtr);
	mem_heap_free(heap);
}
1590
/** Input data that is used to calculate dict_index_t::stat_n_diff_key_vals[]
for each n-columns prefix (n from 1 to n_uniq). Filled in by the analysis
functions below and consumed by dict_stats_index_set_n_diff(). */
struct n_diff_data_t {
	/** Index of the level on which the descent through the btree
	stopped. level 0 is the leaf level. This is >= 1 because we
	avoid scanning the leaf level because it may contain too many
	pages and doing so is useless when combined with the random dives -
	if we are to scan the leaf level, this means a full scan and we can
	simply do that instead of fiddling with picking random records higher
	in the tree and to dive below them. At the start of the analyzing
	we may decide to do full scan of the leaf level, but then this
	structure is not used in that code path. */
	ulint		level;

	/** Number of records on the level where the descend through the btree
	stopped. When we scan the btree from the root, we stop at some mid
	level, choose some records from it and dive below them towards a leaf
	page to analyze. */
	ib_uint64_t	n_recs_on_level;

	/** Number of different key values that were found on the mid level. */
	ib_uint64_t	n_diff_on_level;

	/** Number of leaf pages that are analyzed. This is also the same as
	the number of records that we pick from the mid level and dive below
	them. */
	ib_uint64_t	n_leaf_pages_to_analyze;

	/** Cumulative sum of the number of different key values that were
	found on all analyzed pages. */
	ib_uint64_t	n_diff_all_analyzed_pages;

	/** Cumulative sum of the number of external pages (stored outside of
	the btree but in the same file segment). */
	ib_uint64_t	n_external_pages_sum;
};
1627
1628 /** Estimate the number of different key values in an index when looking at
1629 the first n_prefix columns. For a given level in an index select
1630 n_diff_data->n_leaf_pages_to_analyze records from that level and dive below
1631 them to the corresponding leaf pages, then scan those leaf pages and save the
1632 sampling results in n_diff_data->n_diff_all_analyzed_pages.
1633 @param[in] index index
1634 @param[in] n_prefix look at first 'n_prefix' columns when
1635 comparing records
1636 @param[in] boundaries a vector that contains
1637 n_diff_data->n_diff_on_level integers each of which represents the index (on
1638 level 'level', counting from left/smallest to right/biggest from 0) of the
1639 last record from each group of distinct keys
1640 @param[in,out] n_diff_data n_diff_all_analyzed_pages and
1641 n_external_pages_sum in this structure will be set by this function. The
1642 members level, n_diff_on_level and n_leaf_pages_to_analyze must be set by the
1643 caller in advance - they are used by some calculations inside this function
1644 @param[in,out] mtr mini-transaction */
static
void
dict_stats_analyze_index_for_n_prefix(
	dict_index_t*		index,
	ulint			n_prefix,
	const boundaries_t*	boundaries,
	n_diff_data_t*		n_diff_data,
	mtr_t*			mtr)
{
	btr_pcur_t	pcur;
	const page_t*	page;
	ib_uint64_t	rec_idx;
	ib_uint64_t	i;

#if 0
	DEBUG_PRINTF("    %s(table=%s, index=%s, level=%lu, n_prefix=%lu, "
		     "n_diff_on_level=" UINT64PF ")\n",
		     __func__, index->table->name, index->name, level,
		     n_prefix, n_diff_data->n_diff_on_level);
#endif

	/* The caller must hold an S-latch on the whole index. */
	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_S_LOCK));

	/* Position pcur on the leftmost record on the leftmost page
	on the desired level. */

	btr_pcur_open_at_index_side(
		true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED,
		&pcur, true, n_diff_data->level, mtr);
	btr_pcur_move_to_next_on_page(&pcur);

	page = btr_pcur_get_page(&pcur);

	const rec_t*	first_rec = btr_pcur_get_rec(&pcur);

	/* We shouldn't be scanning the leaf level. The caller of this function
	should have stopped the descend on level 1 or higher. */
	ut_ad(n_diff_data->level > 0);
	ut_ad(!page_is_leaf(page));

	/* The page must not be empty, except when
	it is the root page (and the whole index is empty). */
	ut_ad(btr_pcur_is_on_user_rec(&pcur));
	ut_ad(first_rec == page_rec_get_next_const(page_get_infimum_rec(page)));

	/* check that we are indeed on the desired level */
	ut_a(btr_page_get_level(page, mtr) == n_diff_data->level);

	/* there should not be any pages on the left */
	ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);

	/* check whether the first record on the leftmost page is marked
	as such; we are on a non-leaf level */
	ut_a(rec_get_info_bits(first_rec, page_is_comp(page))
	     & REC_INFO_MIN_REC_FLAG);

	const ib_uint64_t	last_idx_on_level = boundaries->at(
		static_cast<unsigned>(n_diff_data->n_diff_on_level - 1));

	rec_idx = 0;

	n_diff_data->n_diff_all_analyzed_pages = 0;
	n_diff_data->n_external_pages_sum = 0;

	for (i = 0; i < n_diff_data->n_leaf_pages_to_analyze; i++) {
		/* there are n_diff_on_level elements
		in 'boundaries' and we divide those elements
		into n_leaf_pages_to_analyze segments, for example:

		let n_diff_on_level=100, n_leaf_pages_to_analyze=4, then:
		segment i=0:  [0, 24]
		segment i=1:  [25, 49]
		segment i=2:  [50, 74]
		segment i=3:  [75, 99] or

		let n_diff_on_level=1, n_leaf_pages_to_analyze=1, then:
		segment i=0: [0, 0] or

		let n_diff_on_level=2, n_leaf_pages_to_analyze=2, then:
		segment i=0: [0, 0]
		segment i=1: [1, 1] or

		let n_diff_on_level=13, n_leaf_pages_to_analyze=7, then:
		segment i=0:  [0, 0]
		segment i=1:  [1, 2]
		segment i=2:  [3, 4]
		segment i=3:  [5, 6]
		segment i=4:  [7, 8]
		segment i=5:  [9, 10]
		segment i=6:  [11, 12]

		then we select a random record from each segment and dive
		below it */
		const ib_uint64_t	n_diff = n_diff_data->n_diff_on_level;
		const ib_uint64_t	n_pick
			= n_diff_data->n_leaf_pages_to_analyze;

		/* [left, right] are the bounds of segment i */
		const ib_uint64_t	left = n_diff * i / n_pick;
		const ib_uint64_t	right = n_diff * (i + 1) / n_pick - 1;

		ut_a(left <= right);
		ut_a(right <= last_idx_on_level);

		/* we do not pass (left, right) because we do not want to ask
		ut_rnd_interval() to work with too big numbers since
		ib_uint64_t could be bigger than ulint */
		const ulint	rnd = ut_rnd_interval(
			0, static_cast<ulint>(right - left));

		const ib_uint64_t	dive_below_idx
			= boundaries->at(static_cast<unsigned>(left + rnd));

#if 0
		DEBUG_PRINTF("    %s(): dive below record with index="
			     UINT64PF "\n", __func__, dive_below_idx);
#endif

		/* seek to the record with index dive_below_idx */
		while (rec_idx < dive_below_idx
		       && btr_pcur_is_on_user_rec(&pcur)) {

			btr_pcur_move_to_next_user_rec(&pcur, mtr);
			rec_idx++;
		}

		/* if the level has finished before the record we are
		searching for, this means that the B-tree has changed in
		the meantime, quit our sampling and use whatever stats
		we have collected so far */
		if (rec_idx < dive_below_idx) {

			ut_ad(!btr_pcur_is_on_user_rec(&pcur));
			break;
		}

		/* it could be that the tree has changed in such a way that
		the record under dive_below_idx is the supremum record, in
		this case rec_idx == dive_below_idx and pcur is positioned
		on the supremum, we do not want to dive below it */
		if (!btr_pcur_is_on_user_rec(&pcur)) {
			break;
		}

		ut_a(rec_idx == dive_below_idx);

		ib_uint64_t	n_diff_on_leaf_page;
		ib_uint64_t	n_external_pages;

		dict_stats_analyze_index_below_cur(btr_pcur_get_btr_cur(&pcur),
						   n_prefix,
						   &n_diff_on_leaf_page,
						   &n_external_pages);

		/* We adjust n_diff_on_leaf_page here to avoid counting
		one record twice - once as the last on some page and once
		as the first on another page. Consider the following example:
		Leaf level:
		page: (2,2,2,2,3,3)
		... many pages like (3,3,3,3,3,3) ...
		page: (3,3,3,3,5,5)
		... many pages like (5,5,5,5,5,5) ...
		page: (5,5,5,5,8,8)
		page: (8,8,8,8,9,9)
		our algo would (correctly) get an estimate that there are
		2 distinct records per page (average). Having 4 pages below
		non-boring records, it would (wrongly) estimate the number
		of distinct records to 8. */
		if (n_diff_on_leaf_page > 0) {
			n_diff_on_leaf_page--;
		}

		n_diff_data->n_diff_all_analyzed_pages += n_diff_on_leaf_page;

		n_diff_data->n_external_pages_sum += n_external_pages;
	}

	btr_pcur_close(&pcur);
}
1824
1825 /** Set dict_index_t::stat_n_diff_key_vals[] and stat_n_sample_sizes[].
1826 @param[in] n_diff_data input data to use to derive the results
1827 @param[in,out] index index whose stat_n_diff_key_vals[] to set */
UNIV_INLINE
void
dict_stats_index_set_n_diff(
	const n_diff_data_t*	n_diff_data,
	dict_index_t*		index)
{
	for (ulint n_prefix = dict_index_get_n_unique(index);
	     n_prefix >= 1;
	     n_prefix--) {
		/* n_diff_all_analyzed_pages can be 0 here if
		all the leaf pages sampled contained only
		delete-marked records. In this case we should assign
		0 to index->stat_n_diff_key_vals[n_prefix - 1], which
		the formula below does. */

		const n_diff_data_t*	data = &n_diff_data[n_prefix - 1];

		ut_ad(data->n_leaf_pages_to_analyze > 0);
		ut_ad(data->n_recs_on_level > 0);

		ulint	n_ordinary_leaf_pages;

		if (data->level == 1) {
			/* If we know the number of records on level 1, then
			this number is the same as the number of pages on
			level 0 (leaf). */
			n_ordinary_leaf_pages = data->n_recs_on_level;
		} else {
			/* If we analyzed D ordinary leaf pages and found E
			external pages in total linked from those D ordinary
			leaf pages, then this means that the ratio
			ordinary/external is D/E. Then the ratio ordinary/total
			is D / (D + E). Knowing that the total number of pages
			is T (including ordinary and external) then we estimate
			that the total number of ordinary leaf pages is
			T * D / (D + E). */
			n_ordinary_leaf_pages
				= index->stat_n_leaf_pages
				* data->n_leaf_pages_to_analyze
				/ (data->n_leaf_pages_to_analyze
				   + data->n_external_pages_sum);
		}

		/* See REF01 for an explanation of the algorithm */
		index->stat_n_diff_key_vals[n_prefix - 1]
			= n_ordinary_leaf_pages

			/* record density on the mid level */
			* data->n_diff_on_level
			/ data->n_recs_on_level

			/* average number of distinct keys found per
			sampled leaf page; note that the expression is
			evaluated left to right, so each integer
			division truncates an intermediate result */
			* data->n_diff_all_analyzed_pages
			/ data->n_leaf_pages_to_analyze;

		index->stat_n_sample_sizes[n_prefix - 1]
			= data->n_leaf_pages_to_analyze;

		DEBUG_PRINTF("    %s(): n_diff=" UINT64PF " for n_prefix=%lu"
			     " (%lu"
			     " * " UINT64PF " / " UINT64PF
			     " * " UINT64PF " / " UINT64PF ")\n",
			     __func__,
			     index->stat_n_diff_key_vals[n_prefix - 1],
			     n_prefix,
			     index->stat_n_leaf_pages,
			     data->n_diff_on_level,
			     data->n_recs_on_level,
			     data->n_diff_all_analyzed_pages,
			     data->n_leaf_pages_to_analyze);
	}
}
1898
1899 /*********************************************************************//**
1900 Calculates new statistics for a given index and saves them to the index
1901 members stat_n_diff_key_vals[], stat_n_sample_sizes[], stat_index_size and
1902 stat_n_leaf_pages. This function could be slow. */
1903 static
1904 void
dict_stats_analyze_index(dict_index_t * index)1905 dict_stats_analyze_index(
1906 /*=====================*/
1907 dict_index_t* index) /*!< in/out: index to analyze */
1908 {
1909 ulint root_level;
1910 ulint level;
1911 bool level_is_analyzed;
1912 ulint n_uniq;
1913 ulint n_prefix;
1914 ib_uint64_t total_recs;
1915 ib_uint64_t total_pages;
1916 mtr_t mtr;
1917 ulint size;
1918 DBUG_ENTER("dict_stats_analyze_index");
1919
1920 DBUG_PRINT("info", ("index: %s, online status: %d", index->name,
1921 dict_index_get_online_status(index)));
1922
1923 DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name);
1924
1925 dict_stats_empty_index(index);
1926
1927 mtr_start(&mtr);
1928
1929 mtr_s_lock(dict_index_get_lock(index), &mtr);
1930
1931 size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
1932
1933 if (size != ULINT_UNDEFINED) {
1934 index->stat_index_size = size;
1935 size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
1936 }
1937
1938 /* Release the X locks on the root page taken by btr_get_size() */
1939 mtr_commit(&mtr);
1940
1941 switch (size) {
1942 case ULINT_UNDEFINED:
1943 dict_stats_assert_initialized_index(index);
1944 DBUG_VOID_RETURN;
1945 case 0:
1946 /* The root node of the tree is a leaf */
1947 size = 1;
1948 }
1949
1950 index->stat_n_leaf_pages = size;
1951
1952 mtr_start(&mtr);
1953
1954 mtr_s_lock(dict_index_get_lock(index), &mtr);
1955
1956 root_level = btr_height_get(index, &mtr);
1957
1958 n_uniq = dict_index_get_n_unique(index);
1959
1960 /* If the tree has just one level (and one page) or if the user
1961 has requested to sample too many pages then do full scan.
1962
1963 For each n-column prefix (for n=1..n_uniq) N_SAMPLE_PAGES(index)
1964 will be sampled, so in total N_SAMPLE_PAGES(index) * n_uniq leaf
1965 pages will be sampled. If that number is bigger than the total
1966 number of leaf pages then do full scan of the leaf level instead
1967 since it will be faster and will give better results. */
1968
1969 if (root_level == 0
1970 || N_SAMPLE_PAGES(index) * n_uniq > index->stat_n_leaf_pages) {
1971
1972 if (root_level == 0) {
1973 DEBUG_PRINTF(" %s(): just one page, "
1974 "doing full scan\n", __func__);
1975 } else {
1976 DEBUG_PRINTF(" %s(): too many pages requested for "
1977 "sampling, doing full scan\n", __func__);
1978 }
1979
1980 /* do full scan of level 0; save results directly
1981 into the index */
1982
1983 dict_stats_analyze_index_level(index,
1984 0 /* leaf level */,
1985 index->stat_n_diff_key_vals,
1986 &total_recs,
1987 &total_pages,
1988 NULL /* boundaries not needed */,
1989 &mtr);
1990
1991 for (ulint i = 0; i < n_uniq; i++) {
1992 index->stat_n_sample_sizes[i] = total_pages;
1993 }
1994
1995 mtr_commit(&mtr);
1996
1997 dict_stats_assert_initialized_index(index);
1998 DBUG_VOID_RETURN;
1999 }
2000
2001 /* For each level that is being scanned in the btree, this contains the
2002 number of different key values for all possible n-column prefixes. */
2003 ib_uint64_t* n_diff_on_level = new ib_uint64_t[n_uniq];
2004
2005 /* For each level that is being scanned in the btree, this contains the
2006 index of the last record from each group of equal records (when
2007 comparing only the first n columns, n=1..n_uniq). */
2008 boundaries_t* n_diff_boundaries = new boundaries_t[n_uniq];
2009
2010 /* For each n-column prefix this array contains the input data that is
2011 used to calculate dict_index_t::stat_n_diff_key_vals[]. */
2012 n_diff_data_t* n_diff_data = new n_diff_data_t[n_uniq];
2013
2014 /* total_recs is also used to estimate the number of pages on one
2015 level below, so at the start we have 1 page (the root) */
2016 total_recs = 1;
2017
2018 /* Here we use the following optimization:
2019 If we find that level L is the first one (searching from the
2020 root) that contains at least D distinct keys when looking at
2021 the first n_prefix columns, then:
2022 if we look at the first n_prefix-1 columns then the first
2023 level that contains D distinct keys will be either L or a
2024 lower one.
2025 So if we find that the first level containing D distinct
2026 keys (on n_prefix columns) is L, we continue from L when
2027 searching for D distinct keys on n_prefix-1 columns. */
2028 level = root_level;
2029 level_is_analyzed = false;
2030
2031 for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) {
2032
2033 DEBUG_PRINTF(" %s(): searching level with >=%llu "
2034 "distinct records, n_prefix=%lu\n",
2035 __func__, N_DIFF_REQUIRED(index), n_prefix);
2036
2037 /* Commit the mtr to release the tree S lock to allow
2038 other threads to do some work too. */
2039 mtr_commit(&mtr);
2040 mtr_start(&mtr);
2041 mtr_s_lock(dict_index_get_lock(index), &mtr);
2042 if (root_level != btr_height_get(index, &mtr)) {
2043 /* Just quit if the tree has changed beyond
2044 recognition here. The old stats from previous
2045 runs will remain in the values that we have
2046 not calculated yet. Initially when the index
2047 object is created the stats members are given
2048 some sensible values so leaving them untouched
2049 here even the first time will not cause us to
2050 read uninitialized memory later. */
2051 break;
2052 }
2053
2054 /* check whether we should pick the current level;
2055 we pick level 1 even if it does not have enough
2056 distinct records because we do not want to scan the
2057 leaf level because it may contain too many records */
2058 if (level_is_analyzed
2059 && (n_diff_on_level[n_prefix - 1] >= N_DIFF_REQUIRED(index)
2060 || level == 1)) {
2061
2062 goto found_level;
2063 }
2064
2065 /* search for a level that contains enough distinct records */
2066
2067 if (level_is_analyzed && level > 1) {
2068
2069 /* if this does not hold we should be on
2070 "found_level" instead of here */
2071 ut_ad(n_diff_on_level[n_prefix - 1]
2072 < N_DIFF_REQUIRED(index));
2073
2074 level--;
2075 level_is_analyzed = false;
2076 }
2077
2078 /* descend into the tree, searching for "good enough" level */
2079 for (;;) {
2080
2081 /* make sure we do not scan the leaf level
2082 accidentally, it may contain too many pages */
2083 ut_ad(level > 0);
2084
2085 /* scanning the same level twice is an optimization
2086 bug */
2087 ut_ad(!level_is_analyzed);
2088
2089 /* Do not scan if this would read too many pages.
2090 Here we use the following fact:
2091 the number of pages on level L equals the number
2092 of records on level L+1, thus we deduce that the
2093 following call would scan total_recs pages, because
2094 total_recs is left from the previous iteration when
2095 we scanned one level upper or we have not scanned any
2096 levels yet in which case total_recs is 1. */
2097 if (total_recs > N_SAMPLE_PAGES(index)) {
2098
2099 /* if the above cond is true then we are
2100 not at the root level since on the root
2101 level total_recs == 1 (set before we
2102 enter the n-prefix loop) and cannot
2103 be > N_SAMPLE_PAGES(index) */
2104 ut_a(level != root_level);
2105
2106 /* step one level back and be satisfied with
2107 whatever it contains */
2108 level++;
2109 level_is_analyzed = true;
2110
2111 break;
2112 }
2113
2114 dict_stats_analyze_index_level(index,
2115 level,
2116 n_diff_on_level,
2117 &total_recs,
2118 &total_pages,
2119 n_diff_boundaries,
2120 &mtr);
2121
2122 level_is_analyzed = true;
2123
2124 if (level == 1
2125 || n_diff_on_level[n_prefix - 1]
2126 >= N_DIFF_REQUIRED(index)) {
2127 /* we have reached the last level we could scan
2128 or we found a good level with many distinct
2129 records */
2130 break;
2131 }
2132
2133 level--;
2134 level_is_analyzed = false;
2135 }
2136 found_level:
2137
2138 DEBUG_PRINTF(" %s(): found level %lu that has " UINT64PF
2139 " distinct records for n_prefix=%lu\n",
2140 __func__, level, n_diff_on_level[n_prefix - 1],
2141 n_prefix);
2142 /* here we are either on level 1 or the level that we are on
2143 contains >= N_DIFF_REQUIRED distinct keys or we did not scan
2144 deeper levels because they would contain too many pages */
2145
2146 ut_ad(level > 0);
2147
2148 ut_ad(level_is_analyzed);
2149
2150 /* if any of these is 0 then there is exactly one page in the
2151 B-tree and it is empty and we should have done full scan and
2152 should not be here */
2153 ut_ad(total_recs > 0);
2154 ut_ad(n_diff_on_level[n_prefix - 1] > 0);
2155
2156 ut_ad(N_SAMPLE_PAGES(index) > 0);
2157
2158 n_diff_data_t* data = &n_diff_data[n_prefix - 1];
2159
2160 data->level = level;
2161
2162 data->n_recs_on_level = total_recs;
2163
2164 data->n_diff_on_level = n_diff_on_level[n_prefix - 1];
2165
2166 data->n_leaf_pages_to_analyze = std::min(
2167 N_SAMPLE_PAGES(index),
2168 n_diff_on_level[n_prefix - 1]);
2169
2170 /* pick some records from this level and dive below them for
2171 the given n_prefix */
2172
2173 dict_stats_analyze_index_for_n_prefix(
2174 index, n_prefix, &n_diff_boundaries[n_prefix - 1],
2175 data, &mtr);
2176 }
2177
2178 mtr_commit(&mtr);
2179
2180 delete[] n_diff_boundaries;
2181
2182 delete[] n_diff_on_level;
2183
2184 /* n_prefix == 0 means that the above loop did not end up prematurely
2185 due to tree being changed and so n_diff_data[] is set up. */
2186 if (n_prefix == 0) {
2187 dict_stats_index_set_n_diff(n_diff_data, index);
2188 }
2189
2190 delete[] n_diff_data;
2191
2192 dict_stats_assert_initialized_index(index);
2193 DBUG_VOID_RETURN;
2194 }
2195
2196 /*********************************************************************//**
2197 Calculates new estimates for table and index statistics. This function
2198 is relatively slow and is used to calculate persistent statistics that
2199 will be saved on disk.
2200 @return DB_SUCCESS or error code */
2201 static
2202 dberr_t
dict_stats_update_persistent(dict_table_t * table)2203 dict_stats_update_persistent(
2204 /*=========================*/
2205 dict_table_t* table) /*!< in/out: table */
2206 {
2207 dict_index_t* index;
2208
2209 DEBUG_PRINTF("%s(table=%s)\n", __func__, table->name);
2210
2211 dict_table_stats_lock(table, RW_X_LATCH);
2212
2213 /* analyze the clustered index first */
2214
2215 index = dict_table_get_first_index(table);
2216
2217 if (index == NULL
2218 || dict_index_is_corrupted(index)
2219 || (index->type | DICT_UNIQUE) != (DICT_CLUSTERED | DICT_UNIQUE)) {
2220
2221 /* Table definition is corrupt */
2222 dict_table_stats_unlock(table, RW_X_LATCH);
2223 dict_stats_empty_table(table);
2224
2225 return(DB_CORRUPTION);
2226 }
2227
2228 ut_ad(!dict_index_is_univ(index));
2229
2230 dict_stats_analyze_index(index);
2231
2232 ulint n_unique = dict_index_get_n_unique(index);
2233
2234 table->stat_n_rows = index->stat_n_diff_key_vals[n_unique - 1];
2235
2236 table->stat_clustered_index_size = index->stat_index_size;
2237
2238 /* analyze other indexes from the table, if any */
2239
2240 table->stat_sum_of_other_index_sizes = 0;
2241
2242 for (index = dict_table_get_next_index(index);
2243 index != NULL;
2244 index = dict_table_get_next_index(index)) {
2245
2246 ut_ad(!dict_index_is_univ(index));
2247
2248 if (index->type & DICT_FTS) {
2249 continue;
2250 }
2251
2252 dict_stats_empty_index(index);
2253
2254 if (dict_stats_should_ignore_index(index)) {
2255 continue;
2256 }
2257
2258 if (!(table->stats_bg_flag & BG_STAT_SHOULD_QUIT)) {
2259 dict_stats_analyze_index(index);
2260 }
2261
2262 table->stat_sum_of_other_index_sizes
2263 += index->stat_index_size;
2264 }
2265
2266 table->stats_last_recalc = ut_time();
2267
2268 table->stat_modified_counter = 0;
2269
2270 table->stat_initialized = TRUE;
2271
2272 dict_stats_assert_initialized(table);
2273
2274 dict_table_stats_unlock(table, RW_X_LATCH);
2275
2276 return(DB_SUCCESS);
2277 }
2278
2279 #include "mysql_com.h"
/** Save an individual index's statistic into the persistent statistics
storage.
@param[in]	index		index to be updated
@param[in]	last_update	timestamp of the stat
@param[in]	stat_name	name of the stat
@param[in]	stat_value	value of the stat
@param[in]	sample_size	n pages sampled or NULL
@param[in]	stat_description	description of the stat
@param[in,out]	trx		in case of NULL the function will
allocate and free the trx object. If it is not NULL then it will be
rolled back only in the case of error, but not freed.
@return DB_SUCCESS or error code */
static
dberr_t
dict_stats_save_index_stat(
	dict_index_t*	index,
	lint		last_update,
	const char*	stat_name,
	ib_uint64_t	stat_value,
	ib_uint64_t*	sample_size,
	const char*	stat_description,
	trx_t*		trx)
{
	pars_info_t*	pinfo;
	dberr_t		ret;
	char		db_utf8[MAX_DB_UTF8_LEN];
	char		table_utf8[MAX_TABLE_UTF8_LEN];

	/* the caller must hold the data dictionary latches */
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(mutex_own(&dict_sys->mutex));

	dict_fs2utf8(index->table->name, db_utf8, sizeof(db_utf8),
		     table_utf8, sizeof(table_utf8));

	/* bind the statement parameters for the DELETE+INSERT below */
	pinfo = pars_info_create();
	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
	UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name));
	pars_info_add_str_literal(pinfo, "index_name", index->name);
	/* only 4 bytes are checked/stored because the int4 literal
	below persists a 4-byte MySQL timestamp, even though lint may
	be wider */
	UNIV_MEM_ASSERT_RW_ABORT(&last_update, 4);
	pars_info_add_int4_literal(pinfo, "last_update", last_update);
	UNIV_MEM_ASSERT_RW_ABORT(stat_name, strlen(stat_name));
	pars_info_add_str_literal(pinfo, "stat_name", stat_name);
	UNIV_MEM_ASSERT_RW_ABORT(&stat_value, 8);
	pars_info_add_ull_literal(pinfo, "stat_value", stat_value);
	if (sample_size != NULL) {
		UNIV_MEM_ASSERT_RW_ABORT(sample_size, 8);
		pars_info_add_ull_literal(pinfo, "sample_size", *sample_size);
	} else {
		/* a NULL sample_size is stored as SQL NULL */
		pars_info_add_literal(pinfo, "sample_size", NULL,
				      UNIV_SQL_NULL, DATA_FIXBINARY, 0);
	}
	UNIV_MEM_ASSERT_RW_ABORT(stat_description, strlen(stat_description));
	pars_info_add_str_literal(pinfo, "stat_description",
				  stat_description);

	/* upsert: delete any previous row for this stat, then insert */
	ret = dict_stats_exec_sql(
		pinfo,
		"PROCEDURE INDEX_STATS_SAVE () IS\n"
		"BEGIN\n"

		"DELETE FROM \"" INDEX_STATS_NAME "\"\n"
		"WHERE\n"
		"database_name = :database_name AND\n"
		"table_name = :table_name AND\n"
		"index_name = :index_name AND\n"
		"stat_name = :stat_name;\n"

		"INSERT INTO \"" INDEX_STATS_NAME "\"\n"
		"VALUES\n"
		"(\n"
		":database_name,\n"
		":table_name,\n"
		":index_name,\n"
		":last_update,\n"
		":stat_name,\n"
		":stat_value,\n"
		":sample_size,\n"
		":stat_description\n"
		");\n"
		"END;", trx);

	if (ret != DB_SUCCESS) {
		char	buf_table[MAX_FULL_NAME_LEN];
		char	buf_index[MAX_FULL_NAME_LEN];
		ut_print_timestamp(stderr);
		fprintf(stderr,
			" InnoDB: Cannot save index statistics for table "
			"%s, index %s, stat name \"%s\": %s\n",
			ut_format_name(index->table->name, TRUE,
				       buf_table, sizeof(buf_table)),
			ut_format_name(index->name, FALSE,
				       buf_index, sizeof(buf_index)),
			stat_name, ut_strerr(ret));
	}

	return(ret);
}
2380
/** Save the table's statistics into the persistent statistics storage.
@param[in]	table_orig	table whose stats to save
@param[in]	only_for_index	if this is non-NULL, then stats for indexes
that are not equal to it will not be saved, if NULL, then all
indexes' stats are saved
@return DB_SUCCESS or error code */
static
dberr_t
dict_stats_save(
/*============*/
	dict_table_t*		table_orig,
	const index_id_t*	only_for_index)
{
	pars_info_t*	pinfo;
	lint		now;
	dberr_t		ret;
	dict_table_t*	table;
	char		db_utf8[MAX_DB_UTF8_LEN];
	char		table_utf8[MAX_TABLE_UTF8_LEN];

	/* work on a snapshot so the dict_sys objects are not held
	while we run the SQL below */
	table = dict_stats_snapshot_create(table_orig);

	dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
		     table_utf8, sizeof(table_utf8));

	rw_lock_x_lock(&dict_operation_lock);
	mutex_enter(&dict_sys->mutex);

	/* MySQL's timestamp is 4 byte, so we use
	pars_info_add_int4_literal() which takes a lint arg, so "now" is
	lint */
	now = (lint) ut_time();

	pinfo = pars_info_create();

	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
	pars_info_add_int4_literal(pinfo, "last_update", now);
	pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows);
	pars_info_add_ull_literal(pinfo, "clustered_index_size",
				  table->stat_clustered_index_size);
	pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes",
				  table->stat_sum_of_other_index_sizes);

	/* first save the table-level stats (upsert via DELETE+INSERT) */
	ret = dict_stats_exec_sql(
		pinfo,
		"PROCEDURE TABLE_STATS_SAVE () IS\n"
		"BEGIN\n"

		"DELETE FROM \"" TABLE_STATS_NAME "\"\n"
		"WHERE\n"
		"database_name = :database_name AND\n"
		"table_name = :table_name;\n"

		"INSERT INTO \"" TABLE_STATS_NAME "\"\n"
		"VALUES\n"
		"(\n"
		":database_name,\n"
		":table_name,\n"
		":last_update,\n"
		":n_rows,\n"
		":clustered_index_size,\n"
		":sum_of_other_index_sizes\n"
		");\n"
		"END;", NULL);

	if (ret != DB_SUCCESS) {
		char	buf[MAX_FULL_NAME_LEN];
		ut_print_timestamp(stderr);
		fprintf(stderr,
			" InnoDB: Cannot save table statistics for table "
			"%s: %s\n",
			ut_format_name(table->name, TRUE, buf, sizeof(buf)),
			ut_strerr(ret));

		mutex_exit(&dict_sys->mutex);
		rw_lock_x_unlock(&dict_operation_lock);

		dict_stats_snapshot_free(table);

		return(ret);
	}

	trx_t*	trx = trx_allocate_for_background();
	trx_start_if_not_started(trx);

	dict_index_t*	index;
	/* NOTE(review): index_map_t is expected to iterate in index-name
	order (the deadlock-avoidance scheme below relies on it) — its
	declaration is elsewhere in this file */
	index_map_t	indexes;

	/* Below we do all the modifications in innodb_index_stats in a single
	transaction for performance reasons. Modifying more than one row in a
	single transaction may deadlock with other transactions if they
	lock the rows in different order. Other transaction could be for
	example when we DROP a table and do
	DELETE FROM innodb_index_stats WHERE database_name = '...'
	AND table_name = '...'; which will affect more than one row. To
	prevent deadlocks we always lock the rows in the same order - the
	order of the PK, which is (database_name, table_name, index_name,
	stat_name). This is why below we sort the indexes by name and then
	for each index, do the mods ordered by stat_name. */

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		indexes[index->name] = index;
	}

	index_map_t::const_iterator	it;

	for (it = indexes.begin(); it != indexes.end(); ++it) {

		index = it->second;

		if (only_for_index != NULL && index->id != *only_for_index) {
			continue;
		}

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_univ(index));

		/* save one n_diff_pfxNN row per unique-prefix length */
		for (ulint i = 0; i < index->n_uniq; i++) {

			char	stat_name[16];
			char	stat_description[1024];
			ulint	j;

			ut_snprintf(stat_name, sizeof(stat_name),
				    "n_diff_pfx%02lu", i + 1);

			/* craft a string that contains the columns names */
			ut_snprintf(stat_description,
				    sizeof(stat_description),
				    "%s", index->fields[0].name);
			for (j = 1; j <= i; j++) {
				size_t	len;

				len = strlen(stat_description);

				ut_snprintf(stat_description + len,
					    sizeof(stat_description) - len,
					    ",%s", index->fields[j].name);
			}

			ret = dict_stats_save_index_stat(
				index, now, stat_name,
				index->stat_n_diff_key_vals[i],
				&index->stat_n_sample_sizes[i],
				stat_description, trx);

			if (ret != DB_SUCCESS) {
				/* trx is rolled back inside
				dict_stats_save_index_stat(); just clean up */
				goto end;
			}
		}

		ret = dict_stats_save_index_stat(index, now, "n_leaf_pages",
						 index->stat_n_leaf_pages,
						 NULL,
						 "Number of leaf pages "
						 "in the index", trx);
		if (ret != DB_SUCCESS) {
			goto end;
		}

		ret = dict_stats_save_index_stat(index, now, "size",
						 index->stat_index_size,
						 NULL,
						 "Number of pages "
						 "in the index", trx);
		if (ret != DB_SUCCESS) {
			goto end;
		}
	}

	trx_commit_for_mysql(trx);
	/* fall through to the common cleanup path */

end:
	trx_free_for_background(trx);

	mutex_exit(&dict_sys->mutex);
	rw_lock_x_unlock(&dict_operation_lock);

	dict_stats_snapshot_free(table);

	return(ret);
}
2570
2571 /*********************************************************************//**
2572 Called for the row that is selected by
2573 SELECT ... FROM mysql.innodb_table_stats WHERE table='...'
2574 The second argument is a pointer to the table and the fetched stats are
2575 written to it.
2576 @return non-NULL dummy */
2577 static
2578 ibool
dict_stats_fetch_table_stats_step(void * node_void,void * table_void)2579 dict_stats_fetch_table_stats_step(
2580 /*==============================*/
2581 void* node_void, /*!< in: select node */
2582 void* table_void) /*!< out: table */
2583 {
2584 sel_node_t* node = (sel_node_t*) node_void;
2585 dict_table_t* table = (dict_table_t*) table_void;
2586 que_common_t* cnode;
2587 int i;
2588
2589 /* this should loop exactly 3 times - for
2590 n_rows,clustered_index_size,sum_of_other_index_sizes */
2591 for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
2592 cnode != NULL;
2593 cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
2594 i++) {
2595
2596 const byte* data;
2597 dfield_t* dfield = que_node_get_val(cnode);
2598 dtype_t* type = dfield_get_type(dfield);
2599 ulint len = dfield_get_len(dfield);
2600
2601 data = static_cast<const byte*>(dfield_get_data(dfield));
2602
2603 switch (i) {
2604 case 0: /* mysql.innodb_table_stats.n_rows */
2605
2606 ut_a(dtype_get_mtype(type) == DATA_INT);
2607 ut_a(len == 8);
2608
2609 table->stat_n_rows = mach_read_from_8(data);
2610
2611 break;
2612
2613 case 1: /* mysql.innodb_table_stats.clustered_index_size */
2614
2615 ut_a(dtype_get_mtype(type) == DATA_INT);
2616 ut_a(len == 8);
2617
2618 table->stat_clustered_index_size
2619 = (ulint) mach_read_from_8(data);
2620
2621 break;
2622
2623 case 2: /* mysql.innodb_table_stats.sum_of_other_index_sizes */
2624
2625 ut_a(dtype_get_mtype(type) == DATA_INT);
2626 ut_a(len == 8);
2627
2628 table->stat_sum_of_other_index_sizes
2629 = (ulint) mach_read_from_8(data);
2630
2631 break;
2632
2633 default:
2634
2635 /* someone changed SELECT
2636 n_rows,clustered_index_size,sum_of_other_index_sizes
2637 to select more columns from innodb_table_stats without
2638 adjusting here */
2639 ut_error;
2640 }
2641 }
2642
2643 /* if i < 3 this means someone changed the
2644 SELECT n_rows,clustered_index_size,sum_of_other_index_sizes
2645 to select less columns from innodb_table_stats without adjusting here;
2646 if i > 3 we would have ut_error'ed earlier */
2647 ut_a(i == 3 /*n_rows,clustered_index_size,sum_of_other_index_sizes*/);
2648
2649 /* XXX this is not used but returning non-NULL is necessary */
2650 return(TRUE);
2651 }
2652
/** Aux struct used to pass a table and a boolean to
dict_stats_fetch_index_stats_step() via the fetch callback's single
void* user argument. */
struct index_fetch_t {
	dict_table_t*	table;	/*!< table whose indexes are to be modified */
	bool		stats_were_modified; /*!< will be set to true if at
				least one index stats were modified */
};
2660
2661 /*********************************************************************//**
2662 Called for the rows that are selected by
2663 SELECT ... FROM mysql.innodb_index_stats WHERE table='...'
2664 The second argument is a pointer to the table and the fetched stats are
2665 written to its indexes.
2666 Let a table has N indexes and each index has Ui unique columns for i=1..N,
2667 then mysql.innodb_index_stats will have SUM(Ui) i=1..N rows for that table.
2668 So this function will be called SUM(Ui) times where SUM(Ui) is of magnitude
2669 N*AVG(Ui). In each call it searches for the currently fetched index into
2670 table->indexes linearly, assuming this list is not sorted. Thus, overall,
2671 fetching all indexes' stats from mysql.innodb_index_stats is O(N^2) where N
2672 is the number of indexes.
2673 This can be improved if we sort table->indexes in a temporary area just once
2674 and then search in that sorted list. Then the complexity will be O(N*log(N)).
2675 We assume a table will not have more than 100 indexes, so we go with the
2676 simpler N^2 algorithm.
2677 @return non-NULL dummy */
2678 static
2679 ibool
dict_stats_fetch_index_stats_step(void * node_void,void * arg_void)2680 dict_stats_fetch_index_stats_step(
2681 /*==============================*/
2682 void* node_void, /*!< in: select node */
2683 void* arg_void) /*!< out: table + a flag that tells if we
2684 modified anything */
2685 {
2686 sel_node_t* node = (sel_node_t*) node_void;
2687 index_fetch_t* arg = (index_fetch_t*) arg_void;
2688 dict_table_t* table = arg->table;
2689 dict_index_t* index = NULL;
2690 que_common_t* cnode;
2691 const char* stat_name = NULL;
2692 ulint stat_name_len = ULINT_UNDEFINED;
2693 ib_uint64_t stat_value = UINT64_UNDEFINED;
2694 ib_uint64_t sample_size = UINT64_UNDEFINED;
2695 int i;
2696
2697 /* this should loop exactly 4 times - for the columns that
2698 were selected: index_name,stat_name,stat_value,sample_size */
2699 for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
2700 cnode != NULL;
2701 cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
2702 i++) {
2703
2704 const byte* data;
2705 dfield_t* dfield = que_node_get_val(cnode);
2706 dtype_t* type = dfield_get_type(dfield);
2707 ulint len = dfield_get_len(dfield);
2708
2709 data = static_cast<const byte*>(dfield_get_data(dfield));
2710
2711 switch (i) {
2712 case 0: /* mysql.innodb_index_stats.index_name */
2713
2714 ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
2715
2716 /* search for index in table's indexes whose name
2717 matches data; the fetched index name is in data,
2718 has no terminating '\0' and has length len */
2719 for (index = dict_table_get_first_index(table);
2720 index != NULL;
2721 index = dict_table_get_next_index(index)) {
2722
2723 if (strlen(index->name) == len
2724 && memcmp(index->name, data, len) == 0) {
2725 /* the corresponding index was found */
2726 break;
2727 }
2728 }
2729
2730 /* if index is NULL here this means that
2731 mysql.innodb_index_stats contains more rows than the
2732 number of indexes in the table; this is ok, we just
2733 return ignoring those extra rows; in other words
2734 dict_stats_fetch_index_stats_step() has been called
2735 for a row from index_stats with unknown index_name
2736 column */
2737 if (index == NULL) {
2738
2739 return(TRUE);
2740 }
2741
2742 break;
2743
2744 case 1: /* mysql.innodb_index_stats.stat_name */
2745
2746 ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
2747
2748 ut_a(index != NULL);
2749
2750 stat_name = (const char*) data;
2751 stat_name_len = len;
2752
2753 break;
2754
2755 case 2: /* mysql.innodb_index_stats.stat_value */
2756
2757 ut_a(dtype_get_mtype(type) == DATA_INT);
2758 ut_a(len == 8);
2759
2760 ut_a(index != NULL);
2761 ut_a(stat_name != NULL);
2762 ut_a(stat_name_len != ULINT_UNDEFINED);
2763
2764 stat_value = mach_read_from_8(data);
2765
2766 break;
2767
2768 case 3: /* mysql.innodb_index_stats.sample_size */
2769
2770 ut_a(dtype_get_mtype(type) == DATA_INT);
2771 ut_a(len == 8 || len == UNIV_SQL_NULL);
2772
2773 ut_a(index != NULL);
2774 ut_a(stat_name != NULL);
2775 ut_a(stat_name_len != ULINT_UNDEFINED);
2776 ut_a(stat_value != UINT64_UNDEFINED);
2777
2778 if (len == UNIV_SQL_NULL) {
2779 break;
2780 }
2781 /* else */
2782
2783 sample_size = mach_read_from_8(data);
2784
2785 break;
2786
2787 default:
2788
2789 /* someone changed
2790 SELECT index_name,stat_name,stat_value,sample_size
2791 to select more columns from innodb_index_stats without
2792 adjusting here */
2793 ut_error;
2794 }
2795 }
2796
2797 /* if i < 4 this means someone changed the
2798 SELECT index_name,stat_name,stat_value,sample_size
2799 to select less columns from innodb_index_stats without adjusting here;
2800 if i > 4 we would have ut_error'ed earlier */
2801 ut_a(i == 4 /* index_name,stat_name,stat_value,sample_size */);
2802
2803 ut_a(index != NULL);
2804 ut_a(stat_name != NULL);
2805 ut_a(stat_name_len != ULINT_UNDEFINED);
2806 ut_a(stat_value != UINT64_UNDEFINED);
2807 /* sample_size could be UINT64_UNDEFINED here, if it is NULL */
2808
2809 #define PFX "n_diff_pfx"
2810 #define PFX_LEN 10
2811
2812 if (stat_name_len == 4 /* strlen("size") */
2813 && strncasecmp("size", stat_name, stat_name_len) == 0) {
2814 index->stat_index_size = (ulint) stat_value;
2815 arg->stats_were_modified = true;
2816 } else if (stat_name_len == 12 /* strlen("n_leaf_pages") */
2817 && strncasecmp("n_leaf_pages", stat_name, stat_name_len)
2818 == 0) {
2819 index->stat_n_leaf_pages = (ulint) stat_value;
2820 arg->stats_were_modified = true;
2821 } else if (stat_name_len > PFX_LEN /* e.g. stat_name=="n_diff_pfx01" */
2822 && strncasecmp(PFX, stat_name, PFX_LEN) == 0) {
2823
2824 const char* num_ptr;
2825 unsigned long n_pfx;
2826
2827 /* point num_ptr into "1" from "n_diff_pfx12..." */
2828 num_ptr = stat_name + PFX_LEN;
2829
2830 /* stat_name should have exactly 2 chars appended to PFX
2831 and they should be digits */
2832 if (stat_name_len != PFX_LEN + 2
2833 || num_ptr[0] < '0' || num_ptr[0] > '9'
2834 || num_ptr[1] < '0' || num_ptr[1] > '9') {
2835
2836 char db_utf8[MAX_DB_UTF8_LEN];
2837 char table_utf8[MAX_TABLE_UTF8_LEN];
2838
2839 dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
2840 table_utf8, sizeof(table_utf8));
2841
2842 ut_print_timestamp(stderr);
2843 fprintf(stderr,
2844 " InnoDB: Ignoring strange row from "
2845 "%s WHERE "
2846 "database_name = '%s' AND "
2847 "table_name = '%s' AND "
2848 "index_name = '%s' AND "
2849 "stat_name = '%.*s'; because stat_name "
2850 "is malformed\n",
2851 INDEX_STATS_NAME_PRINT,
2852 db_utf8,
2853 table_utf8,
2854 index->name,
2855 (int) stat_name_len,
2856 stat_name);
2857 return(TRUE);
2858 }
2859 /* else */
2860
2861 /* extract 12 from "n_diff_pfx12..." into n_pfx
2862 note that stat_name does not have a terminating '\0' */
2863 n_pfx = (num_ptr[0] - '0') * 10 + (num_ptr[1] - '0');
2864
2865 ulint n_uniq = index->n_uniq;
2866
2867 if (n_pfx == 0 || n_pfx > n_uniq) {
2868
2869 char db_utf8[MAX_DB_UTF8_LEN];
2870 char table_utf8[MAX_TABLE_UTF8_LEN];
2871
2872 dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
2873 table_utf8, sizeof(table_utf8));
2874
2875 ut_print_timestamp(stderr);
2876 fprintf(stderr,
2877 " InnoDB: Ignoring strange row from "
2878 "%s WHERE "
2879 "database_name = '%s' AND "
2880 "table_name = '%s' AND "
2881 "index_name = '%s' AND "
2882 "stat_name = '%.*s'; because stat_name is "
2883 "out of range, the index has %lu unique "
2884 "columns\n",
2885 INDEX_STATS_NAME_PRINT,
2886 db_utf8,
2887 table_utf8,
2888 index->name,
2889 (int) stat_name_len,
2890 stat_name,
2891 n_uniq);
2892 return(TRUE);
2893 }
2894 /* else */
2895
2896 index->stat_n_diff_key_vals[n_pfx - 1] = stat_value;
2897
2898 if (sample_size != UINT64_UNDEFINED) {
2899 index->stat_n_sample_sizes[n_pfx - 1] = sample_size;
2900 } else {
2901 /* hmm, strange... the user must have UPDATEd the
2902 table manually and SET sample_size = NULL */
2903 index->stat_n_sample_sizes[n_pfx - 1] = 0;
2904 }
2905
2906 index->stat_n_non_null_key_vals[n_pfx - 1] = 0;
2907
2908 arg->stats_were_modified = true;
2909 } else {
2910 /* silently ignore rows with unknown stat_name, the
2911 user may have developed her own stats */
2912 }
2913
2914 /* XXX this is not used but returning non-NULL is necessary */
2915 return(TRUE);
2916 }
2917
/*********************************************************************//**
Read table's statistics from the persistent statistics storage.
The table object must already exist; all its stats are first reset to
dummy values and then overwritten by whatever rows exist in the
persistent statistics tables.
@return DB_SUCCESS or error code */
static
dberr_t
dict_stats_fetch_from_ps(
/*=====================*/
	dict_table_t*	table)	/*!< in/out: table */
{
	index_fetch_t	index_fetch_arg;
	trx_t*		trx;
	pars_info_t*	pinfo;
	dberr_t		ret;
	char		db_utf8[MAX_DB_UTF8_LEN];
	char		table_utf8[MAX_TABLE_UTF8_LEN];

	ut_ad(!mutex_own(&dict_sys->mutex));

	/* Initialize all stats to dummy values before fetching because if
	the persistent storage contains incomplete stats (e.g. missing stats
	for some index) then we would end up with (partially) uninitialized
	stats. */
	dict_stats_empty_table(table);

	trx = trx_allocate_for_background();

	/* Use 'read-uncommitted' so that the SELECTs we execute
	do not get blocked in case some user has locked the rows we
	are SELECTing */

	trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;

	trx_start_if_not_started(trx);

	/* The stats tables are keyed on the utf8 database and table
	name, not on the filesystem-encoded 'db/table' name. */
	dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
		     table_utf8, sizeof(table_utf8));

	pinfo = pars_info_create();

	pars_info_add_str_literal(pinfo, "database_name", db_utf8);

	pars_info_add_str_literal(pinfo, "table_name", table_utf8);

	/* The two callbacks below are invoked by the FETCH ... INTO
	statements in the SQL procedure, once per fetched row. */
	pars_info_bind_function(pinfo,
			       "fetch_table_stats_step",
			       dict_stats_fetch_table_stats_step,
			       table);

	index_fetch_arg.table = table;
	index_fetch_arg.stats_were_modified = false;
	pars_info_bind_function(pinfo,
			        "fetch_index_stats_step",
			        dict_stats_fetch_index_stats_step,
			        &index_fetch_arg);

	ret = que_eval_sql(pinfo,
			   "PROCEDURE FETCH_STATS () IS\n"
			   "found INT;\n"
			   "DECLARE FUNCTION fetch_table_stats_step;\n"
			   "DECLARE FUNCTION fetch_index_stats_step;\n"
			   "DECLARE CURSOR table_stats_cur IS\n"
			   "  SELECT\n"
			   /* if you change the selected fields, be
			   sure to adjust
			   dict_stats_fetch_table_stats_step() */
			   "  n_rows,\n"
			   "  clustered_index_size,\n"
			   "  sum_of_other_index_sizes\n"
			   "  FROM \"" TABLE_STATS_NAME "\"\n"
			   "  WHERE\n"
			   "  database_name = :database_name AND\n"
			   "  table_name = :table_name;\n"
			   "DECLARE CURSOR index_stats_cur IS\n"
			   "  SELECT\n"
			   /* if you change the selected fields, be
			   sure to adjust
			   dict_stats_fetch_index_stats_step() */
			   "  index_name,\n"
			   "  stat_name,\n"
			   "  stat_value,\n"
			   "  sample_size\n"
			   "  FROM \"" INDEX_STATS_NAME "\"\n"
			   "  WHERE\n"
			   "  database_name = :database_name AND\n"
			   "  table_name = :table_name;\n"

			   "BEGIN\n"

			   "OPEN table_stats_cur;\n"
			   "FETCH table_stats_cur INTO\n"
			   "  fetch_table_stats_step();\n"
			   "IF (SQL % NOTFOUND) THEN\n"
			   "  CLOSE table_stats_cur;\n"
			   "  RETURN;\n"
			   "END IF;\n"
			   "CLOSE table_stats_cur;\n"

			   "OPEN index_stats_cur;\n"
			   "found := 1;\n"
			   "WHILE found = 1 LOOP\n"
			   "  FETCH index_stats_cur INTO\n"
			   "    fetch_index_stats_step();\n"
			   "  IF (SQL % NOTFOUND) THEN\n"
			   "    found := 0;\n"
			   "  END IF;\n"
			   "END LOOP;\n"
			   "CLOSE index_stats_cur;\n"

			   "END;",
			   TRUE, trx);
	/* pinfo is freed by que_eval_sql() */

	trx_commit_for_mysql(trx);

	trx_free_for_background(trx);

	/* If the index-stats callback never fired (or ignored every
	row), there are no stats stored for this table; report that.
	Note that this check takes precedence over 'ret'. */
	if (!index_fetch_arg.stats_were_modified) {
		return(DB_STATS_DO_NOT_EXIST);
	}

	return(ret);
}
3040
3041 /*********************************************************************//**
3042 Fetches or calculates new estimates for index statistics. */
3043 UNIV_INTERN
3044 void
dict_stats_update_for_index(dict_index_t * index)3045 dict_stats_update_for_index(
3046 /*========================*/
3047 dict_index_t* index) /*!< in/out: index */
3048 {
3049 DBUG_ENTER("dict_stats_update_for_index");
3050
3051 ut_ad(!mutex_own(&dict_sys->mutex));
3052
3053 if (dict_stats_is_persistent_enabled(index->table)) {
3054
3055 if (dict_stats_persistent_storage_check(false)) {
3056 dict_table_stats_lock(index->table, RW_X_LATCH);
3057 dict_stats_analyze_index(index);
3058 index->table->stat_sum_of_other_index_sizes += index->stat_index_size;
3059 dict_table_stats_unlock(index->table, RW_X_LATCH);
3060 dict_stats_save(index->table, &index->id);
3061 DBUG_VOID_RETURN;
3062 }
3063 /* else */
3064
3065 /* Fall back to transient stats since the persistent
3066 storage is not present or is corrupted */
3067 char buf_table[MAX_FULL_NAME_LEN];
3068 char buf_index[MAX_FULL_NAME_LEN];
3069 ut_print_timestamp(stderr);
3070 fprintf(stderr,
3071 " InnoDB: Recalculation of persistent statistics "
3072 "requested for table %s index %s but the required "
3073 "persistent statistics storage is not present or is "
3074 "corrupted. Using transient stats instead.\n",
3075 ut_format_name(index->table->name, TRUE,
3076 buf_table, sizeof(buf_table)),
3077 ut_format_name(index->name, FALSE,
3078 buf_index, sizeof(buf_index)));
3079 }
3080
3081 dict_table_stats_lock(index->table, RW_X_LATCH);
3082 dict_stats_update_transient_for_index(index);
3083 dict_table_stats_unlock(index->table, RW_X_LATCH);
3084
3085 DBUG_VOID_RETURN;
3086 }
3087
/*********************************************************************//**
Calculates new estimates for table and index statistics. The statistics
are used in query optimization.
@return DB_SUCCESS or error code */
UNIV_INTERN
dberr_t
dict_stats_update(
/*==============*/
	dict_table_t*		table,	/*!< in/out: table */
	dict_stats_upd_option_t	stats_upd_option)
					/*!< in: whether to (re) calc
					the stats or to fetch them from
					the persistent statistics
					storage */
{
	char	buf[MAX_FULL_NAME_LEN];

	ut_ad(!mutex_own(&dict_sys->mutex));

	if (table->ibd_file_missing) {
		/* Without the tablespace file there is nothing to scan;
		reset the stats to dummy values and report the error. */
		ut_print_timestamp(stderr);
		fprintf(stderr,
			" InnoDB: cannot calculate statistics for table %s "
			"because the .ibd file is missing. For help, please "
			"refer to " REFMAN "innodb-troubleshooting.html\n",
			ut_format_name(table->name, TRUE, buf, sizeof(buf)));
		dict_stats_empty_table(table);
		return(DB_TABLESPACE_DELETED);
	} else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
		/* If we have set a high innodb_force_recovery level, do
		not calculate statistics, as a badly corrupted index can
		cause a crash in it. */
		dict_stats_empty_table(table);
		return(DB_SUCCESS);
	}

	switch (stats_upd_option) {
	case DICT_STATS_RECALC_PERSISTENT:

		if (srv_read_only_mode) {
			/* Cannot write to the stats tables in
			read-only mode; use transient stats instead. */
			goto transient;
		}

		/* Persistent recalculation requested, called from
		1) ANALYZE TABLE, or
		2) the auto recalculation background thread, or
		3) open table if stats do not exist on disk and auto recalc
		   is enabled */

		/* InnoDB internal tables (e.g. SYS_TABLES) cannot have
		persistent stats enabled */
		ut_a(strchr(table->name, '/') != NULL);

		/* check if the persistent statistics storage exists
		before calling the potentially slow function
		dict_stats_update_persistent(); that is a
		prerequisite for dict_stats_save() succeeding */
		if (dict_stats_persistent_storage_check(false)) {

			dberr_t	err;

			err = dict_stats_update_persistent(table);

			if (err != DB_SUCCESS) {
				return(err);
			}

			/* NULL index id: save the stats for the whole
			table, all indexes included. */
			err = dict_stats_save(table, NULL);

			return(err);
		}

		/* Fall back to transient stats since the persistent
		storage is not present or is corrupted */

		ut_print_timestamp(stderr);
		fprintf(stderr,
			" InnoDB: Recalculation of persistent statistics "
			"requested for table %s but the required persistent "
			"statistics storage is not present or is corrupted. "
			"Using transient stats instead.\n",
			ut_format_name(table->name, TRUE, buf, sizeof(buf)));

		goto transient;

	case DICT_STATS_RECALC_TRANSIENT:

		goto transient;

	case DICT_STATS_EMPTY_TABLE:

		dict_stats_empty_table(table);

		/* If table is using persistent stats,
		then save the stats on disk */

		if (dict_stats_is_persistent_enabled(table)) {

			if (dict_stats_persistent_storage_check(false)) {

				return(dict_stats_save(table, NULL));
			}

			return(DB_STATS_DO_NOT_EXIST);
		}

		return(DB_SUCCESS);

	case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY:

		/* fetch requested, either fetch from persistent statistics
		storage or use the old method */

		if (table->stat_initialized) {
			/* Stats are already cached in memory;
			nothing to do. */
			return(DB_SUCCESS);
		}

		/* InnoDB internal tables (e.g. SYS_TABLES) cannot have
		persistent stats enabled */
		ut_a(strchr(table->name, '/') != NULL);

		if (!dict_stats_persistent_storage_check(false)) {
			/* persistent statistics storage does not exist
			or is corrupted, calculate the transient stats */

			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Error: Fetch of persistent "
				"statistics requested for table %s but the "
				"required system tables %s and %s are not "
				"present or have unexpected structure. "
				"Using transient stats instead.\n",
				ut_format_name(table->name, TRUE,
					       buf, sizeof(buf)),
				TABLE_STATS_NAME_PRINT,
				INDEX_STATS_NAME_PRINT);

			goto transient;
		}

		dict_table_t*	t;

		/* Create a dummy table object with the same name and
		indexes, suitable for fetching the stats into it. */
		t = dict_stats_table_clone_create(table);

		dberr_t	err = dict_stats_fetch_from_ps(t);

		t->stats_last_recalc = table->stats_last_recalc;
		t->stat_modified_counter = 0;

		switch (err) {
		case DB_SUCCESS:

			dict_table_stats_lock(table, RW_X_LATCH);

			/* Copy the fetched stats from the clone into
			the real table object under the stats latch. */
			dict_stats_copy(table, t);

			dict_stats_assert_initialized(table);

			dict_table_stats_unlock(table, RW_X_LATCH);

			dict_stats_table_clone_free(t);

			return(DB_SUCCESS);
		case DB_STATS_DO_NOT_EXIST:

			dict_stats_table_clone_free(t);

			if (srv_read_only_mode) {
				goto transient;
			}

			if (dict_stats_auto_recalc_is_enabled(table)) {
				/* Recursive call: recalculate and save
				the missing stats now. */
				return(dict_stats_update(
					       table,
					       DICT_STATS_RECALC_PERSISTENT));
			}

			ut_format_name(table->name, TRUE, buf, sizeof(buf));
			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Trying to use table %s which has "
				"persistent statistics enabled, but auto "
				"recalculation turned off and the statistics "
				"do not exist in %s and %s. Please either run "
				"\"ANALYZE TABLE %s;\" manually or enable the "
				"auto recalculation with "
				"\"ALTER TABLE %s STATS_AUTO_RECALC=1;\". "
				"InnoDB will now use transient statistics for "
				"%s.\n",
				buf, TABLE_STATS_NAME, INDEX_STATS_NAME, buf,
				buf, buf);

			goto transient;
		default:

			dict_stats_table_clone_free(t);

			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Error fetching persistent statistics "
				"for table %s from %s and %s: %s. "
				"Using transient stats method instead.\n",
				ut_format_name(table->name, TRUE, buf,
					       sizeof(buf)),
				TABLE_STATS_NAME,
				INDEX_STATS_NAME,
				ut_strerr(err));

			goto transient;
		}
	/* no "default:" in order to produce a compilation warning
	about unhandled enumeration value */
	}

transient:

	dict_table_stats_lock(table, RW_X_LATCH);

	dict_stats_update_transient(table);

	dict_table_stats_unlock(table, RW_X_LATCH);

	return(DB_SUCCESS);
}
3314
3315 /*********************************************************************//**
3316 Removes the information for a particular index's stats from the persistent
3317 storage if it exists and if there is data stored for this index.
3318 This function creates its own trx and commits it.
3319 A note from Marko why we cannot edit user and sys_* tables in one trx:
3320 marko: The problem is that ibuf merges should be disabled while we are
3321 rolling back dict transactions.
3322 marko: If ibuf merges are not disabled, we need to scan the *.ibd files.
3323 But we shouldn't open *.ibd files before we have rolled back dict
3324 transactions and opened the SYS_* records for the *.ibd files.
3325 @return DB_SUCCESS or error code */
3326 UNIV_INTERN
3327 dberr_t
dict_stats_drop_index(const char * db_and_table,const char * iname,char * errstr,ulint errstr_sz)3328 dict_stats_drop_index(
3329 /*==================*/
3330 const char* db_and_table,/*!< in: db and table, e.g. 'db/table' */
3331 const char* iname, /*!< in: index name */
3332 char* errstr, /*!< out: error message if != DB_SUCCESS
3333 is returned */
3334 ulint errstr_sz)/*!< in: size of the errstr buffer */
3335 {
3336 char db_utf8[MAX_DB_UTF8_LEN];
3337 char table_utf8[MAX_TABLE_UTF8_LEN];
3338 pars_info_t* pinfo;
3339 dberr_t ret;
3340
3341 ut_ad(!mutex_own(&dict_sys->mutex));
3342
3343 /* skip indexes whose table names do not contain a database name
3344 e.g. if we are dropping an index from SYS_TABLES */
3345 if (strchr(db_and_table, '/') == NULL) {
3346
3347 return(DB_SUCCESS);
3348 }
3349
3350 dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
3351 table_utf8, sizeof(table_utf8));
3352
3353 pinfo = pars_info_create();
3354
3355 pars_info_add_str_literal(pinfo, "database_name", db_utf8);
3356
3357 pars_info_add_str_literal(pinfo, "table_name", table_utf8);
3358
3359 pars_info_add_str_literal(pinfo, "index_name", iname);
3360
3361 rw_lock_x_lock(&dict_operation_lock);
3362 mutex_enter(&dict_sys->mutex);
3363
3364 ret = dict_stats_exec_sql(
3365 pinfo,
3366 "PROCEDURE DROP_INDEX_STATS () IS\n"
3367 "BEGIN\n"
3368 "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
3369 "database_name = :database_name AND\n"
3370 "table_name = :table_name AND\n"
3371 "index_name = :index_name;\n"
3372 "END;\n", NULL);
3373
3374 mutex_exit(&dict_sys->mutex);
3375 rw_lock_x_unlock(&dict_operation_lock);
3376
3377 if (ret == DB_STATS_DO_NOT_EXIST) {
3378 ret = DB_SUCCESS;
3379 }
3380
3381 if (ret != DB_SUCCESS) {
3382 ut_snprintf(errstr, errstr_sz,
3383 "Unable to delete statistics for index %s "
3384 "from %s%s: %s. They can be deleted later using "
3385 "DELETE FROM %s WHERE "
3386 "database_name = '%s' AND "
3387 "table_name = '%s' AND "
3388 "index_name = '%s';",
3389 iname,
3390 INDEX_STATS_NAME_PRINT,
3391 (ret == DB_LOCK_WAIT_TIMEOUT
3392 ? " because the rows are locked"
3393 : ""),
3394 ut_strerr(ret),
3395 INDEX_STATS_NAME_PRINT,
3396 db_utf8,
3397 table_utf8,
3398 iname);
3399
3400 ut_print_timestamp(stderr);
3401 fprintf(stderr, " InnoDB: %s\n", errstr);
3402 }
3403
3404 return(ret);
3405 }
3406
3407 /*********************************************************************//**
3408 Executes
3409 DELETE FROM mysql.innodb_table_stats
3410 WHERE database_name = '...' AND table_name = '...';
3411 Creates its own transaction and commits it.
3412 @return DB_SUCCESS or error code */
3413 UNIV_INLINE
3414 dberr_t
dict_stats_delete_from_table_stats(const char * database_name,const char * table_name)3415 dict_stats_delete_from_table_stats(
3416 /*===============================*/
3417 const char* database_name, /*!< in: database name, e.g. 'db' */
3418 const char* table_name) /*!< in: table name, e.g. 'table' */
3419 {
3420 pars_info_t* pinfo;
3421 dberr_t ret;
3422
3423 #ifdef UNIV_SYNC_DEBUG
3424 ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
3425 #endif /* UNIV_SYNC_DEBUG */
3426 ut_ad(mutex_own(&dict_sys->mutex));
3427
3428 pinfo = pars_info_create();
3429
3430 pars_info_add_str_literal(pinfo, "database_name", database_name);
3431 pars_info_add_str_literal(pinfo, "table_name", table_name);
3432
3433 ret = dict_stats_exec_sql(
3434 pinfo,
3435 "PROCEDURE DELETE_FROM_TABLE_STATS () IS\n"
3436 "BEGIN\n"
3437 "DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n"
3438 "database_name = :database_name AND\n"
3439 "table_name = :table_name;\n"
3440 "END;\n", NULL);
3441
3442 return(ret);
3443 }
3444
3445 /*********************************************************************//**
3446 Executes
3447 DELETE FROM mysql.innodb_index_stats
3448 WHERE database_name = '...' AND table_name = '...';
3449 Creates its own transaction and commits it.
3450 @return DB_SUCCESS or error code */
3451 UNIV_INLINE
3452 dberr_t
dict_stats_delete_from_index_stats(const char * database_name,const char * table_name)3453 dict_stats_delete_from_index_stats(
3454 /*===============================*/
3455 const char* database_name, /*!< in: database name, e.g. 'db' */
3456 const char* table_name) /*!< in: table name, e.g. 'table' */
3457 {
3458 pars_info_t* pinfo;
3459 dberr_t ret;
3460
3461 #ifdef UNIV_SYNC_DEBUG
3462 ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
3463 #endif /* UNIV_SYNC_DEBUG */
3464 ut_ad(mutex_own(&dict_sys->mutex));
3465
3466 pinfo = pars_info_create();
3467
3468 pars_info_add_str_literal(pinfo, "database_name", database_name);
3469 pars_info_add_str_literal(pinfo, "table_name", table_name);
3470
3471 ret = dict_stats_exec_sql(
3472 pinfo,
3473 "PROCEDURE DELETE_FROM_INDEX_STATS () IS\n"
3474 "BEGIN\n"
3475 "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
3476 "database_name = :database_name AND\n"
3477 "table_name = :table_name;\n"
3478 "END;\n", NULL);
3479
3480 return(ret);
3481 }
3482
3483 /*********************************************************************//**
3484 Removes the statistics for a table and all of its indexes from the
3485 persistent statistics storage if it exists and if there is data stored for
3486 the table. This function creates its own transaction and commits it.
3487 @return DB_SUCCESS or error code */
3488 UNIV_INTERN
3489 dberr_t
dict_stats_drop_table(const char * db_and_table,char * errstr,ulint errstr_sz)3490 dict_stats_drop_table(
3491 /*==================*/
3492 const char* db_and_table, /*!< in: db and table, e.g. 'db/table' */
3493 char* errstr, /*!< out: error message
3494 if != DB_SUCCESS is returned */
3495 ulint errstr_sz) /*!< in: size of errstr buffer */
3496 {
3497 char db_utf8[MAX_DB_UTF8_LEN];
3498 char table_utf8[MAX_TABLE_UTF8_LEN];
3499 dberr_t ret;
3500
3501 #ifdef UNIV_SYNC_DEBUG
3502 ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
3503 #endif /* UNIV_SYNC_DEBUG */
3504 ut_ad(mutex_own(&dict_sys->mutex));
3505
3506 /* skip tables that do not contain a database name
3507 e.g. if we are dropping SYS_TABLES */
3508 if (strchr(db_and_table, '/') == NULL) {
3509
3510 return(DB_SUCCESS);
3511 }
3512
3513 /* skip innodb_table_stats and innodb_index_stats themselves */
3514 if (strcmp(db_and_table, TABLE_STATS_NAME) == 0
3515 || strcmp(db_and_table, INDEX_STATS_NAME) == 0) {
3516
3517 return(DB_SUCCESS);
3518 }
3519
3520 dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
3521 table_utf8, sizeof(table_utf8));
3522
3523 ret = dict_stats_delete_from_table_stats(db_utf8, table_utf8);
3524
3525 if (ret == DB_SUCCESS) {
3526 ret = dict_stats_delete_from_index_stats(db_utf8, table_utf8);
3527 }
3528
3529 if (ret == DB_STATS_DO_NOT_EXIST) {
3530 ret = DB_SUCCESS;
3531 }
3532
3533 if (ret != DB_SUCCESS) {
3534
3535 ut_snprintf(errstr, errstr_sz,
3536 "Unable to delete statistics for table %s.%s: %s. "
3537 "They can be deleted later using "
3538
3539 "DELETE FROM %s WHERE "
3540 "database_name = '%s' AND "
3541 "table_name = '%s'; "
3542
3543 "DELETE FROM %s WHERE "
3544 "database_name = '%s' AND "
3545 "table_name = '%s';",
3546
3547 db_utf8, table_utf8,
3548 ut_strerr(ret),
3549
3550 INDEX_STATS_NAME_PRINT,
3551 db_utf8, table_utf8,
3552
3553 TABLE_STATS_NAME_PRINT,
3554 db_utf8, table_utf8);
3555 }
3556
3557 return(ret);
3558 }
3559
3560 /*********************************************************************//**
3561 Executes
3562 UPDATE mysql.innodb_table_stats SET
3563 database_name = '...', table_name = '...'
3564 WHERE database_name = '...' AND table_name = '...';
3565 Creates its own transaction and commits it.
3566 @return DB_SUCCESS or error code */
3567 UNIV_INLINE
3568 dberr_t
dict_stats_rename_in_table_stats(const char * old_dbname_utf8,const char * old_tablename_utf8,const char * new_dbname_utf8,const char * new_tablename_utf8)3569 dict_stats_rename_in_table_stats(
3570 /*=============================*/
3571 const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
3572 const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
3573 const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
3574 const char* new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
3575 {
3576 pars_info_t* pinfo;
3577 dberr_t ret;
3578
3579 #ifdef UNIV_SYNC_DEBUG
3580 ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
3581 #endif /* UNIV_SYNC_DEBUG */
3582 ut_ad(mutex_own(&dict_sys->mutex));
3583
3584 pinfo = pars_info_create();
3585
3586 pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
3587 pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
3588 pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
3589 pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
3590
3591 ret = dict_stats_exec_sql(
3592 pinfo,
3593 "PROCEDURE RENAME_IN_TABLE_STATS () IS\n"
3594 "BEGIN\n"
3595 "UPDATE \"" TABLE_STATS_NAME "\" SET\n"
3596 "database_name = :new_dbname_utf8,\n"
3597 "table_name = :new_tablename_utf8\n"
3598 "WHERE\n"
3599 "database_name = :old_dbname_utf8 AND\n"
3600 "table_name = :old_tablename_utf8;\n"
3601 "END;\n", NULL);
3602
3603 return(ret);
3604 }
3605
3606 /*********************************************************************//**
3607 Executes
3608 UPDATE mysql.innodb_index_stats SET
3609 database_name = '...', table_name = '...'
3610 WHERE database_name = '...' AND table_name = '...';
3611 Creates its own transaction and commits it.
3612 @return DB_SUCCESS or error code */
3613 UNIV_INLINE
3614 dberr_t
dict_stats_rename_in_index_stats(const char * old_dbname_utf8,const char * old_tablename_utf8,const char * new_dbname_utf8,const char * new_tablename_utf8)3615 dict_stats_rename_in_index_stats(
3616 /*=============================*/
3617 const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
3618 const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
3619 const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
3620 const char* new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
3621 {
3622 pars_info_t* pinfo;
3623 dberr_t ret;
3624
3625 #ifdef UNIV_SYNC_DEBUG
3626 ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
3627 #endif /* UNIV_SYNC_DEBUG */
3628 ut_ad(mutex_own(&dict_sys->mutex));
3629
3630 pinfo = pars_info_create();
3631
3632 pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
3633 pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
3634 pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
3635 pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
3636
3637 ret = dict_stats_exec_sql(
3638 pinfo,
3639 "PROCEDURE RENAME_IN_INDEX_STATS () IS\n"
3640 "BEGIN\n"
3641 "UPDATE \"" INDEX_STATS_NAME "\" SET\n"
3642 "database_name = :new_dbname_utf8,\n"
3643 "table_name = :new_tablename_utf8\n"
3644 "WHERE\n"
3645 "database_name = :old_dbname_utf8 AND\n"
3646 "table_name = :old_tablename_utf8;\n"
3647 "END;\n", NULL);
3648
3649 return(ret);
3650 }
3651
/*********************************************************************//**
Renames a table in InnoDB persistent stats storage.
This function creates its own transaction and commits it.
@return DB_SUCCESS or error code */
UNIV_INTERN
dberr_t
dict_stats_rename_table(
/*====================*/
	const char*	old_name,	/*!< in: old name, e.g. 'db/table' */
	const char*	new_name,	/*!< in: new name, e.g. 'db/table' */
	char*		errstr,		/*!< out: error string if != DB_SUCCESS
					is returned */
	size_t		errstr_sz)	/*!< in: errstr size */
{
	char	old_db_utf8[MAX_DB_UTF8_LEN];
	char	new_db_utf8[MAX_DB_UTF8_LEN];
	char	old_table_utf8[MAX_TABLE_UTF8_LEN];
	char	new_table_utf8[MAX_TABLE_UTF8_LEN];
	dberr_t	ret;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(!mutex_own(&dict_sys->mutex));

	/* skip innodb_table_stats and innodb_index_stats themselves */
	if (strcmp(old_name, TABLE_STATS_NAME) == 0
	    || strcmp(old_name, INDEX_STATS_NAME) == 0
	    || strcmp(new_name, TABLE_STATS_NAME) == 0
	    || strcmp(new_name, INDEX_STATS_NAME) == 0) {

		return(DB_SUCCESS);
	}

	dict_fs2utf8(old_name, old_db_utf8, sizeof(old_db_utf8),
		     old_table_utf8, sizeof(old_table_utf8));

	dict_fs2utf8(new_name, new_db_utf8, sizeof(new_db_utf8),
		     new_table_utf8, sizeof(new_table_utf8));

	rw_lock_x_lock(&dict_operation_lock);
	mutex_enter(&dict_sys->mutex);

	/* Rename in innodb_table_stats, retrying up to 5 times on
	transient failures (deadlock, duplicate key, lock wait
	timeout). On DB_DUPLICATE_KEY the destination rows already
	exist, so delete them before retrying. Between attempts both
	latches are released while sleeping, to let the conflicting
	operation proceed, and reacquired in the same order
	(dict_operation_lock first, then dict_sys->mutex). */
	ulint	n_attempts = 0;
	do {
		n_attempts++;

		ret = dict_stats_rename_in_table_stats(
			old_db_utf8, old_table_utf8,
			new_db_utf8, new_table_utf8);

		if (ret == DB_DUPLICATE_KEY) {
			dict_stats_delete_from_table_stats(
				new_db_utf8, new_table_utf8);
		}

		if (ret == DB_STATS_DO_NOT_EXIST) {
			/* no rows to rename is not an error */
			ret = DB_SUCCESS;
		}

		if (ret != DB_SUCCESS) {
			mutex_exit(&dict_sys->mutex);
			rw_lock_x_unlock(&dict_operation_lock);
			os_thread_sleep(200000 /* 0.2 sec */);
			rw_lock_x_lock(&dict_operation_lock);
			mutex_enter(&dict_sys->mutex);
		}
	} while ((ret == DB_DEADLOCK
		  || ret == DB_DUPLICATE_KEY
		  || ret == DB_LOCK_WAIT_TIMEOUT)
		 && n_attempts < 5);

	if (ret != DB_SUCCESS) {
		/* Give up on the table-stats rename; report how the
		user can redo it manually and bail out without touching
		innodb_index_stats. */
		ut_snprintf(errstr, errstr_sz,
			    "Unable to rename statistics from "
			    "%s.%s to %s.%s in %s: %s. "
			    "They can be renamed later using "

			    "UPDATE %s SET "
			    "database_name = '%s', "
			    "table_name = '%s' "
			    "WHERE "
			    "database_name = '%s' AND "
			    "table_name = '%s';",

			    old_db_utf8, old_table_utf8,
			    new_db_utf8, new_table_utf8,
			    TABLE_STATS_NAME_PRINT,
			    ut_strerr(ret),

			    TABLE_STATS_NAME_PRINT,
			    new_db_utf8, new_table_utf8,
			    old_db_utf8, old_table_utf8);
		mutex_exit(&dict_sys->mutex);
		rw_lock_x_unlock(&dict_operation_lock);
		return(ret);
	}
	/* else */

	/* Rename in innodb_index_stats with the same retry logic as
	above. */
	n_attempts = 0;
	do {
		n_attempts++;

		ret = dict_stats_rename_in_index_stats(
			old_db_utf8, old_table_utf8,
			new_db_utf8, new_table_utf8);

		if (ret == DB_DUPLICATE_KEY) {
			dict_stats_delete_from_index_stats(
				new_db_utf8, new_table_utf8);
		}

		if (ret == DB_STATS_DO_NOT_EXIST) {
			/* no rows to rename is not an error */
			ret = DB_SUCCESS;
		}

		if (ret != DB_SUCCESS) {
			mutex_exit(&dict_sys->mutex);
			rw_lock_x_unlock(&dict_operation_lock);
			os_thread_sleep(200000 /* 0.2 sec */);
			rw_lock_x_lock(&dict_operation_lock);
			mutex_enter(&dict_sys->mutex);
		}
	} while ((ret == DB_DEADLOCK
		  || ret == DB_DUPLICATE_KEY
		  || ret == DB_LOCK_WAIT_TIMEOUT)
		 && n_attempts < 5);

	mutex_exit(&dict_sys->mutex);
	rw_lock_x_unlock(&dict_operation_lock);

	if (ret != DB_SUCCESS) {
		ut_snprintf(errstr, errstr_sz,
			    "Unable to rename statistics from "
			    "%s.%s to %s.%s in %s: %s. "
			    "They can be renamed later using "

			    "UPDATE %s SET "
			    "database_name = '%s', "
			    "table_name = '%s' "
			    "WHERE "
			    "database_name = '%s' AND "
			    "table_name = '%s';",

			    old_db_utf8, old_table_utf8,
			    new_db_utf8, new_table_utf8,
			    INDEX_STATS_NAME_PRINT,
			    ut_strerr(ret),

			    INDEX_STATS_NAME_PRINT,
			    new_db_utf8, new_table_utf8,
			    old_db_utf8, old_table_utf8);
	}

	return(ret);
}
3808
3809 /* tests @{ */
3810 #ifdef UNIV_COMPILE_TEST_FUNCS
3811
3812 /* The following unit tests test some of the functions in this file
3813 individually, such testing cannot be performed by the mysql-test framework
3814 via SQL. */
3815
3816 /* test_dict_table_schema_check() @{ */
3817 void
test_dict_table_schema_check()3818 test_dict_table_schema_check()
3819 {
3820 /*
3821 CREATE TABLE tcheck (
3822 c01 VARCHAR(123),
3823 c02 INT,
3824 c03 INT NOT NULL,
3825 c04 INT UNSIGNED,
3826 c05 BIGINT,
3827 c06 BIGINT UNSIGNED NOT NULL,
3828 c07 TIMESTAMP
3829 ) ENGINE=INNODB;
3830 */
3831 /* definition for the table 'test/tcheck' */
3832 dict_col_meta_t columns[] = {
3833 {"c01", DATA_VARCHAR, 0, 123},
3834 {"c02", DATA_INT, 0, 4},
3835 {"c03", DATA_INT, DATA_NOT_NULL, 4},
3836 {"c04", DATA_INT, DATA_UNSIGNED, 4},
3837 {"c05", DATA_INT, 0, 8},
3838 {"c06", DATA_INT, DATA_NOT_NULL | DATA_UNSIGNED, 8},
3839 {"c07", DATA_INT, 0, 4},
3840 {"c_extra", DATA_INT, 0, 4}
3841 };
3842 dict_table_schema_t schema = {
3843 "test/tcheck",
3844 0 /* will be set individually for each test below */,
3845 columns
3846 };
3847 char errstr[512];
3848
3849 ut_snprintf(errstr, sizeof(errstr), "Table not found");
3850
3851 /* prevent any data dictionary modifications while we are checking
3852 the tables' structure */
3853
3854 mutex_enter(&(dict_sys->mutex));
3855
3856 /* check that a valid table is reported as valid */
3857 schema.n_cols = 7;
3858 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3859 == DB_SUCCESS) {
3860 printf("OK: test.tcheck ok\n");
3861 } else {
3862 printf("ERROR: %s\n", errstr);
3863 printf("ERROR: test.tcheck not present or corrupted\n");
3864 goto test_dict_table_schema_check_end;
3865 }
3866
3867 /* check columns with wrong length */
3868 schema.columns[1].len = 8;
3869 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3870 != DB_SUCCESS) {
3871 printf("OK: test.tcheck.c02 has different length and is "
3872 "reported as corrupted\n");
3873 } else {
3874 printf("OK: test.tcheck.c02 has different length but is "
3875 "reported as ok\n");
3876 goto test_dict_table_schema_check_end;
3877 }
3878 schema.columns[1].len = 4;
3879
3880 /* request that c02 is NOT NULL while actually it does not have
3881 this flag set */
3882 schema.columns[1].prtype_mask |= DATA_NOT_NULL;
3883 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3884 != DB_SUCCESS) {
3885 printf("OK: test.tcheck.c02 does not have NOT NULL while "
3886 "it should and is reported as corrupted\n");
3887 } else {
3888 printf("ERROR: test.tcheck.c02 does not have NOT NULL while "
3889 "it should and is not reported as corrupted\n");
3890 goto test_dict_table_schema_check_end;
3891 }
3892 schema.columns[1].prtype_mask &= ~DATA_NOT_NULL;
3893
3894 /* check a table that contains some extra columns */
3895 schema.n_cols = 6;
3896 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3897 == DB_SUCCESS) {
3898 printf("ERROR: test.tcheck has more columns but is not "
3899 "reported as corrupted\n");
3900 goto test_dict_table_schema_check_end;
3901 } else {
3902 printf("OK: test.tcheck has more columns and is "
3903 "reported as corrupted\n");
3904 }
3905
3906 /* check a table that has some columns missing */
3907 schema.n_cols = 8;
3908 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3909 != DB_SUCCESS) {
3910 printf("OK: test.tcheck has missing columns and is "
3911 "reported as corrupted\n");
3912 } else {
3913 printf("ERROR: test.tcheck has missing columns but is "
3914 "reported as ok\n");
3915 goto test_dict_table_schema_check_end;
3916 }
3917
3918 /* check non-existent table */
3919 schema.table_name = "test/tcheck_nonexistent";
3920 if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3921 != DB_SUCCESS) {
3922 printf("OK: test.tcheck_nonexistent is not present\n");
3923 } else {
3924 printf("ERROR: test.tcheck_nonexistent is present!?\n");
3925 goto test_dict_table_schema_check_end;
3926 }
3927
3928 test_dict_table_schema_check_end:
3929
3930 mutex_exit(&(dict_sys->mutex));
3931 }
3932 /* @} */
3933
/* save/fetch aux macros @{ */
/* Names of the dummy database/table crafted by test_dict_stats_save()
and re-read by test_dict_stats_fetch_from_ps(). */
#define TEST_DATABASE_NAME "foobardb"
#define TEST_TABLE_NAME "test_dict_stats"

/* Table-level statistics stored by the save test and asserted by the
fetch test. */
#define TEST_N_ROWS 111
#define TEST_CLUSTERED_INDEX_SIZE 222
#define TEST_SUM_OF_OTHER_INDEX_SIZES 333

/* First dummy index: a single column, so one n_diff_pfx slot. */
#define TEST_IDX1_NAME "tidx1"
#define TEST_IDX1_COL1_NAME "tidx1_col1"
#define TEST_IDX1_INDEX_SIZE 123
#define TEST_IDX1_N_LEAF_PAGES 234
#define TEST_IDX1_N_DIFF1 50
#define TEST_IDX1_N_DIFF1_SAMPLE_SIZE 500

/* Second dummy index: four columns, so four n_diff_pfx slots, each
with its own sample size. */
#define TEST_IDX2_NAME "tidx2"
#define TEST_IDX2_COL1_NAME "tidx2_col1"
#define TEST_IDX2_COL2_NAME "tidx2_col2"
#define TEST_IDX2_COL3_NAME "tidx2_col3"
#define TEST_IDX2_COL4_NAME "tidx2_col4"
#define TEST_IDX2_INDEX_SIZE 321
#define TEST_IDX2_N_LEAF_PAGES 432
#define TEST_IDX2_N_DIFF1 60
#define TEST_IDX2_N_DIFF1_SAMPLE_SIZE 600
#define TEST_IDX2_N_DIFF2 61
#define TEST_IDX2_N_DIFF2_SAMPLE_SIZE 610
#define TEST_IDX2_N_DIFF3 62
#define TEST_IDX2_N_DIFF3_SAMPLE_SIZE 620
#define TEST_IDX2_N_DIFF4 63
#define TEST_IDX2_N_DIFF4_SAMPLE_SIZE 630
/* @} */
3965
3966 /* test_dict_stats_save() @{ */
3967 void
test_dict_stats_save()3968 test_dict_stats_save()
3969 {
3970 dict_table_t table;
3971 dict_index_t index1;
3972 dict_field_t index1_fields[1];
3973 ib_uint64_t index1_stat_n_diff_key_vals[1];
3974 ib_uint64_t index1_stat_n_sample_sizes[1];
3975 dict_index_t index2;
3976 dict_field_t index2_fields[4];
3977 ib_uint64_t index2_stat_n_diff_key_vals[4];
3978 ib_uint64_t index2_stat_n_sample_sizes[4];
3979 dberr_t ret;
3980
3981 /* craft a dummy dict_table_t */
3982 table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
3983 table.stat_n_rows = TEST_N_ROWS;
3984 table.stat_clustered_index_size = TEST_CLUSTERED_INDEX_SIZE;
3985 table.stat_sum_of_other_index_sizes = TEST_SUM_OF_OTHER_INDEX_SIZES;
3986 UT_LIST_INIT(table.indexes);
3987 UT_LIST_ADD_LAST(indexes, table.indexes, &index1);
3988 UT_LIST_ADD_LAST(indexes, table.indexes, &index2);
3989 ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
3990 ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
3991
3992 index1.name = TEST_IDX1_NAME;
3993 index1.table = &table;
3994 index1.cached = 1;
3995 index1.n_uniq = 1;
3996 index1.fields = index1_fields;
3997 index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
3998 index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
3999 index1.stat_index_size = TEST_IDX1_INDEX_SIZE;
4000 index1.stat_n_leaf_pages = TEST_IDX1_N_LEAF_PAGES;
4001 index1_fields[0].name = TEST_IDX1_COL1_NAME;
4002 index1_stat_n_diff_key_vals[0] = TEST_IDX1_N_DIFF1;
4003 index1_stat_n_sample_sizes[0] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE;
4004
4005 ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
4006 index2.name = TEST_IDX2_NAME;
4007 index2.table = &table;
4008 index2.cached = 1;
4009 index2.n_uniq = 4;
4010 index2.fields = index2_fields;
4011 index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
4012 index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
4013 index2.stat_index_size = TEST_IDX2_INDEX_SIZE;
4014 index2.stat_n_leaf_pages = TEST_IDX2_N_LEAF_PAGES;
4015 index2_fields[0].name = TEST_IDX2_COL1_NAME;
4016 index2_fields[1].name = TEST_IDX2_COL2_NAME;
4017 index2_fields[2].name = TEST_IDX2_COL3_NAME;
4018 index2_fields[3].name = TEST_IDX2_COL4_NAME;
4019 index2_stat_n_diff_key_vals[0] = TEST_IDX2_N_DIFF1;
4020 index2_stat_n_diff_key_vals[1] = TEST_IDX2_N_DIFF2;
4021 index2_stat_n_diff_key_vals[2] = TEST_IDX2_N_DIFF3;
4022 index2_stat_n_diff_key_vals[3] = TEST_IDX2_N_DIFF4;
4023 index2_stat_n_sample_sizes[0] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE;
4024 index2_stat_n_sample_sizes[1] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE;
4025 index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE;
4026 index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE;
4027
4028 ret = dict_stats_save(&table, NULL);
4029
4030 ut_a(ret == DB_SUCCESS);
4031
4032 printf("\nOK: stats saved successfully, now go ahead and read "
4033 "what's inside %s and %s:\n\n",
4034 TABLE_STATS_NAME_PRINT,
4035 INDEX_STATS_NAME_PRINT);
4036
4037 printf("SELECT COUNT(*) = 1 AS table_stats_saved_successfully\n"
4038 "FROM %s\n"
4039 "WHERE\n"
4040 "database_name = '%s' AND\n"
4041 "table_name = '%s' AND\n"
4042 "n_rows = %d AND\n"
4043 "clustered_index_size = %d AND\n"
4044 "sum_of_other_index_sizes = %d;\n"
4045 "\n",
4046 TABLE_STATS_NAME_PRINT,
4047 TEST_DATABASE_NAME,
4048 TEST_TABLE_NAME,
4049 TEST_N_ROWS,
4050 TEST_CLUSTERED_INDEX_SIZE,
4051 TEST_SUM_OF_OTHER_INDEX_SIZES);
4052
4053 printf("SELECT COUNT(*) = 3 AS tidx1_stats_saved_successfully\n"
4054 "FROM %s\n"
4055 "WHERE\n"
4056 "database_name = '%s' AND\n"
4057 "table_name = '%s' AND\n"
4058 "index_name = '%s' AND\n"
4059 "(\n"
4060 " (stat_name = 'size' AND stat_value = %d AND"
4061 " sample_size IS NULL) OR\n"
4062 " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
4063 " sample_size IS NULL) OR\n"
4064 " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
4065 " sample_size = '%d' AND stat_description = '%s')\n"
4066 ");\n"
4067 "\n",
4068 INDEX_STATS_NAME_PRINT,
4069 TEST_DATABASE_NAME,
4070 TEST_TABLE_NAME,
4071 TEST_IDX1_NAME,
4072 TEST_IDX1_INDEX_SIZE,
4073 TEST_IDX1_N_LEAF_PAGES,
4074 TEST_IDX1_N_DIFF1,
4075 TEST_IDX1_N_DIFF1_SAMPLE_SIZE,
4076 TEST_IDX1_COL1_NAME);
4077
4078 printf("SELECT COUNT(*) = 6 AS tidx2_stats_saved_successfully\n"
4079 "FROM %s\n"
4080 "WHERE\n"
4081 "database_name = '%s' AND\n"
4082 "table_name = '%s' AND\n"
4083 "index_name = '%s' AND\n"
4084 "(\n"
4085 " (stat_name = 'size' AND stat_value = %d AND"
4086 " sample_size IS NULL) OR\n"
4087 " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
4088 " sample_size IS NULL) OR\n"
4089 " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
4090 " sample_size = '%d' AND stat_description = '%s') OR\n"
4091 " (stat_name = 'n_diff_pfx02' AND stat_value = %d AND"
4092 " sample_size = '%d' AND stat_description = '%s,%s') OR\n"
4093 " (stat_name = 'n_diff_pfx03' AND stat_value = %d AND"
4094 " sample_size = '%d' AND stat_description = '%s,%s,%s') OR\n"
4095 " (stat_name = 'n_diff_pfx04' AND stat_value = %d AND"
4096 " sample_size = '%d' AND stat_description = '%s,%s,%s,%s')\n"
4097 ");\n"
4098 "\n",
4099 INDEX_STATS_NAME_PRINT,
4100 TEST_DATABASE_NAME,
4101 TEST_TABLE_NAME,
4102 TEST_IDX2_NAME,
4103 TEST_IDX2_INDEX_SIZE,
4104 TEST_IDX2_N_LEAF_PAGES,
4105 TEST_IDX2_N_DIFF1,
4106 TEST_IDX2_N_DIFF1_SAMPLE_SIZE, TEST_IDX2_COL1_NAME,
4107 TEST_IDX2_N_DIFF2,
4108 TEST_IDX2_N_DIFF2_SAMPLE_SIZE,
4109 TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME,
4110 TEST_IDX2_N_DIFF3,
4111 TEST_IDX2_N_DIFF3_SAMPLE_SIZE,
4112 TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
4113 TEST_IDX2_N_DIFF4,
4114 TEST_IDX2_N_DIFF4_SAMPLE_SIZE,
4115 TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
4116 TEST_IDX2_COL4_NAME);
4117 }
4118 /* @} */
4119
4120 /* test_dict_stats_fetch_from_ps() @{ */
4121 void
test_dict_stats_fetch_from_ps()4122 test_dict_stats_fetch_from_ps()
4123 {
4124 dict_table_t table;
4125 dict_index_t index1;
4126 ib_uint64_t index1_stat_n_diff_key_vals[1];
4127 ib_uint64_t index1_stat_n_sample_sizes[1];
4128 dict_index_t index2;
4129 ib_uint64_t index2_stat_n_diff_key_vals[4];
4130 ib_uint64_t index2_stat_n_sample_sizes[4];
4131 dberr_t ret;
4132
4133 /* craft a dummy dict_table_t */
4134 table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
4135 UT_LIST_INIT(table.indexes);
4136 UT_LIST_ADD_LAST(indexes, table.indexes, &index1);
4137 UT_LIST_ADD_LAST(indexes, table.indexes, &index2);
4138 ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
4139
4140 index1.name = TEST_IDX1_NAME;
4141 ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
4142 index1.cached = 1;
4143 index1.n_uniq = 1;
4144 index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
4145 index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
4146
4147 index2.name = TEST_IDX2_NAME;
4148 ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
4149 index2.cached = 1;
4150 index2.n_uniq = 4;
4151 index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
4152 index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
4153
4154 ret = dict_stats_fetch_from_ps(&table);
4155
4156 ut_a(ret == DB_SUCCESS);
4157
4158 ut_a(table.stat_n_rows == TEST_N_ROWS);
4159 ut_a(table.stat_clustered_index_size == TEST_CLUSTERED_INDEX_SIZE);
4160 ut_a(table.stat_sum_of_other_index_sizes
4161 == TEST_SUM_OF_OTHER_INDEX_SIZES);
4162
4163 ut_a(index1.stat_index_size == TEST_IDX1_INDEX_SIZE);
4164 ut_a(index1.stat_n_leaf_pages == TEST_IDX1_N_LEAF_PAGES);
4165 ut_a(index1_stat_n_diff_key_vals[0] == TEST_IDX1_N_DIFF1);
4166 ut_a(index1_stat_n_sample_sizes[0] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE);
4167
4168 ut_a(index2.stat_index_size == TEST_IDX2_INDEX_SIZE);
4169 ut_a(index2.stat_n_leaf_pages == TEST_IDX2_N_LEAF_PAGES);
4170 ut_a(index2_stat_n_diff_key_vals[0] == TEST_IDX2_N_DIFF1);
4171 ut_a(index2_stat_n_sample_sizes[0] == TEST_IDX2_N_DIFF1_SAMPLE_SIZE);
4172 ut_a(index2_stat_n_diff_key_vals[1] == TEST_IDX2_N_DIFF2);
4173 ut_a(index2_stat_n_sample_sizes[1] == TEST_IDX2_N_DIFF2_SAMPLE_SIZE);
4174 ut_a(index2_stat_n_diff_key_vals[2] == TEST_IDX2_N_DIFF3);
4175 ut_a(index2_stat_n_sample_sizes[2] == TEST_IDX2_N_DIFF3_SAMPLE_SIZE);
4176 ut_a(index2_stat_n_diff_key_vals[3] == TEST_IDX2_N_DIFF4);
4177 ut_a(index2_stat_n_sample_sizes[3] == TEST_IDX2_N_DIFF4_SAMPLE_SIZE);
4178
4179 printf("OK: fetch successful\n");
4180 }
4181 /* @} */
4182
/* test_dict_stats_all() @{ */
/** Run all dict_stats unit tests. The order below matters: the fetch
test reads back the rows written by the save test, so
test_dict_stats_save() must run before test_dict_stats_fetch_from_ps(). */
void
test_dict_stats_all()
{
	test_dict_table_schema_check();

	test_dict_stats_save();

	test_dict_stats_fetch_from_ps();
}
/* @} */
4194
4195 #endif /* UNIV_COMPILE_TEST_FUNCS */
4196 /* @} */
4197
4198 #endif /* UNIV_HOTBACKUP */
4199