1 /*****************************************************************************
2 
3 Copyright (c) 2009, 2019, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2015, 2021, MariaDB Corporation.
5 
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17 
18 *****************************************************************************/
19 
20 /**************************************************//**
21 @file dict/dict0stats.cc
22 Code used for calculating and manipulating table statistics.
23 
24 Created Jan 06, 2010 Vasil Dimov
25 *******************************************************/
26 
27 #include "dict0stats.h"
28 #include "ut0ut.h"
29 #include "ut0rnd.h"
30 #include "dyn0buf.h"
31 #include "row0sel.h"
32 #include "trx0trx.h"
33 #include "pars0pars.h"
34 #include <mysql_com.h>
35 #include "btr0btr.h"
36 #include "sync0sync.h"
37 
38 #include <algorithm>
39 #include <map>
40 #include <vector>
41 
42 /* Sampling algorithm description @{
43 
44 The algorithm is controlled by one number - N_SAMPLE_PAGES(index),
45 let it be A, which is the number of leaf pages to analyze for a given index
46 for each n-prefix (if the index is on 3 columns, then 3*A leaf pages will be
47 analyzed).
48 
49 Let the total number of leaf pages in the table be T.
50 Level 0 - leaf pages, level H - root.
51 
52 Definition: N-prefix-boring record is a record on a non-leaf page that equals
53 the next (to the right, cross page boundaries, skipping the supremum and
54 infimum) record on the same level when looking at the first n-prefix columns.
55 The last (user) record on a level is not boring (it does not match the
56 non-existent user record to the right). We call the records boring because all
57 the records on the page below a boring record are equal to that boring record.
58 
59 We avoid diving below boring records when searching for a leaf page to
60 estimate the number of distinct records because we know that such a leaf
61 page will have number of distinct records == 1.
62 
63 For each n-prefix: start from the root level and full scan subsequent lower
64 levels until a level that contains at least A*10 distinct records is found.
65 Lets call this level LA.
66 As an optimization the search is canceled if it has reached level 1 (never
67 descend to the level 0 (leaf)) and also if the next level to be scanned
68 would contain more than A pages. The latter is because the user has asked
69 to analyze A leaf pages and it does not make sense to scan much more than
70 A non-leaf pages with the sole purpose of finding a good sample of A leaf
71 pages.
72 
73 After finding the appropriate level LA with >A*10 distinct records (or less in
74 the exceptions described above), divide it into groups of equal records and
75 pick A such groups. Then pick the last record from each group. For example,
76 let the level be:
77 
78 index:  0,1,2,3,4,5,6,7,8,9,10
79 record: 1,1,1,2,2,7,7,7,7,7,9
80 
81 There are 4 groups of distinct records and if A=2 random ones are selected,
82 e.g. 1,1,1 and 7,7,7,7,7, then records with indexes 2 and 9 will be selected.
83 
84 After selecting A records as described above, dive below them to find A leaf
85 pages and analyze them, finding the total number of distinct records. The
86 dive to the leaf level is performed by selecting a non-boring record from
87 each page and diving below it.
88 
89 This way, a total of A leaf pages are analyzed for the given n-prefix.
90 
91 Let the number of different key values found in each leaf page i be Pi (i=1..A).
92 Let N_DIFF_AVG_LEAF be (P1 + P2 + ... + PA) / A.
93 Let the number of different key values on level LA be N_DIFF_LA.
94 Let the total number of records on level LA be TOTAL_LA.
95 Let R be N_DIFF_LA / TOTAL_LA, we assume this ratio is the same on the
96 leaf level.
97 Let the number of leaf pages be N.
98 Then the total number of different key values on the leaf level is:
99 N * R * N_DIFF_AVG_LEAF.
100 See REF01 for the implementation.
101 
102 The above describes how to calculate the cardinality of an index.
103 This algorithm is executed for each n-prefix of a multi-column index
104 where n=1..n_uniq.
105 @} */
106 
107 /* names of the tables from the persistent statistics storage */
108 #define TABLE_STATS_NAME_PRINT	"mysql.innodb_table_stats"
109 #define INDEX_STATS_NAME_PRINT	"mysql.innodb_index_stats"
110 
111 #ifdef UNIV_STATS_DEBUG
112 #define DEBUG_PRINTF(fmt, ...)	printf(fmt, ## __VA_ARGS__)
113 #else /* UNIV_STATS_DEBUG */
114 #define DEBUG_PRINTF(fmt, ...)	/* noop */
115 #endif /* UNIV_STATS_DEBUG */
116 
117 /* Gets the number of leaf pages to sample in persistent stats estimation */
118 #define N_SAMPLE_PAGES(index)					\
119 	static_cast<ib_uint64_t>(				\
120 		(index)->table->stats_sample_pages != 0		\
121 		? (index)->table->stats_sample_pages		\
122 		: srv_stats_persistent_sample_pages)
123 
124 /* number of distinct records on a given level that are required to stop
125 descending to lower levels and fetch N_SAMPLE_PAGES(index) records
126 from that level */
127 #define N_DIFF_REQUIRED(index)	(N_SAMPLE_PAGES(index) * 10)
128 
129 /* A dynamic array where we store the boundaries of each distinct group
130 of keys. For example if a btree level is:
131 index: 0,1,2,3,4,5,6,7,8,9,10,11,12
132 data:  b,b,b,b,b,b,g,g,j,j,j, x, y
133 then we would store 5,7,10,11,12 in the array. */
134 typedef std::vector<ib_uint64_t, ut_allocator<ib_uint64_t> >	boundaries_t;
135 
136 /** Allocator type used for index_map_t. */
137 typedef ut_allocator<std::pair<const char* const, dict_index_t*> >
138 	index_map_t_allocator;
139 
140 /** Auxiliary map used for sorting indexes by name in dict_stats_save(). */
141 typedef std::map<const char*, dict_index_t*, ut_strcmp_functor,
142 		index_map_t_allocator>	index_map_t;
143 
144 /*********************************************************************//**
145 Checks whether an index should be ignored in stats manipulations:
146 * stats fetch
147 * stats recalc
148 * stats save
149 @return true if exists and all tables are ok */
150 UNIV_INLINE
151 bool
dict_stats_should_ignore_index(const dict_index_t * index)152 dict_stats_should_ignore_index(
153 /*===========================*/
154 	const dict_index_t*	index)	/*!< in: index */
155 {
156 	return((index->type & (DICT_FTS | DICT_SPATIAL))
157 	       || index->is_corrupted()
158 	       || index->to_be_dropped
159 	       || !index->is_committed());
160 }
161 
162 /*********************************************************************//**
163 Checks whether the persistent statistics storage exists and that all
164 tables have the proper structure.
165 @return true if exists and all tables are ok */
166 static
167 bool
dict_stats_persistent_storage_check(bool caller_has_dict_sys_mutex)168 dict_stats_persistent_storage_check(
169 /*================================*/
170 	bool	caller_has_dict_sys_mutex)	/*!< in: true if the caller
171 						owns dict_sys.mutex */
172 {
173 	/* definition for the table TABLE_STATS_NAME */
174 	dict_col_meta_t	table_stats_columns[] = {
175 		{"database_name", DATA_VARMYSQL,
176 			DATA_NOT_NULL, 192},
177 
178 		{"table_name", DATA_VARMYSQL,
179 			DATA_NOT_NULL, 597},
180 
181 		{"last_update", DATA_INT,
182 			DATA_NOT_NULL | DATA_UNSIGNED, 4},
183 
184 		{"n_rows", DATA_INT,
185 			DATA_NOT_NULL | DATA_UNSIGNED, 8},
186 
187 		{"clustered_index_size", DATA_INT,
188 			DATA_NOT_NULL | DATA_UNSIGNED, 8},
189 
190 		{"sum_of_other_index_sizes", DATA_INT,
191 			DATA_NOT_NULL | DATA_UNSIGNED, 8}
192 	};
193 	dict_table_schema_t	table_stats_schema = {
194 		TABLE_STATS_NAME,
195 		UT_ARR_SIZE(table_stats_columns),
196 		table_stats_columns,
197 		0 /* n_foreign */,
198 		0 /* n_referenced */
199 	};
200 
201 	/* definition for the table INDEX_STATS_NAME */
202 	dict_col_meta_t	index_stats_columns[] = {
203 		{"database_name", DATA_VARMYSQL,
204 			DATA_NOT_NULL, 192},
205 
206 		{"table_name", DATA_VARMYSQL,
207 			DATA_NOT_NULL, 597},
208 
209 		{"index_name", DATA_VARMYSQL,
210 			DATA_NOT_NULL, 192},
211 
212 		{"last_update", DATA_INT,
213 			DATA_NOT_NULL | DATA_UNSIGNED, 4},
214 
215 		{"stat_name", DATA_VARMYSQL,
216 			DATA_NOT_NULL, 64*3},
217 
218 		{"stat_value", DATA_INT,
219 			DATA_NOT_NULL | DATA_UNSIGNED, 8},
220 
221 		{"sample_size", DATA_INT,
222 			DATA_UNSIGNED, 8},
223 
224 		{"stat_description", DATA_VARMYSQL,
225 			DATA_NOT_NULL, 1024*3}
226 	};
227 	dict_table_schema_t	index_stats_schema = {
228 		INDEX_STATS_NAME,
229 		UT_ARR_SIZE(index_stats_columns),
230 		index_stats_columns,
231 		0 /* n_foreign */,
232 		0 /* n_referenced */
233 	};
234 
235 	char		errstr[512];
236 	dberr_t		ret;
237 
238 	if (!caller_has_dict_sys_mutex) {
239 		mutex_enter(&dict_sys.mutex);
240 	}
241 
242 	ut_ad(mutex_own(&dict_sys.mutex));
243 
244 	/* first check table_stats */
245 	ret = dict_table_schema_check(&table_stats_schema, errstr,
246 				      sizeof(errstr));
247 	if (ret == DB_SUCCESS) {
248 		/* if it is ok, then check index_stats */
249 		ret = dict_table_schema_check(&index_stats_schema, errstr,
250 					      sizeof(errstr));
251 	}
252 
253 	if (!caller_has_dict_sys_mutex) {
254 		mutex_exit(&dict_sys.mutex);
255 	}
256 
257 	if (ret != DB_SUCCESS && ret != DB_STATS_DO_NOT_EXIST) {
258 		ib::error() << errstr;
259 		return(false);
260 	} else if (ret == DB_STATS_DO_NOT_EXIST) {
261 		return false;
262 	}
263 	/* else */
264 
265 	return(true);
266 }
267 
268 /** Executes a given SQL statement using the InnoDB internal SQL parser.
269 This function will free the pinfo object.
270 @param[in,out]	pinfo	pinfo to pass to que_eval_sql() must already
271 have any literals bound to it
272 @param[in]	sql	SQL string to execute
273 @param[in,out]	trx	in case of NULL the function will allocate and
274 free the trx object. If it is not NULL then it will be rolled back
275 only in the case of error, but not freed.
276 @return DB_SUCCESS or error code */
static
dberr_t
dict_stats_exec_sql(
	pars_info_t*	pinfo,
	const char*	sql,
	trx_t*		trx)
{
	dberr_t	err;
	bool	trx_started = false;

	ut_d(dict_sys.assert_locked());

	/* The caller holds the dict_sys latch, hence "true" here.
	Even on this early-out path pinfo must be freed, as promised
	in the function contract. */
	if (!dict_stats_persistent_storage_check(true)) {
		pars_info_free(pinfo);
		return(DB_STATS_DO_NOT_EXIST);
	}

	/* No transaction supplied by the caller: create and start our
	own. We are then also responsible for committing and freeing
	it below (tracked via trx_started). */
	if (trx == NULL) {
		trx = trx_create();
		trx_started = true;

		if (srv_read_only_mode) {
			trx_start_internal_read_only(trx);
		} else {
			trx_start_internal(trx);
		}
	}

	err = que_eval_sql(pinfo, sql, FALSE, trx); /* pinfo is freed here */

	/* Debug hook: pretend the statement failed on a
	caller-supplied transaction, to exercise the rollback path. */
	DBUG_EXECUTE_IF("stats_index_error",
		if (!trx_started) {
			err = DB_STATS_DO_NOT_EXIST;
			trx->error_state = DB_STATS_DO_NOT_EXIST;
		});

	/* A caller-provided trx that succeeded is left untouched:
	the caller decides when to commit it. */
	if (!trx_started && err == DB_SUCCESS) {
		return(DB_SUCCESS);
	}

	if (err == DB_SUCCESS) {
		trx_commit_for_mysql(trx);
	} else {
		/* On error, roll back even a caller-provided trx (per
		the contract), but free it only if we created it. */
		trx->op_info = "rollback of internal trx on stats tables";
		trx->dict_operation_lock_mode = RW_X_LATCH;
		trx->rollback();
		trx->dict_operation_lock_mode = 0;
		trx->op_info = "";
		/* the rollback must have cleared the error state */
		ut_a(trx->error_state == DB_SUCCESS);
	}

	if (trx_started) {
		trx->free();
	}

	return(err);
}
334 
335 /*********************************************************************//**
336 Duplicate a table object and its indexes.
337 This function creates a dummy dict_table_t object and initializes the
338 following table and index members:
339 dict_table_t::id (copied)
340 dict_table_t::heap (newly created)
341 dict_table_t::name (copied)
342 dict_table_t::corrupted (copied)
343 dict_table_t::indexes<> (newly created)
344 dict_table_t::magic_n
345 for each entry in dict_table_t::indexes, the following are initialized:
346 (indexes that have DICT_FTS set in index->type are skipped)
347 dict_index_t::id (copied)
348 dict_index_t::name (copied)
349 dict_index_t::table_name (points to the copied table name)
350 dict_index_t::table (points to the above semi-initialized object)
351 dict_index_t::type (copied)
352 dict_index_t::to_be_dropped (copied)
353 dict_index_t::online_status (copied)
354 dict_index_t::n_uniq (copied)
355 dict_index_t::fields[] (newly created, only first n_uniq, only fields[i].name)
356 dict_index_t::indexes<> (newly created)
357 dict_index_t::stat_n_diff_key_vals[] (only allocated, left uninitialized)
358 dict_index_t::stat_n_sample_sizes[] (only allocated, left uninitialized)
359 dict_index_t::stat_n_non_null_key_vals[] (only allocated, left uninitialized)
360 dict_index_t::magic_n
361 The returned object should be freed with dict_stats_table_clone_free()
362 when no longer needed.
363 @return incomplete table object */
static
dict_table_t*
dict_stats_table_clone_create(
/*==========================*/
	const dict_table_t*	table)	/*!< in: table whose stats to copy */
{
	size_t		heap_size;
	dict_index_t*	index;

	/* Estimate the size needed for the table and all of its indexes.
	NOTE: the terms added below correspond one-to-one to the
	allocations performed in the copy loop further down, so that
	everything fits in the heap's first block. */

	heap_size = 0;
	heap_size += sizeof(dict_table_t);
	heap_size += strlen(table->name.m_name) + 1;

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		/* must skip the same indexes as the copy loop below,
		or the estimate would not match the allocations */
		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_ibuf(index));

		/* only the first n_uniq fields and stats slots are cloned */
		ulint	n_uniq = dict_index_get_n_unique(index);

		heap_size += sizeof(dict_index_t);
		heap_size += strlen(index->name) + 1;
		heap_size += n_uniq * sizeof(index->fields[0]);
		for (ulint i = 0; i < n_uniq; i++) {
			heap_size += strlen(index->fields[i].name) + 1;
		}
		heap_size += n_uniq * sizeof(index->stat_n_diff_key_vals[0]);
		heap_size += n_uniq * sizeof(index->stat_n_sample_sizes[0]);
		heap_size += n_uniq * sizeof(index->stat_n_non_null_key_vals[0]);
	}

	/* Allocate the memory and copy the members */

	mem_heap_t*	heap;

	heap = mem_heap_create(heap_size);

	dict_table_t*	t;

	t = (dict_table_t*) mem_heap_alloc(heap, sizeof(*t));

	MEM_CHECK_DEFINED(&table->id, sizeof(table->id));
	t->id = table->id;

	/* the clone's own heap is stored in it so that
	dict_stats_table_clone_free() can release everything at once */
	t->heap = heap;

	t->name.m_name = mem_heap_strdup(heap, table->name.m_name);

	t->corrupted = table->corrupted;

	UT_LIST_INIT(t->indexes, &dict_index_t::indexes);
#ifdef BTR_CUR_HASH_ADAPT
	UT_LIST_INIT(t->freed_indexes, &dict_index_t::indexes);
#endif /* BTR_CUR_HASH_ADAPT */

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_ibuf(index));

		dict_index_t*	idx;

		idx = (dict_index_t*) mem_heap_alloc(heap, sizeof(*idx));

		MEM_CHECK_DEFINED(&index->id, sizeof(index->id));
		idx->id = index->id;

		idx->name = mem_heap_strdup(heap, index->name);

		/* point back at the semi-initialized clone, not the
		original table */
		idx->table = t;

		idx->type = index->type;

		/* ignored indexes were skipped above, so the clone is
		always marked usable and committed */
		idx->to_be_dropped = 0;

		idx->online_status = ONLINE_INDEX_COMPLETE;
		idx->set_committed(true);

		idx->n_uniq = index->n_uniq;

		idx->fields = (dict_field_t*) mem_heap_alloc(
			heap, idx->n_uniq * sizeof(idx->fields[0]));

		/* only fields[i].name is cloned; the rest of each
		dict_field_t is left uninitialized */
		for (ulint i = 0; i < idx->n_uniq; i++) {
			idx->fields[i].name = mem_heap_strdup(
				heap, index->fields[i].name);
		}

		/* hook idx into t->indexes */
		UT_LIST_ADD_LAST(t->indexes, idx);

		/* stats arrays are only allocated here; they are
		filled in later, e.g. by dict_stats_copy() */
		idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_diff_key_vals[0]));

		idx->stat_n_sample_sizes = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_sample_sizes[0]));

		idx->stat_n_non_null_key_vals = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_non_null_key_vals[0]));
		ut_d(idx->magic_n = DICT_INDEX_MAGIC_N);

		idx->stat_defrag_n_page_split = 0;
		idx->stat_defrag_n_pages_freed = 0;
	}

	ut_d(t->magic_n = DICT_TABLE_MAGIC_N);

	return(t);
}
488 
489 /*********************************************************************//**
490 Free the resources occupied by an object returned by
491 dict_stats_table_clone_create(). */
492 static
493 void
dict_stats_table_clone_free(dict_table_t * t)494 dict_stats_table_clone_free(
495 /*========================*/
496 	dict_table_t*	t)	/*!< in: dummy table object to free */
497 {
498 	mem_heap_free(t->heap);
499 }
500 
501 /*********************************************************************//**
502 Write all zeros (or 1 where it makes sense) into an index
503 statistics members. The resulting stats correspond to an empty index. */
504 static
505 void
dict_stats_empty_index(dict_index_t * index,bool empty_defrag_stats)506 dict_stats_empty_index(
507 /*===================*/
508 	dict_index_t*	index,	/*!< in/out: index */
509 	bool		empty_defrag_stats)
510 				/*!< in: whether to empty defrag stats */
511 {
512 	ut_ad(!(index->type & DICT_FTS));
513 	ut_ad(!dict_index_is_ibuf(index));
514 	ut_ad(mutex_own(&dict_sys.mutex));
515 
516 	ulint	n_uniq = index->n_uniq;
517 
518 	for (ulint i = 0; i < n_uniq; i++) {
519 		index->stat_n_diff_key_vals[i] = 0;
520 		index->stat_n_sample_sizes[i] = 1;
521 		index->stat_n_non_null_key_vals[i] = 0;
522 	}
523 
524 	index->stat_index_size = 1;
525 	index->stat_n_leaf_pages = 1;
526 
527 	if (empty_defrag_stats) {
528 		dict_stats_empty_defrag_stats(index);
529 		dict_stats_empty_defrag_summary(index);
530 	}
531 }
532 
533 /*********************************************************************//**
534 Write all zeros (or 1 where it makes sense) into a table and its indexes'
535 statistics members. The resulting stats correspond to an empty table. */
536 static
537 void
dict_stats_empty_table(dict_table_t * table,bool empty_defrag_stats)538 dict_stats_empty_table(
539 /*===================*/
540 	dict_table_t*	table,	/*!< in/out: table */
541 	bool		empty_defrag_stats)
542 				/*!< in: whether to empty defrag stats */
543 {
544 	mutex_enter(&dict_sys.mutex);
545 
546 	/* Zero the stats members */
547 	table->stat_n_rows = 0;
548 	table->stat_clustered_index_size = 1;
549 	/* 1 page for each index, not counting the clustered */
550 	table->stat_sum_of_other_index_sizes
551 		= UT_LIST_GET_LEN(table->indexes) - 1;
552 	table->stat_modified_counter = 0;
553 
554 	dict_index_t*	index;
555 
556 	for (index = dict_table_get_first_index(table);
557 	     index != NULL;
558 	     index = dict_table_get_next_index(index)) {
559 
560 		if (index->type & DICT_FTS) {
561 			continue;
562 		}
563 
564 		ut_ad(!dict_index_is_ibuf(index));
565 
566 		dict_stats_empty_index(index, empty_defrag_stats);
567 	}
568 
569 	table->stat_initialized = TRUE;
570 	mutex_exit(&dict_sys.mutex);
571 }
572 
573 /*********************************************************************//**
574 Check whether index's stats are initialized (assert if they are not). */
575 static
576 void
dict_stats_assert_initialized_index(const dict_index_t * index)577 dict_stats_assert_initialized_index(
578 /*================================*/
579 	const dict_index_t*	index)	/*!< in: index */
580 {
581 	MEM_CHECK_DEFINED(
582 		index->stat_n_diff_key_vals,
583 		index->n_uniq * sizeof(index->stat_n_diff_key_vals[0]));
584 
585 	MEM_CHECK_DEFINED(
586 		index->stat_n_sample_sizes,
587 		index->n_uniq * sizeof(index->stat_n_sample_sizes[0]));
588 
589 	MEM_CHECK_DEFINED(
590 		index->stat_n_non_null_key_vals,
591 		index->n_uniq * sizeof(index->stat_n_non_null_key_vals[0]));
592 
593 	MEM_CHECK_DEFINED(
594 		&index->stat_index_size,
595 		sizeof(index->stat_index_size));
596 
597 	MEM_CHECK_DEFINED(
598 		&index->stat_n_leaf_pages,
599 		sizeof(index->stat_n_leaf_pages));
600 }
601 
602 /*********************************************************************//**
603 Check whether table's stats are initialized (assert if they are not). */
604 static
605 void
dict_stats_assert_initialized(const dict_table_t * table)606 dict_stats_assert_initialized(
607 /*==========================*/
608 	const dict_table_t*	table)	/*!< in: table */
609 {
610 	ut_a(table->stat_initialized);
611 
612 	MEM_CHECK_DEFINED(&table->stats_last_recalc,
613 			  sizeof table->stats_last_recalc);
614 
615 	MEM_CHECK_DEFINED(&table->stat_persistent,
616 			  sizeof table->stat_persistent);
617 
618 	MEM_CHECK_DEFINED(&table->stats_auto_recalc,
619 			  sizeof table->stats_auto_recalc);
620 
621 	MEM_CHECK_DEFINED(&table->stats_sample_pages,
622 			  sizeof table->stats_sample_pages);
623 
624 	MEM_CHECK_DEFINED(&table->stat_n_rows,
625 			  sizeof table->stat_n_rows);
626 
627 	MEM_CHECK_DEFINED(&table->stat_clustered_index_size,
628 			  sizeof table->stat_clustered_index_size);
629 
630 	MEM_CHECK_DEFINED(&table->stat_sum_of_other_index_sizes,
631 			  sizeof table->stat_sum_of_other_index_sizes);
632 
633 	MEM_CHECK_DEFINED(&table->stat_modified_counter,
634 			  sizeof table->stat_modified_counter);
635 
636 	MEM_CHECK_DEFINED(&table->stats_bg_flag,
637 			  sizeof table->stats_bg_flag);
638 
639 	for (dict_index_t* index = dict_table_get_first_index(table);
640 	     index != NULL;
641 	     index = dict_table_get_next_index(index)) {
642 
643 		if (!dict_stats_should_ignore_index(index)) {
644 			dict_stats_assert_initialized_index(index);
645 		}
646 	}
647 }
648 
649 #define INDEX_EQ(i1, i2) \
650 	((i1) != NULL \
651 	 && (i2) != NULL \
652 	 && (i1)->id == (i2)->id \
653 	 && strcmp((i1)->name, (i2)->name) == 0)
654 
655 /*********************************************************************//**
656 Copy table and index statistics from one table to another, including index
657 stats. Extra indexes in src are ignored and extra indexes in dst are
658 initialized to correspond to an empty index. */
static
void
dict_stats_copy(
/*============*/
	dict_table_t*		dst,	/*!< in/out: destination table */
	const dict_table_t*	src,	/*!< in: source table */
	bool reset_ignored_indexes)	/*!< in: if true, set ignored indexes
                                             to have the same statistics as if
                                             the table was empty */
{
	ut_ad(mutex_own(&dict_sys.mutex));

	dst->stats_last_recalc = src->stats_last_recalc;
	dst->stat_n_rows = src->stat_n_rows;
	dst->stat_clustered_index_size = src->stat_clustered_index_size;
	dst->stat_sum_of_other_index_sizes = src->stat_sum_of_other_index_sizes;
	dst->stat_modified_counter = src->stat_modified_counter;

	dict_index_t*	dst_idx;
	dict_index_t*	src_idx;

	/* Walk both index lists in parallel; src_idx is only advanced
	while it is non-NULL (the short-circuit in the increment below),
	so it stops at NULL once src runs out of indexes. */
	for (dst_idx = dict_table_get_first_index(dst),
	     src_idx = dict_table_get_first_index(src);
	     dst_idx != NULL;
	     dst_idx = dict_table_get_next_index(dst_idx),
	     (src_idx != NULL
	      && (src_idx = dict_table_get_next_index(src_idx)))) {

		if (dict_stats_should_ignore_index(dst_idx)) {
			if (reset_ignored_indexes) {
				/* Reset index statistics for all ignored indexes,
				unless they are FT indexes (these have no statistics)*/
				if (dst_idx->type & DICT_FTS) {
					continue;
				}
				/* NOTE: no "continue" here — after the
				reset we still fall through below, so a
				matching src index (if any) overwrites
				the just-reset values */
				dict_stats_empty_index(dst_idx, true);
			} else {
				continue;
			}
		}

		ut_ad(!dict_index_is_ibuf(dst_idx));

		/* if the parallel walk went out of sync (extra/missing
		indexes on either side), search src from the start for
		an index matching dst_idx by id and name */
		if (!INDEX_EQ(src_idx, dst_idx)) {
			for (src_idx = dict_table_get_first_index(src);
			     src_idx != NULL;
			     src_idx = dict_table_get_next_index(src_idx)) {

				if (INDEX_EQ(src_idx, dst_idx)) {
					break;
				}
			}
		}

		/* no counterpart in src: give dst_idx empty-index stats */
		if (!INDEX_EQ(src_idx, dst_idx)) {
			dict_stats_empty_index(dst_idx, true);
			continue;
		}

		ulint	n_copy_el;

		if (dst_idx->n_uniq > src_idx->n_uniq) {
			n_copy_el = src_idx->n_uniq;
			/* Since src is smaller some elements in dst
			will remain untouched by the following memmove(),
			thus we init all of them here. */
			dict_stats_empty_index(dst_idx, true);
		} else {
			n_copy_el = dst_idx->n_uniq;
		}

		memmove(dst_idx->stat_n_diff_key_vals,
			src_idx->stat_n_diff_key_vals,
			n_copy_el * sizeof(dst_idx->stat_n_diff_key_vals[0]));

		memmove(dst_idx->stat_n_sample_sizes,
			src_idx->stat_n_sample_sizes,
			n_copy_el * sizeof(dst_idx->stat_n_sample_sizes[0]));

		memmove(dst_idx->stat_n_non_null_key_vals,
			src_idx->stat_n_non_null_key_vals,
			n_copy_el * sizeof(dst_idx->stat_n_non_null_key_vals[0]));

		dst_idx->stat_index_size = src_idx->stat_index_size;

		dst_idx->stat_n_leaf_pages = src_idx->stat_n_leaf_pages;

		dst_idx->stat_defrag_modified_counter =
			src_idx->stat_defrag_modified_counter;
		dst_idx->stat_defrag_n_pages_freed =
			src_idx->stat_defrag_n_pages_freed;
		dst_idx->stat_defrag_n_page_split =
			src_idx->stat_defrag_n_page_split;
	}

	dst->stat_initialized = TRUE;
}
756 
757 /** Duplicate the stats of a table and its indexes.
758 This function creates a dummy dict_table_t object and copies the input
759 table's stats into it. The returned table object is not in the dictionary
760 cache and cannot be accessed by any other threads. In addition to the
761 members copied in dict_stats_table_clone_create() this function initializes
762 the following:
763 dict_table_t::stat_initialized
764 dict_table_t::stat_persistent
765 dict_table_t::stat_n_rows
766 dict_table_t::stat_clustered_index_size
767 dict_table_t::stat_sum_of_other_index_sizes
768 dict_table_t::stat_modified_counter
769 dict_index_t::stat_n_diff_key_vals[]
770 dict_index_t::stat_n_sample_sizes[]
771 dict_index_t::stat_n_non_null_key_vals[]
772 dict_index_t::stat_index_size
773 dict_index_t::stat_n_leaf_pages
774 dict_index_t::stat_defrag_modified_counter
775 dict_index_t::stat_defrag_n_pages_freed
776 dict_index_t::stat_defrag_n_page_split
777 The returned object should be freed with dict_stats_snapshot_free()
778 when no longer needed.
779 @param[in]	table	table whose stats to copy
780 @return incomplete table object */
781 static
782 dict_table_t*
dict_stats_snapshot_create(dict_table_t * table)783 dict_stats_snapshot_create(
784 	dict_table_t*	table)
785 {
786 	mutex_enter(&dict_sys.mutex);
787 
788 	dict_stats_assert_initialized(table);
789 
790 	dict_table_t*	t;
791 
792 	t = dict_stats_table_clone_create(table);
793 
794 	dict_stats_copy(t, table, false);
795 
796 	t->stat_persistent = table->stat_persistent;
797 	t->stats_auto_recalc = table->stats_auto_recalc;
798 	t->stats_sample_pages = table->stats_sample_pages;
799 	t->stats_bg_flag = table->stats_bg_flag;
800 
801 	mutex_exit(&dict_sys.mutex);
802 
803 	return(t);
804 }
805 
806 /*********************************************************************//**
807 Free the resources occupied by an object returned by
808 dict_stats_snapshot_create(). */
809 static
810 void
dict_stats_snapshot_free(dict_table_t * t)811 dict_stats_snapshot_free(
812 /*=====================*/
813 	dict_table_t*	t)	/*!< in: dummy table object to free */
814 {
815 	dict_stats_table_clone_free(t);
816 }
817 
818 /*********************************************************************//**
819 Calculates new estimates for index statistics. This function is
820 relatively quick and is used to calculate transient statistics that
821 are not saved on disk. This was the only way to calculate statistics
822 before the Persistent Statistics feature was introduced.
823 This function doesn't update the defragmentation related stats.
824 Only persistent statistics supports defragmentation stats. */
static
void
dict_stats_update_transient_for_index(
/*==================================*/
	dict_index_t*	index)	/*!< in/out: index */
{
	if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
	    && (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO
		|| !dict_index_is_clust(index))) {
		/* If we have set a high innodb_force_recovery
		level, do not calculate statistics, as a badly
		corrupted index can cause a crash in it.
		Initialize some bogus index cardinality
		statistics, so that the data can be queried in
		various means, also via secondary indexes. */
		mutex_enter(&dict_sys.mutex);
		dict_stats_empty_index(index, false);
		mutex_exit(&dict_sys.mutex);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
	} else if (ibuf_debug && !dict_index_is_clust(index)) {
		/* in change-buffer debug mode, secondary indexes get
		empty stats instead of being scanned */
		mutex_enter(&dict_sys.mutex);
		dict_stats_empty_index(index, false);
		mutex_exit(&dict_sys.mutex);
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
	} else {
		mtr_t	mtr;
		ulint	size;

		/* probe the tree sizes under an S-latch on the index */
		mtr.start();
		mtr_s_lock_index(index, &mtr);
		size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);

		/* only query the leaf-page count if the total size
		could be read; otherwise size stays ULINT_UNDEFINED
		and is handled by the switch below */
		if (size != ULINT_UNDEFINED) {
			index->stat_index_size = size;

			size = btr_get_size(
				index, BTR_N_LEAF_PAGES, &mtr);
		}

		mtr.commit();

		switch (size) {
		case ULINT_UNDEFINED:
			/* the tree could not be read (e.g. corruption):
			fall back to empty-index statistics */
			mutex_enter(&dict_sys.mutex);
			dict_stats_empty_index(index, false);
			mutex_exit(&dict_sys.mutex);
			return;
		case 0:
			/* The root node of the tree is a leaf */
			size = 1;
		}

		index->stat_n_leaf_pages = size;

		/* Do not continue if table decryption has failed or
		table is already marked as corrupted. */
		if (index->is_readable()) {
			std::vector<index_field_stats_t> stats
				= btr_estimate_number_of_different_key_vals(
					index);

			/* copy the estimates into the index under
			dict_sys.mutex; an empty vector means no
			estimate was produced and the old values stay */
			if (!stats.empty()) {
				ut_ad(!mutex_own(&dict_sys.mutex));
				mutex_enter(&dict_sys.mutex);
				for (size_t i = 0; i < stats.size(); ++i) {
					index->stat_n_diff_key_vals[i]
						= stats[i].n_diff_key_vals;
					index->stat_n_sample_sizes[i]
						= stats[i].n_sample_sizes;
					index->stat_n_non_null_key_vals[i]
						= stats[i].n_non_null_key_vals;
				}
				mutex_exit(&dict_sys.mutex);
			}
		}
	}
}
902 
903 /*********************************************************************//**
904 Calculates new estimates for table and index statistics. This function
905 is relatively quick and is used to calculate transient statistics that
906 are not saved on disk.
907 This was the only way to calculate statistics before the
908 Persistent Statistics feature was introduced. */
static
void
dict_stats_update_transient(
/*========================*/
	dict_table_t*	table)	/*!< in/out: table */
{
	ut_ad(!mutex_own(&dict_sys.mutex));

	dict_index_t*	index;
	ulint		sum_of_index_sizes	= 0;

	/* Find out the sizes of the indexes and how many different values
	for the key they approximately have */

	index = dict_table_get_first_index(table);

	if (!table->space) {
		/* Nothing to do. */
		dict_stats_empty_table(table, true);
		return;
	} else if (index == NULL) {
		/* Table definition is corrupt */

		ib::warn() << "Table " << table->name
			<< " has no indexes. Cannot calculate statistics.";
		dict_stats_empty_table(table, true);
		return;
	}

	for (; index != NULL; index = dict_table_get_next_index(index)) {

		ut_ad(!dict_index_is_ibuf(index));

		/* FTS and spatial indexes get no stats at all here */
		if (index->type & (DICT_FTS | DICT_SPATIAL)) {
			continue;
		}

		/* unusable (dropped/uncommitted/corrupted) or
		unreadable (e.g. decryption failed) indexes get
		empty-index statistics instead of a scan */
		if (dict_stats_should_ignore_index(index)
		    || !index->is_readable()) {
			mutex_enter(&dict_sys.mutex);
			dict_stats_empty_index(index, false);
			mutex_exit(&dict_sys.mutex);
			continue;
		}

		dict_stats_update_transient_for_index(index);

		sum_of_index_sizes += index->stat_index_size;
	}

	/* publish the table-level aggregates under dict_sys.mutex */
	mutex_enter(&dict_sys.mutex);

	/* the first index is the clustered index; the n_diff estimate
	over all its unique columns is the row count estimate */
	index = dict_table_get_first_index(table);

	table->stat_n_rows = index->stat_n_diff_key_vals[
		dict_index_get_n_unique(index) - 1];

	table->stat_clustered_index_size = index->stat_index_size;

	/* sum_of_index_sizes includes the clustered index too,
	so subtract it here */
	table->stat_sum_of_other_index_sizes = sum_of_index_sizes
		- index->stat_index_size;

	table->stats_last_recalc = time(NULL);

	table->stat_modified_counter = 0;

	table->stat_initialized = TRUE;

	mutex_exit(&dict_sys.mutex);
}
979 
980 /* @{ Pseudo code about the relation between the following functions
981 
982 let N = N_SAMPLE_PAGES(index)
983 
984 dict_stats_analyze_index()
985   for each n_prefix
986     search for good enough level:
987       dict_stats_analyze_index_level() // only called if level has <= N pages
988         // full scan of the level in one mtr
989         collect statistics about the given level
990       if we are not satisfied with the level, search next lower level
991     we have found a good enough level here
992     dict_stats_analyze_index_for_n_prefix(that level, stats collected above)
993       // full scan of the level in one mtr
994       dive below some records and analyze the leaf page there:
995       dict_stats_analyze_index_below_cur()
996 @} */
997 
998 /*********************************************************************//**
999 Find the total number and the number of distinct keys on a given level in
1000 an index. Each of the 1..n_uniq prefixes are looked up and the results are
1001 saved in the array n_diff[0] .. n_diff[n_uniq - 1]. The total number of
1002 records on the level is saved in total_recs.
1003 Also, the index of the last record in each group of equal records is saved
in n_diff_boundaries[0..n_uniq - 1]; record indexing starts from the leftmost
record on the level and continues across page boundaries, counting from 0. */
static
void
dict_stats_analyze_index_level(
/*===========================*/
	dict_index_t*	index,		/*!< in: index */
	ulint		level,		/*!< in: level */
	ib_uint64_t*	n_diff,		/*!< out: array for number of
					distinct keys for all prefixes */
	ib_uint64_t*	total_recs,	/*!< out: total number of records */
	ib_uint64_t*	total_pages,	/*!< out: total number of pages */
	boundaries_t*	n_diff_boundaries,/*!< out: boundaries of the groups
					of distinct keys */
	mtr_t*		mtr)		/*!< in/out: mini-transaction */
{
	ulint		n_uniq;
	mem_heap_t*	heap;
	btr_pcur_t	pcur;
	const page_t*	page;
	const rec_t*	rec;
	const rec_t*	prev_rec;
	bool		prev_rec_is_copied;
	byte*		prev_rec_buf = NULL;
	ulint		prev_rec_buf_size = 0;
	rec_offs*	rec_offsets;
	rec_offs*	prev_rec_offsets;
	ulint		i;

	DEBUG_PRINTF("    %s(table=%s, index=%s, level=" ULINTPF ")\n",
		     __func__, index->table->name, index->name, level);

	ut_ad(mtr->memo_contains(index->lock, MTR_MEMO_SX_LOCK));

	n_uniq = dict_index_get_n_unique(index);

	/* elements in the n_diff array are 0..n_uniq-1 (inclusive) */
	memset(n_diff, 0x0, n_uniq * sizeof(n_diff[0]));

	/* Allocate space for the offsets header (the allocation size at
	offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_uniq + 1,
	so that this will never be less than the size calculated in
	rec_get_offsets_func(). */
	i = (REC_OFFS_HEADER_SIZE + 1 + 1) + n_uniq;

	heap = mem_heap_create((2 * sizeof *rec_offsets) * i);
	rec_offsets = static_cast<rec_offs*>(
		mem_heap_alloc(heap, i * sizeof *rec_offsets));
	prev_rec_offsets = static_cast<rec_offs*>(
		mem_heap_alloc(heap, i * sizeof *prev_rec_offsets));
	rec_offs_set_n_alloc(rec_offsets, i);
	rec_offs_set_n_alloc(prev_rec_offsets, i);

	/* reset the dynamic arrays n_diff_boundaries[0..n_uniq-1] */
	if (n_diff_boundaries != NULL) {
		for (i = 0; i < n_uniq; i++) {
			n_diff_boundaries[i].erase(
				n_diff_boundaries[i].begin(),
				n_diff_boundaries[i].end());
		}
	}

	/* Position pcur on the leftmost record on the leftmost page
	on the desired level. */

	btr_pcur_open_at_index_side(
		true, index, BTR_SEARCH_TREE_ALREADY_S_LATCHED,
		&pcur, true, level, mtr);
	btr_pcur_move_to_next_on_page(&pcur);

	page = btr_pcur_get_page(&pcur);

	/* The page must not be empty, except when
	it is the root page (and the whole index is empty). */
	ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page));
	ut_ad(btr_pcur_get_rec(&pcur)
	      == page_rec_get_next_const(page_get_infimum_rec(page)));

	/* check that we are indeed on the desired level */
	ut_a(btr_page_get_level(page) == level);

	/* there should not be any pages on the left */
	ut_a(!page_has_prev(page));

	if (REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
		    btr_pcur_get_rec(&pcur), page_is_comp(page))) {
		ut_ad(btr_pcur_is_on_user_rec(&pcur));
		if (level == 0) {
			/* Skip the metadata pseudo-record */
			ut_ad(index->is_instant());
			btr_pcur_move_to_next_user_rec(&pcur, mtr);
		}
	} else {
		/* The first record on the leftmost page must be
		marked as such on each level except the leaf level. */
		ut_a(level == 0);
	}

	prev_rec = NULL;
	prev_rec_is_copied = false;

	/* no records by default */
	*total_recs = 0;

	*total_pages = 0;

	/* iterate over all user records on this level
	and compare each two adjacent ones, even the last on page
	X and the first on page X+1 */
	for (;
	     btr_pcur_is_on_user_rec(&pcur);
	     btr_pcur_move_to_next_user_rec(&pcur, mtr)) {

		bool	rec_is_last_on_page;

		rec = btr_pcur_get_rec(&pcur);

		/* If rec and prev_rec are on different pages, then prev_rec
		must have been copied, because we hold latch only on the page
		where rec resides. */
		if (prev_rec != NULL
		    && page_align(rec) != page_align(prev_rec)) {

			ut_a(prev_rec_is_copied);
		}

		rec_is_last_on_page =
			page_rec_is_supremum(page_rec_get_next_const(rec));

		/* increment the pages counter at the end of each page */
		if (rec_is_last_on_page) {

			(*total_pages)++;
		}

		/* Skip delete-marked records on the leaf level. If we
		do not skip them, then ANALYZE quickly after DELETE
		could count them or not (purge may have already wiped
		them away) which brings non-determinism. We skip only
		leaf-level delete marks because delete marks on
		non-leaf level do not make sense. */

		if (level == 0
		    && !srv_stats_include_delete_marked
		    && rec_get_deleted_flag(
			    rec,
			    page_is_comp(btr_pcur_get_page(&pcur)))) {

			if (rec_is_last_on_page
			    && !prev_rec_is_copied
			    && prev_rec != NULL) {
				/* copy prev_rec before crossing the page
				boundary, because the latch on its page is
				about to be released */

				prev_rec_offsets = rec_get_offsets(
					prev_rec, index, prev_rec_offsets,
					index->n_core_fields,
					n_uniq, &heap);

				prev_rec = rec_copy_prefix_to_buf(
					prev_rec, index, n_uniq,
					&prev_rec_buf, &prev_rec_buf_size);

				prev_rec_is_copied = true;
			}

			continue;
		}
		/* Non-leaf records are node pointers, for which
		rec_get_offsets() is passed 0 "core" fields; on the leaf
		level the index's core field count is used. */
		rec_offsets = rec_get_offsets(rec, index, rec_offsets,
					      level ? 0 : index->n_core_fields,
					      n_uniq, &heap);

		(*total_recs)++;

		if (prev_rec != NULL) {
			ulint	matched_fields;

			prev_rec_offsets = rec_get_offsets(
				prev_rec, index, prev_rec_offsets,
				level ? 0 : index->n_core_fields,
				n_uniq, &heap);

			cmp_rec_rec(prev_rec, rec,
				    prev_rec_offsets, rec_offsets, index,
				    false, &matched_fields);

			for (i = matched_fields; i < n_uniq; i++) {

				if (n_diff_boundaries != NULL) {
					/* push the index of the previous
					record, that is - the last one from
					a group of equal keys */

					ib_uint64_t	idx;

					/* the index of the current record
					is total_recs - 1, the index of the
					previous record is total_recs - 2;
					we know that idx is not going to
					become negative here because if we
					are in this branch then there is a
					previous record and thus
					total_recs >= 2 */
					idx = *total_recs - 2;

					n_diff_boundaries[i].push_back(idx);
				}

				/* increment the number of different keys
				for n_prefix=i+1 (e.g. if i=0 then we increment
				for n_prefix=1 which is stored in n_diff[0]) */
				n_diff[i]++;
			}
		} else {
			/* this is the first non-delete marked record */
			for (i = 0; i < n_uniq; i++) {
				n_diff[i] = 1;
			}
		}

		if (rec_is_last_on_page) {
			/* end of a page has been reached */

			/* we need to copy the record instead of assigning
			like prev_rec = rec; because when we traverse the
			records on this level at some point we will jump from
			one page to the next and then rec and prev_rec will
			be on different pages and
			btr_pcur_move_to_next_user_rec() will release the
			latch on the page that prev_rec is on */
			prev_rec = rec_copy_prefix_to_buf(
				rec, index, n_uniq,
				&prev_rec_buf, &prev_rec_buf_size);
			prev_rec_is_copied = true;

		} else {
			/* still on the same page, the next call to
			btr_pcur_move_to_next_user_rec() will not jump
			on the next page, we can simply assign pointers
			instead of copying the records like above */

			prev_rec = rec;
			prev_rec_is_copied = false;
		}
	}

	/* if *total_pages is left untouched then the above loop was not
	entered at all and there is one page in the whole tree which is
	empty or the loop was entered but this is level 0, contains one page
	and all records are delete-marked */
	if (*total_pages == 0) {

		ut_ad(level == 0);
		ut_ad(*total_recs == 0);

		*total_pages = 1;
	}

	/* if there are records on this level and boundaries
	should be saved */
	if (*total_recs > 0 && n_diff_boundaries != NULL) {

		/* remember the index of the last record on the level as the
		last one from the last group of equal keys; this holds for
		all possible prefixes */
		for (i = 0; i < n_uniq; i++) {
			ib_uint64_t	idx;

			idx = *total_recs - 1;

			n_diff_boundaries[i].push_back(idx);
		}
	}

	/* now in n_diff_boundaries[i] there are exactly n_diff[i] integers,
	for i=0..n_uniq-1 */

#ifdef UNIV_STATS_DEBUG
	for (i = 0; i < n_uniq; i++) {

		DEBUG_PRINTF("    %s(): total recs: " UINT64PF
			     ", total pages: " UINT64PF
			     ", n_diff[" ULINTPF "]: " UINT64PF "\n",
			     __func__, *total_recs,
			     *total_pages,
			     i, n_diff[i]);

#if 0
		if (n_diff_boundaries != NULL) {
			ib_uint64_t	j;

			DEBUG_PRINTF("    %s(): boundaries[%lu]: ",
				     __func__, i);

			for (j = 0; j < n_diff[i]; j++) {
				ib_uint64_t	idx;

				idx = n_diff_boundaries[i][j];

				DEBUG_PRINTF(UINT64PF "=" UINT64PF ", ",
					     j, idx);
			}
			DEBUG_PRINTF("\n");
		}
#endif
	}
#endif /* UNIV_STATS_DEBUG */

	/* Release the latch on the last page, because that is not done by
	btr_pcur_close(). This function works also for non-leaf pages. */
	btr_leaf_page_release(btr_pcur_get_block(&pcur), BTR_SEARCH_LEAF, mtr);

	btr_pcur_close(&pcur);
	ut_free(prev_rec_buf);
	mem_heap_free(heap);
}
1319 
1320 /** Scan a page, reading records from left to right and counting the number
1321 of distinct records (looking only at the first n_prefix
1322 columns) and the number of external pages pointed by records from this page.
1323 If scan_method is QUIT_ON_FIRST_NON_BORING then the function
1324 will return as soon as it finds a record that does not match its neighbor
1325 to the right, which means that in the case of QUIT_ON_FIRST_NON_BORING the
1326 returned n_diff can either be 0 (empty page), 1 (the whole page has all keys
1327 equal) or 2 (the function found a non-boring record and returned).
1328 @param[out]	out_rec			record, or NULL
1329 @param[out]	offsets1		rec_get_offsets() working space (must
1330 be big enough)
1331 @param[out]	offsets2		rec_get_offsets() working space (must
1332 be big enough)
1333 @param[in]	index			index of the page
1334 @param[in]	page			the page to scan
1335 @param[in]	n_prefix		look at the first n_prefix columns
1336 @param[in]	n_core			0, or index->n_core_fields for leaf
1337 @param[out]	n_diff			number of distinct records encountered
1338 @param[out]	n_external_pages	if this is non-NULL then it will be set
1339 to the number of externally stored pages which were encountered
1340 @return offsets1 or offsets2 (the offsets of *out_rec),
1341 or NULL if the page is empty and does not contain user records. */
UNIV_INLINE
rec_offs*
dict_stats_scan_page(
	const rec_t**		out_rec,
	rec_offs*		offsets1,
	rec_offs*		offsets2,
	const dict_index_t*	index,
	const page_t*		page,
	ulint			n_prefix,
	ulint		 	n_core,
	ib_uint64_t*		n_diff,
	ib_uint64_t*		n_external_pages)
{
	rec_offs*	offsets_rec		= offsets1;
	rec_offs*	offsets_next_rec	= offsets2;
	const rec_t*	rec;
	const rec_t*	next_rec;
	/* A dummy heap, to be passed to rec_get_offsets().
	Because offsets1,offsets2 should be big enough,
	this memory heap should never be used. */
	mem_heap_t*	heap			= NULL;
	/* n_core != 0 if and only if this is a leaf page */
	ut_ad(!!n_core == page_is_leaf(page));
	/* On the leaf level, skip delete-marked records unless they are
	configured to be included in the statistics. */
	const rec_t*	(*get_next)(const rec_t*)
		= !n_core || srv_stats_include_delete_marked
		? page_rec_get_next_const
		: page_rec_get_next_non_del_marked;

	const bool	should_count_external_pages = n_external_pages != NULL;

	if (should_count_external_pages) {
		*n_external_pages = 0;
	}

	rec = get_next(page_get_infimum_rec(page));

	if (page_rec_is_supremum(rec)) {
		/* the page is empty or contains only delete-marked records */
		*n_diff = 0;
		*out_rec = NULL;
		return(NULL);
	}

	offsets_rec = rec_get_offsets(rec, index, offsets_rec, n_core,
				      ULINT_UNDEFINED, &heap);

	if (should_count_external_pages) {
		*n_external_pages += btr_rec_get_externally_stored_len(
			rec, offsets_rec);
	}

	next_rec = get_next(rec);

	*n_diff = 1;

	while (!page_rec_is_supremum(next_rec)) {

		ulint	matched_fields;

		offsets_next_rec = rec_get_offsets(next_rec, index,
						   offsets_next_rec, n_core,
						   ULINT_UNDEFINED,
						   &heap);

		/* check whether rec != next_rec when looking at
		the first n_prefix fields */
		cmp_rec_rec(rec, next_rec, offsets_rec, offsets_next_rec,
			    index, false, &matched_fields);

		if (matched_fields < n_prefix) {
			/* rec != next_rec, => rec is non-boring */

			(*n_diff)++;

			if (!n_core) {
				/* on a non-leaf page, quit as soon as a
				non-boring record has been found */
				break;
			}
		}

		rec = next_rec;
		/* Assign offsets_rec = offsets_next_rec so that
		offsets_rec matches with rec which was just assigned
		rec = next_rec above.  Also need to point
		offsets_next_rec to the place where offsets_rec was
		pointing before because we have just 2 placeholders
		where data is actually stored: offsets1 and offsets2
		and we are using them in circular fashion
		(offsets[_next]_rec are just pointers to those
		placeholders). */
		std::swap(offsets_rec, offsets_next_rec);

		if (should_count_external_pages) {
			*n_external_pages += btr_rec_get_externally_stored_len(
				rec, offsets_rec);
		}

		next_rec = get_next(next_rec);
	}

	/* offsets1,offsets2 should have been big enough */
	ut_a(heap == NULL);
	*out_rec = rec;
	return(offsets_rec);
}
1445 
/** Dive below the current position of a cursor and calculate the number of
distinct records on the leaf page, when looking at the first n_prefix
columns. Also calculate the number of external pages pointed by records
on the leaf page.
@param[in]	cur			cursor
@param[in]	n_prefix		look at the first n_prefix columns
when comparing records
@param[out]	n_diff			number of distinct records
@param[out]	n_external_pages	number of external pages */
static
void
dict_stats_analyze_index_below_cur(
	const btr_cur_t*	cur,
	ulint			n_prefix,
	ib_uint64_t*		n_diff,
	ib_uint64_t*		n_external_pages)
{
	dict_index_t*	index;
	buf_block_t*	block;
	const page_t*	page;
	mem_heap_t*	heap;
	const rec_t*	rec;
	rec_offs*	offsets1;
	rec_offs*	offsets2;
	rec_offs*	offsets_rec;
	ulint		size;
	mtr_t		mtr;

	index = btr_cur_get_index(cur);

	/* Allocate offsets for the record and the node pointer, for
	node pointer records. In a secondary index, the node pointer
	record will consist of all index fields followed by a child
	page number.
	Allocate space for the offsets header (the allocation size at
	offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
	so that this will never be less than the size calculated in
	rec_get_offsets_func(). */
	size = (1 + REC_OFFS_HEADER_SIZE) + 1 + dict_index_get_n_fields(index);

	heap = mem_heap_create(size * (sizeof *offsets1 + sizeof *offsets2));

	offsets1 = static_cast<rec_offs*>(mem_heap_alloc(
			heap, size * sizeof *offsets1));

	offsets2 = static_cast<rec_offs*>(mem_heap_alloc(
			heap, size * sizeof *offsets2));

	rec_offs_set_n_alloc(offsets1, size);
	rec_offs_set_n_alloc(offsets2, size);

	rec = btr_cur_get_rec(cur);
	page = page_align(rec);
	/* the cursor must be positioned on a node pointer record,
	i.e. on a non-leaf page */
	ut_ad(!page_rec_is_leaf(rec));

	/* node pointer record: 0 "core" fields */
	offsets_rec = rec_get_offsets(rec, index, offsets1, 0,
				      ULINT_UNDEFINED, &heap);

	/* the child page that we will dive into first */
	page_id_t		page_id(index->table->space_id,
					btr_node_ptr_get_child_page_no(
						rec, offsets_rec));
	const ulint zip_size = index->table->space->zip_size();

	/* assume no external pages by default - in case we quit from this
	function without analyzing any leaf pages */
	*n_external_pages = 0;

	mtr_start(&mtr);

	/* descend to the leaf level on the B-tree */
	for (;;) {

		dberr_t err = DB_SUCCESS;

		block = buf_page_get_gen(page_id, zip_size,
					 RW_S_LATCH, NULL, BUF_GET,
					 __FILE__, __LINE__, &mtr, &err,
					 !index->is_clust()
					 && 1 == btr_page_get_level(page));

		page = buf_block_get_frame(block);

		if (page_is_leaf(page)) {
			/* leaf level */
			break;
		}
		/* else */

		/* search for the first non-boring record on the page */
		offsets_rec = dict_stats_scan_page(
			&rec, offsets1, offsets2, index, page, n_prefix,
			0, n_diff, NULL);

		/* pages on level > 0 are not allowed to be empty */
		ut_a(offsets_rec != NULL);
		/* if page is not empty (offsets_rec != NULL) then n_diff must
		be > 0, otherwise there is a bug in dict_stats_scan_page() */
		ut_a(*n_diff > 0);

		if (*n_diff == 1) {
			mtr_commit(&mtr);

			/* page has all keys equal and the end of the page
			was reached by dict_stats_scan_page(), no need to
			descend to the leaf level */
			mem_heap_free(heap);
			/* can't get an estimate for n_external_pages here
			because we do not dive to the leaf level, assume no
			external pages (*n_external_pages was assigned to 0
			above). */
			return;
		}
		/* else */

		/* when we instruct dict_stats_scan_page() to quit on the
		first non-boring record it finds, then the returned n_diff
		can either be 0 (empty page), 1 (page has all keys equal) or
		2 (non-boring record was found) */
		ut_a(*n_diff == 2);

		/* we have a non-boring record in rec, descend below it */

		page_id.set_page_no(
			btr_node_ptr_get_child_page_no(rec, offsets_rec));
	}

	/* make sure we got a leaf page as a result from the above loop */
	ut_ad(page_is_leaf(page));

	/* scan the leaf page and find the number of distinct keys,
	when looking only at the first n_prefix columns; also estimate
	the number of externally stored pages pointed by records on this
	page */

	offsets_rec = dict_stats_scan_page(
		&rec, offsets1, offsets2, index, page, n_prefix,
		index->n_core_fields, n_diff,
		n_external_pages);

#if 0
	DEBUG_PRINTF("      %s(): n_diff below page_no=%lu: " UINT64PF "\n",
		     __func__, page_no, n_diff);
#endif

	mtr_commit(&mtr);
	mem_heap_free(heap);
}
1594 
/** Input data that is used to calculate dict_index_t::stat_n_diff_key_vals[]
for each n-columns prefix (n from 1 to n_uniq). */
struct n_diff_data_t {
	/** Index of the level on which the descent through the btree
	stopped. level 0 is the leaf level. This is >= 1 because we
	avoid scanning the leaf level because it may contain too many
	pages and doing so is useless when combined with the random dives -
	if we are to scan the leaf level, this means a full scan and we can
	simply do that instead of fiddling with picking random records higher
	in the tree and to dive below them. At the start of the analyzing
	we may decide to do full scan of the leaf level, but then this
	structure is not used in that code path. */
	ulint		level;

	/** Number of records on the level where the descent through the
	btree stopped. When we scan the btree from the root, we stop at some
	mid level, choose some records from it and dive below them towards a
	leaf page to analyze. */
	ib_uint64_t	n_recs_on_level;

	/** Number of different key values that were found on the mid level. */
	ib_uint64_t	n_diff_on_level;

	/** Number of leaf pages that are analyzed. This is also the same as
	the number of records that we pick from the mid level and dive below
	them. */
	ib_uint64_t	n_leaf_pages_to_analyze;

	/** Cumulative sum of the number of different key values that were
	found on all analyzed pages. */
	ib_uint64_t	n_diff_all_analyzed_pages;

	/** Cumulative sum of the number of external pages (stored outside of
	the btree but in the same file segment). */
	ib_uint64_t	n_external_pages_sum;
};
1631 
1632 /** Estimate the number of different key values in an index when looking at
1633 the first n_prefix columns. For a given level in an index select
1634 n_diff_data->n_leaf_pages_to_analyze records from that level and dive below
1635 them to the corresponding leaf pages, then scan those leaf pages and save the
1636 sampling results in n_diff_data->n_diff_all_analyzed_pages.
1637 @param[in]	index			index
1638 @param[in]	n_prefix		look at first 'n_prefix' columns when
1639 comparing records
1640 @param[in]	boundaries		a vector that contains
1641 n_diff_data->n_diff_on_level integers each of which represents the index (on
1642 level 'level', counting from left/smallest to right/biggest from 0) of the
1643 last record from each group of distinct keys
1644 @param[in,out]	n_diff_data		n_diff_all_analyzed_pages and
1645 n_external_pages_sum in this structure will be set by this function. The
1646 members level, n_diff_on_level and n_leaf_pages_to_analyze must be set by the
1647 caller in advance - they are used by some calculations inside this function
1648 @param[in,out]	mtr			mini-transaction */
static
void
dict_stats_analyze_index_for_n_prefix(
	dict_index_t*		index,
	ulint			n_prefix,
	const boundaries_t*	boundaries,
	n_diff_data_t*		n_diff_data,
	mtr_t*			mtr)
{
	btr_pcur_t	pcur;
	const page_t*	page;
	ib_uint64_t	rec_idx;
	ib_uint64_t	i;

#if 0
	DEBUG_PRINTF("    %s(table=%s, index=%s, level=%lu, n_prefix=%lu,"
		     " n_diff_on_level=" UINT64PF ")\n",
		     __func__, index->table->name, index->name, level,
		     n_prefix, n_diff_data->n_diff_on_level);
#endif

	ut_ad(mtr->memo_contains(index->lock, MTR_MEMO_SX_LOCK));

	/* Position pcur on the leftmost record on the leftmost page
	on the desired level. */

	btr_pcur_open_at_index_side(
		true, index, BTR_SEARCH_TREE_ALREADY_S_LATCHED,
		&pcur, true, n_diff_data->level, mtr);
	btr_pcur_move_to_next_on_page(&pcur);

	page = btr_pcur_get_page(&pcur);

	const rec_t*	first_rec = btr_pcur_get_rec(&pcur);

	/* We shouldn't be scanning the leaf level. The caller of this function
	should have stopped the descent on level 1 or higher. */
	ut_ad(n_diff_data->level > 0);
	ut_ad(!page_is_leaf(page));

	/* The page must not be empty, except when
	it is the root page (and the whole index is empty). */
	ut_ad(btr_pcur_is_on_user_rec(&pcur));
	ut_ad(first_rec == page_rec_get_next_const(page_get_infimum_rec(page)));

	/* check that we are indeed on the desired level */
	ut_a(btr_page_get_level(page) == n_diff_data->level);

	/* there should not be any pages on the left */
	ut_a(!page_has_prev(page));

	/* check whether the first record on the leftmost page is marked
	as such; we are on a non-leaf level */
	ut_a(rec_get_info_bits(first_rec, page_is_comp(page))
	     & REC_INFO_MIN_REC_FLAG);

	/* index (on this level) of the last record of the last group
	of distinct keys */
	const ib_uint64_t	last_idx_on_level = boundaries->at(
		static_cast<unsigned>(n_diff_data->n_diff_on_level - 1));

	rec_idx = 0;

	n_diff_data->n_diff_all_analyzed_pages = 0;
	n_diff_data->n_external_pages_sum = 0;

	for (i = 0; i < n_diff_data->n_leaf_pages_to_analyze; i++) {
		/* there are n_diff_on_level elements
		in 'boundaries' and we divide those elements
		into n_leaf_pages_to_analyze segments, for example:

		let n_diff_on_level=100, n_leaf_pages_to_analyze=4, then:
		segment i=0:  [0, 24]
		segment i=1: [25, 49]
		segment i=2: [50, 74]
		segment i=3: [75, 99] or

		let n_diff_on_level=1, n_leaf_pages_to_analyze=1, then:
		segment i=0: [0, 0] or

		let n_diff_on_level=2, n_leaf_pages_to_analyze=2, then:
		segment i=0: [0, 0]
		segment i=1: [1, 1] or

		let n_diff_on_level=13, n_leaf_pages_to_analyze=7, then:
		segment i=0:  [0,  0]
		segment i=1:  [1,  2]
		segment i=2:  [3,  4]
		segment i=3:  [5,  6]
		segment i=4:  [7,  8]
		segment i=5:  [9, 10]
		segment i=6: [11, 12]

		then we select a random record from each segment and dive
		below it */
		const ib_uint64_t	n_diff = n_diff_data->n_diff_on_level;
		const ib_uint64_t	n_pick
			= n_diff_data->n_leaf_pages_to_analyze;

		const ib_uint64_t	left = n_diff * i / n_pick;
		const ib_uint64_t	right = n_diff * (i + 1) / n_pick - 1;

		ut_a(left <= right);
		ut_a(right <= last_idx_on_level);

		/* pick a random offset within the segment [left, right] */
		const ulint	rnd = ut_rnd_interval(
			static_cast<ulint>(right - left));

		const ib_uint64_t	dive_below_idx
			= boundaries->at(static_cast<unsigned>(left + rnd));

#if 0
		DEBUG_PRINTF("    %s(): dive below record with index="
			     UINT64PF "\n", __func__, dive_below_idx);
#endif

		/* seek to the record with index dive_below_idx */
		while (rec_idx < dive_below_idx
		       && btr_pcur_is_on_user_rec(&pcur)) {

			btr_pcur_move_to_next_user_rec(&pcur, mtr);
			rec_idx++;
		}

		/* if the level has finished before the record we are
		searching for, this means that the B-tree has changed in
		the meantime, quit our sampling and use whatever stats
		we have collected so far */
		if (rec_idx < dive_below_idx) {

			ut_ad(!btr_pcur_is_on_user_rec(&pcur));
			break;
		}

		/* it could be that the tree has changed in such a way that
		the record under dive_below_idx is the supremum record, in
		this case rec_idx == dive_below_idx and pcur is positioned
		on the supremum, we do not want to dive below it */
		if (!btr_pcur_is_on_user_rec(&pcur)) {
			break;
		}

		ut_a(rec_idx == dive_below_idx);

		ib_uint64_t	n_diff_on_leaf_page;
		ib_uint64_t	n_external_pages;

		dict_stats_analyze_index_below_cur(btr_pcur_get_btr_cur(&pcur),
						   n_prefix,
						   &n_diff_on_leaf_page,
						   &n_external_pages);

		/* We adjust n_diff_on_leaf_page here to avoid counting
		one value twice - once as the last on some page and once
		as the first on another page. Consider the following example:
		Leaf level:
		page: (2,2,2,2,3,3)
		... many pages like (3,3,3,3,3,3) ...
		page: (3,3,3,3,5,5)
		... many pages like (5,5,5,5,5,5) ...
		page: (5,5,5,5,8,8)
		page: (8,8,8,8,9,9)
		our algo would (correctly) get an estimate that there are
		2 distinct records per page (average). Having 4 pages below
		non-boring records, it would (wrongly) estimate the number
		of distinct records to 8. */
		if (n_diff_on_leaf_page > 0) {
			n_diff_on_leaf_page--;
		}

		n_diff_data->n_diff_all_analyzed_pages += n_diff_on_leaf_page;

		n_diff_data->n_external_pages_sum += n_external_pages;
	}

	btr_pcur_close(&pcur);
}
1824 
1825 /** statistics for an index */
1826 struct index_stats_t
1827 {
1828   std::vector<index_field_stats_t> stats;
1829   ulint index_size;
1830   ulint n_leaf_pages;
1831 
index_stats_tindex_stats_t1832   index_stats_t(ulint n_uniq) : index_size(1), n_leaf_pages(1)
1833   {
1834     stats.reserve(n_uniq);
1835     for (ulint i= 0; i < n_uniq; ++i)
1836       stats.push_back(index_field_stats_t(0, 1, 0));
1837   }
1838 };
1839 
/** Set dict_index_t::stat_n_diff_key_vals[] and stat_n_sample_sizes[].
@param[in]	n_diff_data	input data to use to derive the results
@param[in,out]	index_stats	index stats to set */
UNIV_INLINE
void
dict_stats_index_set_n_diff(
	const n_diff_data_t*	n_diff_data,
	index_stats_t&		index_stats)
{
	/* Walk the key prefixes from the longest (n_uniq columns) down
	to 1 column; n_diff_data[] has one entry per prefix length. */
	for (ulint n_prefix = index_stats.stats.size();
	     n_prefix >= 1;
	     n_prefix--) {
		/* n_diff_all_analyzed_pages can be 0 here if
		all the leaf pages sampled contained only
		delete-marked records. In this case we should assign
		0 to index->stat_n_diff_key_vals[n_prefix - 1], which
		the formula below does. */

		const n_diff_data_t*	data = &n_diff_data[n_prefix - 1];

		ut_ad(data->n_leaf_pages_to_analyze > 0);
		ut_ad(data->n_recs_on_level > 0);

		/* Estimated number of leaf pages that contain ordinary
		(non-externally-stored) records; derived below from either
		the level-1 record count or the sampled ordinary/external
		page ratio. */
		ib_uint64_t	n_ordinary_leaf_pages;

		if (data->level == 1) {
			/* If we know the number of records on level 1, then
			this number is the same as the number of pages on
			level 0 (leaf). */
			n_ordinary_leaf_pages = data->n_recs_on_level;
		} else {
			/* If we analyzed D ordinary leaf pages and found E
			external pages in total linked from those D ordinary
			leaf pages, then this means that the ratio
			ordinary/external is D/E. Then the ratio ordinary/total
			is D / (D + E). Knowing that the total number of pages
			is T (including ordinary and external) then we estimate
			that the total number of ordinary leaf pages is
			T * D / (D + E). */
			n_ordinary_leaf_pages
				= index_stats.n_leaf_pages
				* data->n_leaf_pages_to_analyze
				/ (data->n_leaf_pages_to_analyze
				   + data->n_external_pages_sum);
		}

		/* See REF01 for an explanation of the algorithm.
		NOTE(review): keep the interleaved multiply/divide order
		as-is — this is integer arithmetic, so reordering the
		operations changes how the intermediate results truncate
		(and how large they can grow). */
		index_stats.stats[n_prefix - 1].n_diff_key_vals
			= n_ordinary_leaf_pages

			* data->n_diff_on_level
			/ data->n_recs_on_level

			* data->n_diff_all_analyzed_pages
			/ data->n_leaf_pages_to_analyze;

		/* Record how many leaf pages the estimate was based on. */
		index_stats.stats[n_prefix - 1].n_sample_sizes
			= data->n_leaf_pages_to_analyze;

		DEBUG_PRINTF("    %s(): n_diff=" UINT64PF
			     " for n_prefix=" ULINTPF
			     " (" ULINTPF
			     " * " UINT64PF " / " UINT64PF
			     " * " UINT64PF " / " UINT64PF ")\n",
			     __func__,
			     index_stats.stats[n_prefix - 1].n_diff_key_vals,
			     n_prefix,
			     index_stats.n_leaf_pages,
			     data->n_diff_on_level,
			     data->n_recs_on_level,
			     data->n_diff_all_analyzed_pages,
			     data->n_leaf_pages_to_analyze);
	}
}
1914 
/** Calculates new statistics for a given index and saves them to the index
members stat_n_diff_key_vals[], stat_n_sample_sizes[], stat_index_size and
stat_n_leaf_pages. This function can be slow.
@param[in]	index	index to analyze
@return index stats */
static index_stats_t dict_stats_analyze_index(dict_index_t* index)
{
	ulint		root_level;
	ulint		level;
	bool		level_is_analyzed;
	ulint		n_uniq;
	ulint		n_prefix;
	ib_uint64_t	total_recs;
	ib_uint64_t	total_pages;
	mtr_t		mtr;
	ulint		size;
	/* result starts out with sensible defaults (see index_stats_t),
	which are returned unchanged on any early exit below. */
	index_stats_t	result(index->n_uniq);
	DBUG_ENTER("dict_stats_analyze_index");

	DBUG_PRINT("info", ("index: %s, online status: %d", index->name(),
			    dict_index_get_online_status(index)));

	ut_ad(!mutex_own(&dict_sys.mutex)); // because this function is slow
	ut_ad(index->table->get_ref_count());

	/* Disable update statistic for Rtree */
	if (dict_index_is_spatial(index)) {
		DBUG_RETURN(result);
	}

	DEBUG_PRINTF("  %s(index=%s)\n", __func__, index->name());

	/* Measure the index size under an S-latch on the index tree. */
	mtr.start();
	mtr_s_lock_index(index, &mtr);
	size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);

	if (size != ULINT_UNDEFINED) {
		result.index_size = size;
		size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
	}

	/* Release the X locks on the root page taken by btr_get_size() */
	mtr.commit();

	switch (size) {
	case ULINT_UNDEFINED:
		/* btr_get_size() failed; keep the default stats. */
		dict_stats_assert_initialized_index(index);
		DBUG_RETURN(result);
	case 0:
		/* The root node of the tree is a leaf */
		size = 1;
	}

	result.n_leaf_pages = size;

	/* Re-latch the tree (SX this time) for the level analysis. */
	mtr.start();
	mtr_sx_lock_index(index, &mtr);
	root_level = btr_height_get(index, &mtr);

	n_uniq = dict_index_get_n_unique(index);

	/* If the tree has just one level (and one page) or if the user
	has requested to sample too many pages then do full scan.

	For each n-column prefix (for n=1..n_uniq) N_SAMPLE_PAGES(index)
	will be sampled, so in total N_SAMPLE_PAGES(index) * n_uniq leaf
	pages will be sampled. If that number is bigger than the total
	number of leaf pages then do full scan of the leaf level instead
	since it will be faster and will give better results. */

	if (root_level == 0
	    || N_SAMPLE_PAGES(index) * n_uniq > result.n_leaf_pages) {

		if (root_level == 0) {
			DEBUG_PRINTF("  %s(): just one page,"
				     " doing full scan\n", __func__);
		} else {
			DEBUG_PRINTF("  %s(): too many pages requested for"
				     " sampling, doing full scan\n", __func__);
		}

		/* do full scan of level 0; save results directly
		into the index */

		dict_stats_analyze_index_level(index,
					       0 /* leaf level */,
					       index->stat_n_diff_key_vals,
					       &total_recs,
					       &total_pages,
					       NULL /* boundaries not needed */,
					       &mtr);

		mtr.commit();

		/* Copy the stats written into the index object back into
		result under dict_sys.mutex, since other threads may read
		or modify index->stat_* concurrently. */
		mutex_enter(&dict_sys.mutex);
		for (ulint i = 0; i < n_uniq; i++) {
			result.stats[i].n_diff_key_vals = index->stat_n_diff_key_vals[i];
			result.stats[i].n_sample_sizes = total_pages;
			result.stats[i].n_non_null_key_vals = index->stat_n_non_null_key_vals[i];
		}
		result.n_leaf_pages = index->stat_n_leaf_pages;
		mutex_exit(&dict_sys.mutex);

		DBUG_RETURN(result);
	}

	/* For each level that is being scanned in the btree, this contains the
	number of different key values for all possible n-column prefixes. */
	ib_uint64_t*	n_diff_on_level = UT_NEW_ARRAY(
		ib_uint64_t, n_uniq, mem_key_dict_stats_n_diff_on_level);

	/* For each level that is being scanned in the btree, this contains the
	index of the last record from each group of equal records (when
	comparing only the first n columns, n=1..n_uniq). */
	boundaries_t*	n_diff_boundaries = UT_NEW_ARRAY_NOKEY(boundaries_t,
							       n_uniq);

	/* For each n-column prefix this array contains the input data that is
	used to calculate dict_index_t::stat_n_diff_key_vals[]. */
	n_diff_data_t*	n_diff_data = UT_NEW_ARRAY_NOKEY(n_diff_data_t, n_uniq);

	/* total_recs is also used to estimate the number of pages on one
	level below, so at the start we have 1 page (the root) */
	total_recs = 1;

	/* Here we use the following optimization:
	If we find that level L is the first one (searching from the
	root) that contains at least D distinct keys when looking at
	the first n_prefix columns, then:
	if we look at the first n_prefix-1 columns then the first
	level that contains D distinct keys will be either L or a
	lower one.
	So if we find that the first level containing D distinct
	keys (on n_prefix columns) is L, we continue from L when
	searching for D distinct keys on n_prefix-1 columns. */
	level = root_level;
	level_is_analyzed = false;

	for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) {

		DEBUG_PRINTF("  %s(): searching level with >=%llu "
			     "distinct records, n_prefix=" ULINTPF "\n",
			     __func__, N_DIFF_REQUIRED(index), n_prefix);

		/* Commit the mtr to release the tree S lock to allow
		other threads to do some work too. */
		mtr.commit();
		mtr.start();
		mtr_sx_lock_index(index, &mtr);
		if (root_level != btr_height_get(index, &mtr)) {
			/* Just quit if the tree has changed beyond
			recognition here. The old stats from previous
			runs will remain in the values that we have
			not calculated yet. Initially when the index
			object is created the stats members are given
			some sensible values so leaving them untouched
			here even the first time will not cause us to
			read uninitialized memory later. */
			break;
		}

		/* check whether we should pick the current level;
		we pick level 1 even if it does not have enough
		distinct records because we do not want to scan the
		leaf level because it may contain too many records */
		if (level_is_analyzed
		    && (n_diff_on_level[n_prefix - 1] >= N_DIFF_REQUIRED(index)
			|| level == 1)) {

			goto found_level;
		}

		/* search for a level that contains enough distinct records */

		if (level_is_analyzed && level > 1) {

			/* if this does not hold we should be on
			"found_level" instead of here */
			ut_ad(n_diff_on_level[n_prefix - 1]
			      < N_DIFF_REQUIRED(index));

			level--;
			level_is_analyzed = false;
		}

		/* descend into the tree, searching for "good enough" level */
		for (;;) {

			/* make sure we do not scan the leaf level
			accidentally, it may contain too many pages */
			ut_ad(level > 0);

			/* scanning the same level twice is an optimization
			bug */
			ut_ad(!level_is_analyzed);

			/* Do not scan if this would read too many pages.
			Here we use the following fact:
			the number of pages on level L equals the number
			of records on level L+1, thus we deduce that the
			following call would scan total_recs pages, because
			total_recs is left from the previous iteration when
			we scanned one level upper or we have not scanned any
			levels yet in which case total_recs is 1. */
			if (total_recs > N_SAMPLE_PAGES(index)) {

				/* if the above cond is true then we are
				not at the root level since on the root
				level total_recs == 1 (set before we
				enter the n-prefix loop) and cannot
				be > N_SAMPLE_PAGES(index) */
				ut_a(level != root_level);

				/* step one level back and be satisfied with
				whatever it contains */
				level++;
				level_is_analyzed = true;

				break;
			}

			dict_stats_analyze_index_level(index,
						       level,
						       n_diff_on_level,
						       &total_recs,
						       &total_pages,
						       n_diff_boundaries,
						       &mtr);

			level_is_analyzed = true;

			if (level == 1
			    || n_diff_on_level[n_prefix - 1]
			    >= N_DIFF_REQUIRED(index)) {
				/* we have reached the last level we could scan
				or we found a good level with many distinct
				records */
				break;
			}

			level--;
			level_is_analyzed = false;
		}
found_level:

		DEBUG_PRINTF("  %s(): found level " ULINTPF
			     " that has " UINT64PF
			     " distinct records for n_prefix=" ULINTPF "\n",
			     __func__, level, n_diff_on_level[n_prefix - 1],
			     n_prefix);
		/* here we are either on level 1 or the level that we are on
		contains >= N_DIFF_REQUIRED distinct keys or we did not scan
		deeper levels because they would contain too many pages */

		ut_ad(level > 0);

		ut_ad(level_is_analyzed);

		/* if any of these is 0 then there is exactly one page in the
		B-tree and it is empty and we should have done full scan and
		should not be here */
		ut_ad(total_recs > 0);
		ut_ad(n_diff_on_level[n_prefix - 1] > 0);

		ut_ad(N_SAMPLE_PAGES(index) > 0);

		/* Record the inputs for the final estimate formula in
		dict_stats_index_set_n_diff(). */
		n_diff_data_t*	data = &n_diff_data[n_prefix - 1];

		data->level = level;

		data->n_recs_on_level = total_recs;

		data->n_diff_on_level = n_diff_on_level[n_prefix - 1];

		data->n_leaf_pages_to_analyze = std::min(
			N_SAMPLE_PAGES(index),
			n_diff_on_level[n_prefix - 1]);

		/* pick some records from this level and dive below them for
		the given n_prefix */

		dict_stats_analyze_index_for_n_prefix(
			index, n_prefix, &n_diff_boundaries[n_prefix - 1],
			data, &mtr);
	}

	mtr.commit();

	UT_DELETE_ARRAY(n_diff_boundaries);

	UT_DELETE_ARRAY(n_diff_on_level);

	/* n_prefix == 0 means that the above loop did not end up prematurely
	due to tree being changed and so n_diff_data[] is set up. */
	if (n_prefix == 0) {
		dict_stats_index_set_n_diff(n_diff_data, result);
	}

	UT_DELETE_ARRAY(n_diff_data);

	DBUG_RETURN(result);
}
2217 
2218 /*********************************************************************//**
2219 Calculates new estimates for table and index statistics. This function
2220 is relatively slow and is used to calculate persistent statistics that
2221 will be saved on disk.
2222 @return DB_SUCCESS or error code */
2223 static
2224 dberr_t
dict_stats_update_persistent(dict_table_t * table)2225 dict_stats_update_persistent(
2226 /*=========================*/
2227 	dict_table_t*	table)		/*!< in/out: table */
2228 {
2229 	dict_index_t*	index;
2230 
2231 	DEBUG_PRINTF("%s(table=%s)\n", __func__, table->name);
2232 
2233 	DEBUG_SYNC_C("dict_stats_update_persistent");
2234 
2235 	/* analyze the clustered index first */
2236 
2237 	index = dict_table_get_first_index(table);
2238 
2239 	if (index == NULL
2240 	    || index->is_corrupted()
2241 	    || (index->type | DICT_UNIQUE) != (DICT_CLUSTERED | DICT_UNIQUE)) {
2242 
2243 		/* Table definition is corrupt */
2244 		dict_stats_empty_table(table, true);
2245 
2246 		return(DB_CORRUPTION);
2247 	}
2248 
2249 	ut_ad(!dict_index_is_ibuf(index));
2250 	mutex_enter(&dict_sys.mutex);
2251 	dict_stats_empty_index(index, false);
2252 	mutex_exit(&dict_sys.mutex);
2253 
2254 	index_stats_t stats = dict_stats_analyze_index(index);
2255 
2256 	mutex_enter(&dict_sys.mutex);
2257 	index->stat_index_size = stats.index_size;
2258 	index->stat_n_leaf_pages = stats.n_leaf_pages;
2259 	for (size_t i = 0; i < stats.stats.size(); ++i) {
2260 		index->stat_n_diff_key_vals[i] = stats.stats[i].n_diff_key_vals;
2261 		index->stat_n_sample_sizes[i] = stats.stats[i].n_sample_sizes;
2262 		index->stat_n_non_null_key_vals[i] = stats.stats[i].n_non_null_key_vals;
2263 	}
2264 
2265 	ulint	n_unique = dict_index_get_n_unique(index);
2266 
2267 	table->stat_n_rows = index->stat_n_diff_key_vals[n_unique - 1];
2268 
2269 	table->stat_clustered_index_size = index->stat_index_size;
2270 
2271 	/* analyze other indexes from the table, if any */
2272 
2273 	table->stat_sum_of_other_index_sizes = 0;
2274 
2275 	for (index = dict_table_get_next_index(index);
2276 	     index != NULL;
2277 	     index = dict_table_get_next_index(index)) {
2278 
2279 		ut_ad(!dict_index_is_ibuf(index));
2280 
2281 		if (index->type & (DICT_FTS | DICT_SPATIAL)) {
2282 			continue;
2283 		}
2284 
2285 		dict_stats_empty_index(index, false);
2286 
2287 		if (dict_stats_should_ignore_index(index)) {
2288 			continue;
2289 		}
2290 
2291 		if (!(table->stats_bg_flag & BG_STAT_SHOULD_QUIT)) {
2292 			mutex_exit(&dict_sys.mutex);
2293 			stats = dict_stats_analyze_index(index);
2294 			mutex_enter(&dict_sys.mutex);
2295 
2296 			index->stat_index_size = stats.index_size;
2297 			index->stat_n_leaf_pages = stats.n_leaf_pages;
2298 			for (size_t i = 0; i < stats.stats.size(); ++i) {
2299 				index->stat_n_diff_key_vals[i]
2300 					= stats.stats[i].n_diff_key_vals;
2301 				index->stat_n_sample_sizes[i]
2302 					= stats.stats[i].n_sample_sizes;
2303 				index->stat_n_non_null_key_vals[i]
2304 					= stats.stats[i].n_non_null_key_vals;
2305 			}
2306 		}
2307 
2308 		table->stat_sum_of_other_index_sizes
2309 			+= index->stat_index_size;
2310 	}
2311 
2312 	table->stats_last_recalc = time(NULL);
2313 
2314 	table->stat_modified_counter = 0;
2315 
2316 	table->stat_initialized = TRUE;
2317 
2318 	dict_stats_assert_initialized(table);
2319 
2320 	mutex_exit(&dict_sys.mutex);
2321 
2322 	return(DB_SUCCESS);
2323 }
2324 
2325 #include "mysql_com.h"
/** Save an individual index's statistic into the persistent statistics
storage.
@param[in]	index			index to be updated
@param[in]	last_update		timestamp of the stat
@param[in]	stat_name		name of the stat
@param[in]	stat_value		value of the stat
@param[in]	sample_size		n pages sampled or NULL
@param[in]	stat_description	description of the stat
@param[in,out]	trx			in case of NULL the function will
allocate and free the trx object. If it is not NULL then it will be
rolled back only in the case of error, but not freed.
@return DB_SUCCESS or error code */
dberr_t
dict_stats_save_index_stat(
	dict_index_t*	index,
	time_t		last_update,
	const char*	stat_name,
	ib_uint64_t	stat_value,
	ib_uint64_t*	sample_size,
	const char*	stat_description,
	trx_t*		trx)
{
	dberr_t		ret;
	pars_info_t*	pinfo;
	char		db_utf8[MAX_DB_UTF8_LEN];
	char		table_utf8[MAX_TABLE_UTF8_LEN];

	ut_ad(!trx || trx->internal || trx->mysql_thd);
	ut_d(dict_sys.assert_locked());

	/* Split the "db/table" internal name into the UTF-8 database and
	table components used as mysql.innodb_index_stats key columns. */
	dict_fs2utf8(index->table->name.m_name, db_utf8, sizeof(db_utf8),
		     table_utf8, sizeof(table_utf8));

	/* Bind all literals for the DELETE+INSERT procedure below. The
	MEM_CHECK_DEFINED calls let Valgrind/MSAN flag uninitialized stat
	inputs here rather than deep inside the SQL executor. */
	pinfo = pars_info_create();
	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
	pars_info_add_str_literal(pinfo, "index_name", index->name);
	MEM_CHECK_DEFINED(&last_update, 4);
	pars_info_add_int4_literal(pinfo, "last_update", uint32(last_update));
	MEM_CHECK_DEFINED(stat_name, strlen(stat_name));
	pars_info_add_str_literal(pinfo, "stat_name", stat_name);
	MEM_CHECK_DEFINED(&stat_value, 8);
	pars_info_add_ull_literal(pinfo, "stat_value", stat_value);
	if (sample_size != NULL) {
		MEM_CHECK_DEFINED(sample_size, 8);
		pars_info_add_ull_literal(pinfo, "sample_size", *sample_size);
	} else {
		/* Bind SQL NULL when no sample size applies (e.g. for the
		"size" and "n_leaf_pages" stats). */
		pars_info_add_literal(pinfo, "sample_size", NULL,
				      UNIV_SQL_NULL, DATA_FIXBINARY, 0);
	}
	pars_info_add_str_literal(pinfo, "stat_description",
				  stat_description);

	/* DELETE+INSERT implements an upsert keyed on
	(database_name, table_name, index_name, stat_name). */
	ret = dict_stats_exec_sql(
		pinfo,
		"PROCEDURE INDEX_STATS_SAVE () IS\n"
		"BEGIN\n"

		"DELETE FROM \"" INDEX_STATS_NAME "\"\n"
		"WHERE\n"
		"database_name = :database_name AND\n"
		"table_name = :table_name AND\n"
		"index_name = :index_name AND\n"
		"stat_name = :stat_name;\n"

		"INSERT INTO \"" INDEX_STATS_NAME "\"\n"
		"VALUES\n"
		"(\n"
		":database_name,\n"
		":table_name,\n"
		":index_name,\n"
		":last_update,\n"
		":stat_name,\n"
		":stat_value,\n"
		":sample_size,\n"
		":stat_description\n"
		");\n"
		"END;", trx);

	if (UNIV_UNLIKELY(ret != DB_SUCCESS)) {
		/* Print the error only once per index to avoid flooding
		the log on repeated failures. */
		if (innodb_index_stats_not_found == false &&
		    index->stats_error_printed == false) {
		ib::error() << "Cannot save index statistics for table "
			<< index->table->name
			<< ", index " << index->name
			<< ", stat name \"" << stat_name << "\": "
			<< ret;
			index->stats_error_printed = true;
		}
	}

	return(ret);
}
2419 
2420 /** Report an error if updating table statistics failed because
2421 .ibd file is missing, table decryption failed or table is corrupted.
2422 @param[in,out]	table	Table
2423 @param[in]	defragment	true if statistics is for defragment
2424 @retval DB_DECRYPTION_FAILED if decryption of the table failed
2425 @retval DB_TABLESPACE_DELETED if .ibd file is missing
2426 @retval DB_CORRUPTION if table is marked as corrupted */
2427 dberr_t
dict_stats_report_error(dict_table_t * table,bool defragment)2428 dict_stats_report_error(dict_table_t* table, bool defragment)
2429 {
2430 	dberr_t		err;
2431 
2432 	const char*	df = defragment ? " defragment" : "";
2433 
2434 	if (!table->space) {
2435 		ib::warn() << "Cannot save" << df << " statistics for table "
2436 			   << table->name
2437 			   << " because the .ibd file is missing. "
2438 			   << TROUBLESHOOTING_MSG;
2439 		err = DB_TABLESPACE_DELETED;
2440 	} else {
2441 		ib::warn() << "Cannot save" << df << " statistics for table "
2442 			   << table->name
2443 			   << " because file "
2444 			   << table->space->chain.start->name
2445 			   << (table->corrupted
2446 			       ? " is corrupted."
2447 			       : " cannot be decrypted.");
2448 		err = table->corrupted ? DB_CORRUPTION : DB_DECRYPTION_FAILED;
2449 	}
2450 
2451 	dict_stats_empty_table(table, defragment);
2452 	return err;
2453 }
2454 
/** Save the table's statistics into the persistent statistics storage.
@param[in]	table_orig	table whose stats to save
@param[in]	only_for_index	if this is non-NULL, then stats for indexes
that are not equal to it will not be saved, if NULL, then all indexes' stats
are saved
@return DB_SUCCESS or error code */
static
dberr_t
dict_stats_save(
	dict_table_t*		table_orig,
	const index_id_t*	only_for_index)
{
	pars_info_t*	pinfo;
	dberr_t		ret;
	dict_table_t*	table;
	char		db_utf8[MAX_DB_UTF8_LEN];
	char		table_utf8[MAX_TABLE_UTF8_LEN];

	if (high_level_read_only) {
		return DB_READ_ONLY;
	}

	if (!table_orig->is_readable()) {
		return (dict_stats_report_error(table_orig));
	}

	/* Work on a snapshot copy so the stats we persist are consistent
	even if the original table's stats change concurrently. The
	snapshot is freed at func_exit below. */
	table = dict_stats_snapshot_create(table_orig);

	dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8),
		     table_utf8, sizeof(table_utf8));

	/* Use one timestamp for the table row and all index rows. */
	const time_t now = time(NULL);
	dict_sys_lock();

	/* First upsert the single mysql.innodb_table_stats row. */
	pinfo = pars_info_create();

	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
	pars_info_add_int4_literal(pinfo, "last_update", uint32(now));
	pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows);
	pars_info_add_ull_literal(pinfo, "clustered_index_size",
		table->stat_clustered_index_size);
	pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes",
		table->stat_sum_of_other_index_sizes);

	ret = dict_stats_exec_sql(
		pinfo,
		"PROCEDURE TABLE_STATS_SAVE () IS\n"
		"BEGIN\n"

		"DELETE FROM \"" TABLE_STATS_NAME "\"\n"
		"WHERE\n"
		"database_name = :database_name AND\n"
		"table_name = :table_name;\n"

		"INSERT INTO \"" TABLE_STATS_NAME "\"\n"
		"VALUES\n"
		"(\n"
		":database_name,\n"
		":table_name,\n"
		":last_update,\n"
		":n_rows,\n"
		":clustered_index_size,\n"
		":sum_of_other_index_sizes\n"
		");\n"
		"END;", NULL);

	if (UNIV_UNLIKELY(ret != DB_SUCCESS)) {
		ib::error() << "Cannot save table statistics for table "
			<< table->name << ": " << ret;
		/* Shared exit path: also reached via "goto func_exit"
		at the end of the function after the index stats loop. */
func_exit:
		dict_sys_unlock();
		dict_stats_snapshot_free(table);
		return ret;
	}

	/* All index-stats rows are written in one internal transaction;
	see the deadlock-avoidance note below. */
	trx_t*	trx = trx_create();
	trx_start_internal(trx);

	dict_index_t*	index;
	index_map_t	indexes(
		(ut_strcmp_functor()),
		index_map_t_allocator(mem_key_dict_stats_index_map_t));

	/* Below we do all the modifications in innodb_index_stats in a single
	transaction for performance reasons. Modifying more than one row in a
	single transaction may deadlock with other transactions if they
	lock the rows in different order. Other transaction could be for
	example when we DROP a table and do
	DELETE FROM innodb_index_stats WHERE database_name = '...'
	AND table_name = '...'; which will affect more than one row. To
	prevent deadlocks we always lock the rows in the same order - the
	order of the PK, which is (database_name, table_name, index_name,
	stat_name). This is why below we sort the indexes by name and then
	for each index, do the mods ordered by stat_name. */

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		indexes[index->name] = index;
	}

	index_map_t::const_iterator	it;

	for (it = indexes.begin(); it != indexes.end(); ++it) {

		index = it->second;

		if (only_for_index != NULL && index->id != *only_for_index) {
			continue;
		}

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_ibuf(index));

		/* One n_diff_pfxNN row per key prefix length; NN is
		zero-padded so lexicographic order equals numeric order,
		preserving the stat_name locking order. */
		for (unsigned i = 0; i < index->n_uniq; i++) {

			char	stat_name[16];
			char	stat_description[1024];

			snprintf(stat_name, sizeof(stat_name),
				 "n_diff_pfx%02u", i + 1);

			/* craft a string that contains the column names */
			snprintf(stat_description, sizeof(stat_description),
				 "%s", index->fields[0].name());
			for (unsigned j = 1; j <= i; j++) {
				size_t	len;

				len = strlen(stat_description);

				snprintf(stat_description + len,
					 sizeof(stat_description) - len,
					 ",%s", index->fields[j].name());
			}

			ret = dict_stats_save_index_stat(
				index, now, stat_name,
				index->stat_n_diff_key_vals[i],
				&index->stat_n_sample_sizes[i],
				stat_description, trx);

			if (ret != DB_SUCCESS) {
				/* trx was already rolled back inside
				dict_stats_save_index_stat(); just free it. */
				goto end;
			}
		}

		ret = dict_stats_save_index_stat(index, now, "n_leaf_pages",
						 index->stat_n_leaf_pages,
						 NULL,
						 "Number of leaf pages "
						 "in the index", trx);
		if (ret != DB_SUCCESS) {
			goto end;
		}

		ret = dict_stats_save_index_stat(index, now, "size",
						 index->stat_index_size,
						 NULL,
						 "Number of pages "
						 "in the index", trx);
		if (ret != DB_SUCCESS) {
			goto end;
		}
	}

	trx_commit_for_mysql(trx);

end:
	trx->free();
	goto func_exit;
}
2631 
2632 /*********************************************************************//**
2633 Called for the row that is selected by
2634 SELECT ... FROM mysql.innodb_table_stats WHERE table='...'
2635 The second argument is a pointer to the table and the fetched stats are
2636 written to it.
2637 @return non-NULL dummy */
2638 static
2639 ibool
dict_stats_fetch_table_stats_step(void * node_void,void * table_void)2640 dict_stats_fetch_table_stats_step(
2641 /*==============================*/
2642 	void*	node_void,	/*!< in: select node */
2643 	void*	table_void)	/*!< out: table */
2644 {
2645 	sel_node_t*	node = (sel_node_t*) node_void;
2646 	dict_table_t*	table = (dict_table_t*) table_void;
2647 	que_common_t*	cnode;
2648 	int		i;
2649 
2650 	/* this should loop exactly 3 times - for
2651 	n_rows,clustered_index_size,sum_of_other_index_sizes */
2652 	for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
2653 	     cnode != NULL;
2654 	     cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
2655 	     i++) {
2656 
2657 		const byte*	data;
2658 		dfield_t*	dfield = que_node_get_val(cnode);
2659 		dtype_t*	type = dfield_get_type(dfield);
2660 		ulint		len = dfield_get_len(dfield);
2661 
2662 		data = static_cast<const byte*>(dfield_get_data(dfield));
2663 
2664 		switch (i) {
2665 		case 0: /* mysql.innodb_table_stats.n_rows */
2666 
2667 			ut_a(dtype_get_mtype(type) == DATA_INT);
2668 			ut_a(len == 8);
2669 
2670 			table->stat_n_rows = mach_read_from_8(data);
2671 
2672 			break;
2673 
2674 		case 1: /* mysql.innodb_table_stats.clustered_index_size */
2675 
2676 			ut_a(dtype_get_mtype(type) == DATA_INT);
2677 			ut_a(len == 8);
2678 
2679 			table->stat_clustered_index_size
2680 				= (ulint) mach_read_from_8(data);
2681 
2682 			break;
2683 
2684 		case 2: /* mysql.innodb_table_stats.sum_of_other_index_sizes */
2685 
2686 			ut_a(dtype_get_mtype(type) == DATA_INT);
2687 			ut_a(len == 8);
2688 
2689 			table->stat_sum_of_other_index_sizes
2690 				= (ulint) mach_read_from_8(data);
2691 
2692 			break;
2693 
2694 		default:
2695 
2696 			/* someone changed SELECT
2697 			n_rows,clustered_index_size,sum_of_other_index_sizes
2698 			to select more columns from innodb_table_stats without
2699 			adjusting here */
2700 			ut_error;
2701 		}
2702 	}
2703 
2704 	/* if i < 3 this means someone changed the
2705 	SELECT n_rows,clustered_index_size,sum_of_other_index_sizes
2706 	to select less columns from innodb_table_stats without adjusting here;
2707 	if i > 3 we would have ut_error'ed earlier */
2708 	ut_a(i == 3 /*n_rows,clustered_index_size,sum_of_other_index_sizes*/);
2709 
2710 	/* XXX this is not used but returning non-NULL is necessary */
2711 	return(TRUE);
2712 }
2713 
/** Aux struct used to pass a table and a boolean to
dict_stats_fetch_index_stats_step(). Both members are read and the
flag is written by that callback while iterating fetched rows. */
struct index_fetch_t {
	dict_table_t*	table;	/*!< table whose indexes are to be modified */
	bool		stats_were_modified; /*!< will be set to true if at
				least one index stats were modified */
};
2721 
/*********************************************************************//**
Called for the rows that are selected by
SELECT ... FROM mysql.innodb_index_stats WHERE table='...'
The second argument is a pointer to the table and the fetched stats are
written to its indexes.
Let a table has N indexes and each index has Ui unique columns for i=1..N,
then mysql.innodb_index_stats will have SUM(Ui) i=1..N rows for that table.
So this function will be called SUM(Ui) times where SUM(Ui) is of magnitude
N*AVG(Ui). In each call it searches for the currently fetched index into
table->indexes linearly, assuming this list is not sorted. Thus, overall,
fetching all indexes' stats from mysql.innodb_index_stats is O(N^2) where N
is the number of indexes.
This can be improved if we sort table->indexes in a temporary area just once
and then search in that sorted list. Then the complexity will be O(N*log(N)).
We assume a table will not have more than 100 indexes, so we go with the
simpler N^2 algorithm.
@return non-NULL dummy */
static
ibool
dict_stats_fetch_index_stats_step(
/*==============================*/
	void*	node_void,	/*!< in: select node */
	void*	arg_void)	/*!< out: table + a flag that tells if we
				modified anything */
{
	sel_node_t*	node = (sel_node_t*) node_void;
	index_fetch_t*	arg = (index_fetch_t*) arg_void;
	dict_table_t*	table = arg->table;
	dict_index_t*	index = NULL;
	que_common_t*	cnode;
	const char*	stat_name = NULL;
	ulint		stat_name_len = ULINT_UNDEFINED;
	ib_uint64_t	stat_value = UINT64_UNDEFINED;
	ib_uint64_t	sample_size = UINT64_UNDEFINED;
	int		i;

	/* this should loop exactly 4 times - for the columns that
	were selected: index_name,stat_name,stat_value,sample_size */
	for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
	     cnode != NULL;
	     cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
	     i++) {

		const byte*	data;
		dfield_t*	dfield = que_node_get_val(cnode);
		dtype_t*	type = dfield_get_type(dfield);
		ulint		len = dfield_get_len(dfield);

		data = static_cast<const byte*>(dfield_get_data(dfield));

		/* The case number is the position of the column in the
		SELECT list; the assertions below pin the expected type
		of each column. */
		switch (i) {
		case 0: /* mysql.innodb_index_stats.index_name */

			ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);

			/* search for index in table's indexes whose name
			matches data; the fetched index name is in data,
			has no terminating '\0' and has length len */
			for (index = dict_table_get_first_index(table);
			     index != NULL;
			     index = dict_table_get_next_index(index)) {

				if (index->is_committed()
				    && strlen(index->name) == len
				    && memcmp(index->name, data, len) == 0) {
					/* the corresponding index was found */
					break;
				}
			}

			/* if index is NULL here this means that
			mysql.innodb_index_stats contains more rows than the
			number of indexes in the table; this is ok, we just
			return ignoring those extra rows; in other words
			dict_stats_fetch_index_stats_step() has been called
			for a row from index_stats with unknown index_name
			column */
			if (index == NULL) {

				return(TRUE);
			}

			break;

		case 1: /* mysql.innodb_index_stats.stat_name */

			ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);

			ut_a(index != NULL);

			/* Like index_name, stat_name is not NUL-terminated;
			keep the pointer and length for later parsing. */
			stat_name = (const char*) data;
			stat_name_len = len;

			break;

		case 2: /* mysql.innodb_index_stats.stat_value */

			ut_a(dtype_get_mtype(type) == DATA_INT);
			ut_a(len == 8);

			ut_a(index != NULL);
			ut_a(stat_name != NULL);
			ut_a(stat_name_len != ULINT_UNDEFINED);

			stat_value = mach_read_from_8(data);

			break;

		case 3: /* mysql.innodb_index_stats.sample_size */

			ut_a(dtype_get_mtype(type) == DATA_INT);
			ut_a(len == 8 || len == UNIV_SQL_NULL);

			ut_a(index != NULL);
			ut_a(stat_name != NULL);
			ut_a(stat_name_len != ULINT_UNDEFINED);
			ut_a(stat_value != UINT64_UNDEFINED);

			/* sample_size is nullable in the stats table;
			leave it as UINT64_UNDEFINED when it is NULL. */
			if (len == UNIV_SQL_NULL) {
				break;
			}
			/* else */

			sample_size = mach_read_from_8(data);

			break;

		default:

			/* someone changed
			SELECT index_name,stat_name,stat_value,sample_size
			to select more columns from innodb_index_stats without
			adjusting here */
			ut_error;
		}
	}

	/* if i < 4 this means someone changed the
	SELECT index_name,stat_name,stat_value,sample_size
	to select less columns from innodb_index_stats without adjusting here;
	if i > 4 we would have ut_error'ed earlier */
	ut_a(i == 4 /* index_name,stat_name,stat_value,sample_size */);

	ut_a(index != NULL);
	ut_a(stat_name != NULL);
	ut_a(stat_name_len != ULINT_UNDEFINED);
	ut_a(stat_value != UINT64_UNDEFINED);
	/* sample_size could be UINT64_UNDEFINED here, if it is NULL */

/* Rows named "n_diff_pfxNN" carry the stats for the first NN columns
of the index, where NN is exactly two decimal digits (see parsing
below), e.g. "n_diff_pfx01". */
#define PFX	"n_diff_pfx"
#define PFX_LEN	10

	/* Dispatch on the stat_name; unknown names are silently ignored
	(see the final else branch). */
	if (stat_name_len == 4 /* strlen("size") */
	    && strncasecmp("size", stat_name, stat_name_len) == 0) {
		index->stat_index_size = (ulint) stat_value;
		arg->stats_were_modified = true;
	} else if (stat_name_len == 12 /* strlen("n_leaf_pages") */
		   && strncasecmp("n_leaf_pages", stat_name, stat_name_len)
		   == 0) {
		index->stat_n_leaf_pages = (ulint) stat_value;
		arg->stats_were_modified = true;
	} else if (stat_name_len == 12 /* strlen("n_page_split") */
		   && strncasecmp("n_page_split", stat_name, stat_name_len)
		      == 0) {
		index->stat_defrag_n_page_split = (ulint) stat_value;
		arg->stats_were_modified = true;
	} else if (stat_name_len == 13 /* strlen("n_pages_freed") */
		   && strncasecmp("n_pages_freed", stat_name, stat_name_len)
		      == 0) {
		index->stat_defrag_n_pages_freed = (ulint) stat_value;
		arg->stats_were_modified = true;
	} else if (stat_name_len > PFX_LEN /* e.g. stat_name=="n_diff_pfx01" */
		   && strncasecmp(PFX, stat_name, PFX_LEN) == 0) {

		const char*	num_ptr;
		unsigned long	n_pfx;

		/* point num_ptr into "1" from "n_diff_pfx12..." */
		num_ptr = stat_name + PFX_LEN;

		/* stat_name should have exactly 2 chars appended to PFX
		and they should be digits */
		if (stat_name_len != PFX_LEN + 2
		    || num_ptr[0] < '0' || num_ptr[0] > '9'
		    || num_ptr[1] < '0' || num_ptr[1] > '9') {

			char	db_utf8[MAX_DB_UTF8_LEN];
			char	table_utf8[MAX_TABLE_UTF8_LEN];

			dict_fs2utf8(table->name.m_name,
				     db_utf8, sizeof(db_utf8),
				     table_utf8, sizeof(table_utf8));

			/* Malformed row (likely manual edits to the stats
			table): log and skip it, do not fail the fetch. */
			ib::info	out;
			out << "Ignoring strange row from "
				<< INDEX_STATS_NAME_PRINT << " WHERE"
				" database_name = '" << db_utf8
				<< "' AND table_name = '" << table_utf8
				<< "' AND index_name = '" << index->name()
				<< "' AND stat_name = '";
			out.write(stat_name, stat_name_len);
			out << "'; because stat_name is malformed";
			return(TRUE);
		}
		/* else */

		/* extract 12 from "n_diff_pfx12..." into n_pfx
		note that stat_name does not have a terminating '\0' */
		n_pfx = ulong(num_ptr[0] - '0') * 10 + ulong(num_ptr[1] - '0');

		ulint	n_uniq = index->n_uniq;

		if (n_pfx == 0 || n_pfx > n_uniq) {

			char	db_utf8[MAX_DB_UTF8_LEN];
			char	table_utf8[MAX_TABLE_UTF8_LEN];

			dict_fs2utf8(table->name.m_name,
				     db_utf8, sizeof(db_utf8),
				     table_utf8, sizeof(table_utf8));

			/* The prefix length must address an existing unique
			column of the index; out-of-range rows are skipped. */
			ib::info	out;
			out << "Ignoring strange row from "
				<< INDEX_STATS_NAME_PRINT << " WHERE"
				" database_name = '" << db_utf8
				<< "' AND table_name = '" << table_utf8
				<< "' AND index_name = '" << index->name()
				<< "' AND stat_name = '";
			out.write(stat_name, stat_name_len);
			out << "'; because stat_name is out of range, the index"
				" has " << n_uniq << " unique columns";

			return(TRUE);
		}
		/* else */

		/* n_pfx is 1-based in the stat name, the arrays are
		0-based. */
		index->stat_n_diff_key_vals[n_pfx - 1] = stat_value;

		if (sample_size != UINT64_UNDEFINED) {
			index->stat_n_sample_sizes[n_pfx - 1] = sample_size;
		} else {
			/* hmm, strange... the user must have UPDATEd the
			table manually and SET sample_size = NULL */
			index->stat_n_sample_sizes[n_pfx - 1] = 0;
		}

		index->stat_n_non_null_key_vals[n_pfx - 1] = 0;

		arg->stats_were_modified = true;
	} else {
		/* silently ignore rows with unknown stat_name, the
		user may have developed her own stats */
	}

	/* XXX this is not used but returning non-NULL is necessary */
	return(TRUE);
}
2979 
/*********************************************************************//**
Read table's statistics from the persistent statistics storage.
Runs one SELECT against innodb_table_stats and one against
innodb_index_stats inside an internal read-uncommitted transaction;
the fetched rows are delivered through the two *_step() callbacks.
@return DB_SUCCESS or error code */
static
dberr_t
dict_stats_fetch_from_ps(
/*=====================*/
	dict_table_t*	table)	/*!< in/out: table */
{
	index_fetch_t	index_fetch_arg;
	trx_t*		trx;
	pars_info_t*	pinfo;
	dberr_t		ret;
	char		db_utf8[MAX_DB_UTF8_LEN];
	char		table_utf8[MAX_TABLE_UTF8_LEN];

	ut_ad(!mutex_own(&dict_sys.mutex));

	/* Initialize all stats to dummy values before fetching because if
	the persistent storage contains incomplete stats (e.g. missing stats
	for some index) then we would end up with (partially) uninitialized
	stats. */
	dict_stats_empty_table(table, true);

	trx = trx_create();

	/* Use 'read-uncommitted' so that the SELECTs we execute
	do not get blocked in case some user has locked the rows we
	are SELECTing */

	trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;

	if (srv_read_only_mode) {
		trx_start_internal_read_only(trx);
	} else {
		trx_start_internal(trx);
	}

	/* The stats tables key rows by the UTF-8 database and table
	name, not by the filesystem-encoded name. */
	dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8),
		     table_utf8, sizeof(table_utf8));

	pinfo = pars_info_create();

	pars_info_add_str_literal(pinfo, "database_name", db_utf8);

	pars_info_add_str_literal(pinfo, "table_name", table_utf8);

	pars_info_bind_function(pinfo,
			       "fetch_table_stats_step",
			       dict_stats_fetch_table_stats_step,
			       table);

	index_fetch_arg.table = table;
	index_fetch_arg.stats_were_modified = false;
	pars_info_bind_function(pinfo,
			        "fetch_index_stats_step",
			        dict_stats_fetch_index_stats_step,
			        &index_fetch_arg);

	ret = que_eval_sql(pinfo,
			   "PROCEDURE FETCH_STATS () IS\n"
			   "found INT;\n"
			   "DECLARE FUNCTION fetch_table_stats_step;\n"
			   "DECLARE FUNCTION fetch_index_stats_step;\n"
			   "DECLARE CURSOR table_stats_cur IS\n"
			   "  SELECT\n"
			   /* if you change the selected fields, be
			   sure to adjust
			   dict_stats_fetch_table_stats_step() */
			   "  n_rows,\n"
			   "  clustered_index_size,\n"
			   "  sum_of_other_index_sizes\n"
			   "  FROM \"" TABLE_STATS_NAME "\"\n"
			   "  WHERE\n"
			   "  database_name = :database_name AND\n"
			   "  table_name = :table_name;\n"
			   "DECLARE CURSOR index_stats_cur IS\n"
			   "  SELECT\n"
			   /* if you change the selected fields, be
			   sure to adjust
			   dict_stats_fetch_index_stats_step() */
			   "  index_name,\n"
			   "  stat_name,\n"
			   "  stat_value,\n"
			   "  sample_size\n"
			   "  FROM \"" INDEX_STATS_NAME "\"\n"
			   "  WHERE\n"
			   "  database_name = :database_name AND\n"
			   "  table_name = :table_name;\n"

			   "BEGIN\n"

			   "OPEN table_stats_cur;\n"
			   "FETCH table_stats_cur INTO\n"
			   "  fetch_table_stats_step();\n"
			   "IF (SQL % NOTFOUND) THEN\n"
			   "  CLOSE table_stats_cur;\n"
			   "  RETURN;\n"
			   "END IF;\n"
			   "CLOSE table_stats_cur;\n"

			   "OPEN index_stats_cur;\n"
			   "found := 1;\n"
			   "WHILE found = 1 LOOP\n"
			   "  FETCH index_stats_cur INTO\n"
			   "    fetch_index_stats_step();\n"
			   "  IF (SQL % NOTFOUND) THEN\n"
			   "    found := 0;\n"
			   "  END IF;\n"
			   "END LOOP;\n"
			   "CLOSE index_stats_cur;\n"

			   "END;",
			   TRUE, trx);
	/* pinfo is freed by que_eval_sql() */

	trx_commit_for_mysql(trx);

	trx->free();

	/* If no innodb_index_stats row updated any index, the stats do
	not exist on disk (or at least not for this table); the caller
	may then recalculate or fall back to transient stats. */
	if (!index_fetch_arg.stats_were_modified) {
		return(DB_STATS_DO_NOT_EXIST);
	}

	return(ret);
}
3106 
3107 /*********************************************************************//**
3108 Clear defragmentation stats modified counter for all indices in table. */
3109 static
3110 void
dict_stats_empty_defrag_modified_counter(dict_table_t * table)3111 dict_stats_empty_defrag_modified_counter(
3112 	dict_table_t*	table)	/*!< in: table */
3113 {
3114 	dict_index_t*	index;
3115 	ut_a(table);
3116 	for (index = dict_table_get_first_index(table);
3117 	     index != NULL;
3118 	     index = dict_table_get_next_index(index)) {
3119 		index->stat_defrag_modified_counter = 0;
3120 	}
3121 }
3122 
3123 /*********************************************************************//**
3124 Fetches or calculates new estimates for index statistics. */
3125 void
dict_stats_update_for_index(dict_index_t * index)3126 dict_stats_update_for_index(
3127 /*========================*/
3128 	dict_index_t*	index)	/*!< in/out: index */
3129 {
3130 	DBUG_ENTER("dict_stats_update_for_index");
3131 
3132 	ut_ad(!mutex_own(&dict_sys.mutex));
3133 
3134 	if (dict_stats_is_persistent_enabled(index->table)) {
3135 
3136 		if (dict_stats_persistent_storage_check(false)) {
3137 			index_stats_t stats = dict_stats_analyze_index(index);
3138 			mutex_enter(&dict_sys.mutex);
3139 			index->stat_index_size = stats.index_size;
3140 			index->stat_n_leaf_pages = stats.n_leaf_pages;
3141 			for (size_t i = 0; i < stats.stats.size(); ++i) {
3142 				index->stat_n_diff_key_vals[i]
3143 					= stats.stats[i].n_diff_key_vals;
3144 				index->stat_n_sample_sizes[i]
3145 					= stats.stats[i].n_sample_sizes;
3146 				index->stat_n_non_null_key_vals[i]
3147 					= stats.stats[i].n_non_null_key_vals;
3148 			}
3149 			index->table->stat_sum_of_other_index_sizes
3150 				+= index->stat_index_size;
3151 			mutex_exit(&dict_sys.mutex);
3152 
3153 			dict_stats_save(index->table, &index->id);
3154 			DBUG_VOID_RETURN;
3155 		}
3156 		/* else */
3157 
3158 		if (innodb_index_stats_not_found == false &&
3159 		    index->stats_error_printed == false) {
3160 			/* Fall back to transient stats since the persistent
3161 			storage is not present or is corrupted */
3162 
3163 		ib::info() << "Recalculation of persistent statistics"
3164 			" requested for table " << index->table->name
3165 			<< " index " << index->name
3166 			<< " but the required"
3167 			" persistent statistics storage is not present or is"
3168 			" corrupted. Using transient stats instead.";
3169 			index->stats_error_printed = false;
3170 		}
3171 	}
3172 
3173 	dict_stats_update_transient_for_index(index);
3174 
3175 	DBUG_VOID_RETURN;
3176 }
3177 
/*********************************************************************//**
Calculates new estimates for table and index statistics. The statistics
are used in query optimization.
Dispatches on stats_upd_option: recalculate persistently or transiently,
empty the stats, or fetch previously saved stats from disk.
@return DB_SUCCESS or error code */
dberr_t
dict_stats_update(
/*==============*/
	dict_table_t*		table,	/*!< in/out: table */
	dict_stats_upd_option_t	stats_upd_option)
					/*!< in: whether to (re) calc
					the stats or to fetch them from
					the persistent statistics
					storage */
{
	ut_ad(!mutex_own(&dict_sys.mutex));

	if (!table->is_readable()) {
		return (dict_stats_report_error(table));
	} else if (srv_force_recovery > SRV_FORCE_NO_IBUF_MERGE) {
		/* If we have set a high innodb_force_recovery level, do
		not calculate statistics, as a badly corrupted index can
		cause a crash in it. */
		dict_stats_empty_table(table, false);
		return(DB_SUCCESS);
	}

	switch (stats_upd_option) {
	case DICT_STATS_RECALC_PERSISTENT:

		if (srv_read_only_mode) {
			goto transient;
		}

		/* Persistent recalculation requested, called from
		1) ANALYZE TABLE, or
		2) the auto recalculation background thread, or
		3) open table if stats do not exist on disk and auto recalc
		   is enabled */

		/* InnoDB internal tables (e.g. SYS_TABLES) cannot have
		persistent stats enabled */
		ut_a(strchr(table->name.m_name, '/') != NULL);

		/* check if the persistent statistics storage exists
		before calling the potentially slow function
		dict_stats_update_persistent(); that is a
		prerequisite for dict_stats_save() succeeding */
		if (dict_stats_persistent_storage_check(false)) {

			dberr_t	err;

			err = dict_stats_update_persistent(table);

			if (err != DB_SUCCESS) {
				return(err);
			}

			err = dict_stats_save(table, NULL);

			return(err);
		}

		/* Fall back to transient stats since the persistent
		storage is not present or is corrupted */

		if (innodb_table_stats_not_found == false &&
		    table->stats_error_printed == false) {
		ib::warn() << "Recalculation of persistent statistics"
			" requested for table "
			<< table->name
			<< " but the required persistent"
			" statistics storage is not present or is corrupted."
			" Using transient stats instead.";
			table->stats_error_printed = true;
		}

		goto transient;

	case DICT_STATS_RECALC_TRANSIENT:

		goto transient;

	case DICT_STATS_EMPTY_TABLE:

		dict_stats_empty_table(table, true);

		/* If table is using persistent stats,
		then save the stats on disk */

		if (dict_stats_is_persistent_enabled(table)) {

			if (dict_stats_persistent_storage_check(false)) {

				return(dict_stats_save(table, NULL));
			}

			return(DB_STATS_DO_NOT_EXIST);
		}

		return(DB_SUCCESS);

	case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY:

		/* fetch requested, either fetch from persistent statistics
		storage or use the old method */

		if (table->stat_initialized) {
			return(DB_SUCCESS);
		}

		/* InnoDB internal tables (e.g. SYS_TABLES) cannot have
		persistent stats enabled */
		ut_a(strchr(table->name.m_name, '/') != NULL);

		if (!dict_stats_persistent_storage_check(false)) {
			/* persistent statistics storage does not exist
			or is corrupted, calculate the transient stats */

			if (innodb_table_stats_not_found == false &&
			    table->stats_error_printed == false) {
				ib::error() << "Fetch of persistent statistics"
					" requested for table "
					<< table->name
					<< " but the required system tables "
					<< TABLE_STATS_NAME_PRINT
					<< " and " << INDEX_STATS_NAME_PRINT
					<< " are not present or have unexpected"
					" structure. Using transient stats instead.";
					table->stats_error_printed = true;
			}

			goto transient;
		}

		dict_table_t*	t;

		/* Create a dummy table object with the same name and
		indexes, suitable for fetching the stats into it. */
		t = dict_stats_table_clone_create(table);

		/* Fetch into the clone first, so that the live table is
		only updated (under dict_sys.mutex) once the fetch has
		fully succeeded. */
		dberr_t	err = dict_stats_fetch_from_ps(t);

		t->stats_last_recalc = table->stats_last_recalc;
		t->stat_modified_counter = 0;
		dict_stats_empty_defrag_modified_counter(t);

		switch (err) {
		case DB_SUCCESS:

			mutex_enter(&dict_sys.mutex);

			/* Pass reset_ignored_indexes=true as parameter
			to dict_stats_copy. This will cause statistics
			for corrupted indexes to be set to empty values */
			dict_stats_copy(table, t, true);

			dict_stats_assert_initialized(table);

			mutex_exit(&dict_sys.mutex);

			dict_stats_table_clone_free(t);

			return(DB_SUCCESS);
		case DB_STATS_DO_NOT_EXIST:

			dict_stats_table_clone_free(t);

			if (srv_read_only_mode) {
				goto transient;
			}

			/* No saved stats: recalculate them now if auto
			recalc is enabled for this table (recursive call
			with DICT_STATS_RECALC_PERSISTENT). */
			if (dict_stats_auto_recalc_is_enabled(table)) {
				return(dict_stats_update(
						table,
						DICT_STATS_RECALC_PERSISTENT));
			}

			ib::info() << "Trying to use table " << table->name
				<< " which has persistent statistics enabled,"
				" but auto recalculation turned off and the"
				" statistics do not exist in "
				TABLE_STATS_NAME_PRINT
				" and " INDEX_STATS_NAME_PRINT
				". Please either run \"ANALYZE TABLE "
				<< table->name << ";\" manually or enable the"
				" auto recalculation with \"ALTER TABLE "
				<< table->name << " STATS_AUTO_RECALC=1;\"."
				" InnoDB will now use transient statistics for "
				<< table->name << ".";

			goto transient;
		default:

			dict_stats_table_clone_free(t);

			if (innodb_table_stats_not_found == false &&
			    table->stats_error_printed == false) {
				ib::error() << "Error fetching persistent statistics"
					" for table "
					<< table->name
					<< " from " TABLE_STATS_NAME_PRINT " and "
					INDEX_STATS_NAME_PRINT ": " << err
					<< ". Using transient stats method instead.";
			}

			goto transient;
		}
	/* no "default:" in order to produce a compilation warning
	about unhandled enumeration value */
	}

transient:
	dict_stats_update_transient(table);

	return(DB_SUCCESS);
}
3394 
3395 /** Remove the information for a particular index's stats from the persistent
3396 storage if it exists and if there is data stored for this index.
3397 This function creates its own trx and commits it.
3398 
3399 We must modify system tables in a separate transaction in order to
3400 adhere to the InnoDB design constraint that dict_sys.latch prevents
3401 lock waits on system tables. If we modified system and user tables in
3402 the same transaction, we should exclusively hold dict_sys.latch until
3403 the transaction is committed, and effectively block other transactions
3404 that will attempt to open any InnoDB tables. Because we have no
3405 guarantee that user transactions will be committed fast, we cannot
3406 afford to keep the system tables locked in a user transaction.
3407 @return DB_SUCCESS or error code */
3408 dberr_t
dict_stats_drop_index(const char * db_and_table,const char * iname,char * errstr,ulint errstr_sz)3409 dict_stats_drop_index(
3410 /*==================*/
3411 	const char*	db_and_table,/*!< in: db and table, e.g. 'db/table' */
3412 	const char*	iname,	/*!< in: index name */
3413 	char*		errstr, /*!< out: error message if != DB_SUCCESS
3414 				is returned */
3415 	ulint		errstr_sz)/*!< in: size of the errstr buffer */
3416 {
3417 	char		db_utf8[MAX_DB_UTF8_LEN];
3418 	char		table_utf8[MAX_TABLE_UTF8_LEN];
3419 	pars_info_t*	pinfo;
3420 	dberr_t		ret;
3421 
3422 	ut_ad(!mutex_own(&dict_sys.mutex));
3423 
3424 	/* skip indexes whose table names do not contain a database name
3425 	e.g. if we are dropping an index from SYS_TABLES */
3426 	if (strchr(db_and_table, '/') == NULL) {
3427 
3428 		return(DB_SUCCESS);
3429 	}
3430 
3431 	dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
3432 		     table_utf8, sizeof(table_utf8));
3433 
3434 	pinfo = pars_info_create();
3435 
3436 	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
3437 
3438 	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
3439 
3440 	pars_info_add_str_literal(pinfo, "index_name", iname);
3441 
3442 	dict_sys_lock();
3443 
3444 	ret = dict_stats_exec_sql(
3445 		pinfo,
3446 		"PROCEDURE DROP_INDEX_STATS () IS\n"
3447 		"BEGIN\n"
3448 		"DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
3449 		"database_name = :database_name AND\n"
3450 		"table_name = :table_name AND\n"
3451 		"index_name = :index_name;\n"
3452 		"END;\n", NULL);
3453 
3454 	dict_sys_unlock();
3455 
3456 	if (ret == DB_STATS_DO_NOT_EXIST) {
3457 		ret = DB_SUCCESS;
3458 	}
3459 
3460 	if (ret != DB_SUCCESS) {
3461 		snprintf(errstr, errstr_sz,
3462 			 "Unable to delete statistics for index %s"
3463 			 " from %s%s: %s. They can be deleted later using"
3464 			 " DELETE FROM %s WHERE"
3465 			 " database_name = '%s' AND"
3466 			 " table_name = '%s' AND"
3467 			 " index_name = '%s';",
3468 			 iname,
3469 			 INDEX_STATS_NAME_PRINT,
3470 			 (ret == DB_LOCK_WAIT_TIMEOUT
3471 			  ? " because the rows are locked"
3472 			  : ""),
3473 			 ut_strerr(ret),
3474 			 INDEX_STATS_NAME_PRINT,
3475 			 db_utf8,
3476 			 table_utf8,
3477 			 iname);
3478 
3479 		ut_print_timestamp(stderr);
3480 		fprintf(stderr, " InnoDB: %s\n", errstr);
3481 	}
3482 
3483 	return(ret);
3484 }
3485 
3486 /*********************************************************************//**
3487 Executes
3488 DELETE FROM mysql.innodb_table_stats
3489 WHERE database_name = '...' AND table_name = '...';
3490 Creates its own transaction and commits it.
3491 @return DB_SUCCESS or error code */
3492 UNIV_INLINE
3493 dberr_t
dict_stats_delete_from_table_stats(const char * database_name,const char * table_name)3494 dict_stats_delete_from_table_stats(
3495 /*===============================*/
3496 	const char*	database_name,	/*!< in: database name, e.g. 'db' */
3497 	const char*	table_name)	/*!< in: table name, e.g. 'table' */
3498 {
3499 	pars_info_t*	pinfo;
3500 	dberr_t		ret;
3501 
3502 	ut_d(dict_sys.assert_locked());
3503 
3504 	pinfo = pars_info_create();
3505 
3506 	pars_info_add_str_literal(pinfo, "database_name", database_name);
3507 	pars_info_add_str_literal(pinfo, "table_name", table_name);
3508 
3509 	ret = dict_stats_exec_sql(
3510 		pinfo,
3511 		"PROCEDURE DELETE_FROM_TABLE_STATS () IS\n"
3512 		"BEGIN\n"
3513 		"DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n"
3514 		"database_name = :database_name AND\n"
3515 		"table_name = :table_name;\n"
3516 		"END;\n", NULL);
3517 
3518 	return(ret);
3519 }
3520 
3521 /*********************************************************************//**
3522 Executes
3523 DELETE FROM mysql.innodb_index_stats
3524 WHERE database_name = '...' AND table_name = '...';
3525 Creates its own transaction and commits it.
3526 @return DB_SUCCESS or error code */
3527 UNIV_INLINE
3528 dberr_t
dict_stats_delete_from_index_stats(const char * database_name,const char * table_name)3529 dict_stats_delete_from_index_stats(
3530 /*===============================*/
3531 	const char*	database_name,	/*!< in: database name, e.g. 'db' */
3532 	const char*	table_name)	/*!< in: table name, e.g. 'table' */
3533 {
3534 	pars_info_t*	pinfo;
3535 	dberr_t		ret;
3536 
3537 	ut_d(dict_sys.assert_locked());
3538 
3539 	pinfo = pars_info_create();
3540 
3541 	pars_info_add_str_literal(pinfo, "database_name", database_name);
3542 	pars_info_add_str_literal(pinfo, "table_name", table_name);
3543 
3544 	ret = dict_stats_exec_sql(
3545 		pinfo,
3546 		"PROCEDURE DELETE_FROM_INDEX_STATS () IS\n"
3547 		"BEGIN\n"
3548 		"DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
3549 		"database_name = :database_name AND\n"
3550 		"table_name = :table_name;\n"
3551 		"END;\n", NULL);
3552 
3553 	return(ret);
3554 }
3555 
3556 /*********************************************************************//**
3557 Removes the statistics for a table and all of its indexes from the
3558 persistent statistics storage if it exists and if there is data stored for
3559 the table. This function creates its own transaction and commits it.
3560 @return DB_SUCCESS or error code */
3561 dberr_t
dict_stats_drop_table(const char * db_and_table,char * errstr,ulint errstr_sz)3562 dict_stats_drop_table(
3563 /*==================*/
3564 	const char*	db_and_table,	/*!< in: db and table, e.g. 'db/table' */
3565 	char*		errstr,		/*!< out: error message
3566 					if != DB_SUCCESS is returned */
3567 	ulint		errstr_sz)	/*!< in: size of errstr buffer */
3568 {
3569 	char		db_utf8[MAX_DB_UTF8_LEN];
3570 	char		table_utf8[MAX_TABLE_UTF8_LEN];
3571 	dberr_t		ret;
3572 
3573 	ut_d(dict_sys.assert_locked());
3574 
3575 	/* skip tables that do not contain a database name
3576 	e.g. if we are dropping SYS_TABLES */
3577 	if (strchr(db_and_table, '/') == NULL) {
3578 
3579 		return(DB_SUCCESS);
3580 	}
3581 
3582 	/* skip innodb_table_stats and innodb_index_stats themselves */
3583 	if (strcmp(db_and_table, TABLE_STATS_NAME) == 0
3584 	    || strcmp(db_and_table, INDEX_STATS_NAME) == 0) {
3585 
3586 		return(DB_SUCCESS);
3587 	}
3588 
3589 	dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
3590 		     table_utf8, sizeof(table_utf8));
3591 
3592 	ret = dict_stats_delete_from_table_stats(db_utf8, table_utf8);
3593 
3594 	if (ret == DB_SUCCESS) {
3595 		ret = dict_stats_delete_from_index_stats(db_utf8, table_utf8);
3596 	}
3597 
3598 	if (ret == DB_STATS_DO_NOT_EXIST) {
3599 		ret = DB_SUCCESS;
3600 	}
3601 
3602 	if (ret != DB_SUCCESS) {
3603 
3604 		snprintf(errstr, errstr_sz,
3605 			 "Unable to delete statistics for table %s.%s: %s."
3606 			 " They can be deleted later using"
3607 
3608 			 " DELETE FROM %s WHERE"
3609 			 " database_name = '%s' AND"
3610 			 " table_name = '%s';"
3611 
3612 			 " DELETE FROM %s WHERE"
3613 			 " database_name = '%s' AND"
3614 			 " table_name = '%s';",
3615 
3616 			 db_utf8, table_utf8,
3617 			 ut_strerr(ret),
3618 
3619 			 INDEX_STATS_NAME_PRINT,
3620 			 db_utf8, table_utf8,
3621 
3622 			 TABLE_STATS_NAME_PRINT,
3623 			 db_utf8, table_utf8);
3624 	}
3625 
3626 	return(ret);
3627 }
3628 
3629 /*********************************************************************//**
3630 Executes
3631 UPDATE mysql.innodb_table_stats SET
3632 database_name = '...', table_name = '...'
3633 WHERE database_name = '...' AND table_name = '...';
3634 Creates its own transaction and commits it.
3635 @return DB_SUCCESS or error code */
3636 UNIV_INLINE
3637 dberr_t
dict_stats_rename_table_in_table_stats(const char * old_dbname_utf8,const char * old_tablename_utf8,const char * new_dbname_utf8,const char * new_tablename_utf8)3638 dict_stats_rename_table_in_table_stats(
3639 /*===================================*/
3640 	const char*	old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
3641 	const char*	old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
3642 	const char*	new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
3643 	const char*	new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
3644 {
3645 	pars_info_t*	pinfo;
3646 	dberr_t		ret;
3647 
3648 	ut_d(dict_sys.assert_locked());
3649 
3650 	pinfo = pars_info_create();
3651 
3652 	pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
3653 	pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
3654 	pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
3655 	pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
3656 
3657 	ret = dict_stats_exec_sql(
3658 		pinfo,
3659 		"PROCEDURE RENAME_TABLE_IN_TABLE_STATS () IS\n"
3660 		"BEGIN\n"
3661 		"UPDATE \"" TABLE_STATS_NAME "\" SET\n"
3662 		"database_name = :new_dbname_utf8,\n"
3663 		"table_name = :new_tablename_utf8\n"
3664 		"WHERE\n"
3665 		"database_name = :old_dbname_utf8 AND\n"
3666 		"table_name = :old_tablename_utf8;\n"
3667 		"END;\n", NULL);
3668 
3669 	return(ret);
3670 }
3671 
3672 /*********************************************************************//**
3673 Executes
3674 UPDATE mysql.innodb_index_stats SET
3675 database_name = '...', table_name = '...'
3676 WHERE database_name = '...' AND table_name = '...';
3677 Creates its own transaction and commits it.
3678 @return DB_SUCCESS or error code */
3679 UNIV_INLINE
3680 dberr_t
dict_stats_rename_table_in_index_stats(const char * old_dbname_utf8,const char * old_tablename_utf8,const char * new_dbname_utf8,const char * new_tablename_utf8)3681 dict_stats_rename_table_in_index_stats(
3682 /*===================================*/
3683 	const char*	old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
3684 	const char*	old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
3685 	const char*	new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
3686 	const char*	new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
3687 {
3688 	pars_info_t*	pinfo;
3689 	dberr_t		ret;
3690 
3691 	ut_d(dict_sys.assert_locked());
3692 
3693 	pinfo = pars_info_create();
3694 
3695 	pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
3696 	pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
3697 	pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
3698 	pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
3699 
3700 	ret = dict_stats_exec_sql(
3701 		pinfo,
3702 		"PROCEDURE RENAME_TABLE_IN_INDEX_STATS () IS\n"
3703 		"BEGIN\n"
3704 		"UPDATE \"" INDEX_STATS_NAME "\" SET\n"
3705 		"database_name = :new_dbname_utf8,\n"
3706 		"table_name = :new_tablename_utf8\n"
3707 		"WHERE\n"
3708 		"database_name = :old_dbname_utf8 AND\n"
3709 		"table_name = :old_tablename_utf8;\n"
3710 		"END;\n", NULL);
3711 
3712 	return(ret);
3713 }
3714 
/*********************************************************************//**
Renames a table in InnoDB persistent stats storage.
Updates the rows in both mysql.innodb_table_stats and
mysql.innodb_index_stats, each with its own bounded retry loop so that
a transient conflict (deadlock, lock wait timeout, leftover row for the
new name) does not immediately fail the rename.
This function creates its own transaction and commits it.
@return DB_SUCCESS or error code */
dberr_t
dict_stats_rename_table(
/*====================*/
	const char*	old_name,	/*!< in: old name, e.g. 'db/table' */
	const char*	new_name,	/*!< in: new name, e.g. 'db/table' */
	char*		errstr,		/*!< out: error string if != DB_SUCCESS
					is returned */
	size_t		errstr_sz)	/*!< in: errstr size */
{
	char		old_db_utf8[MAX_DB_UTF8_LEN];
	char		new_db_utf8[MAX_DB_UTF8_LEN];
	char		old_table_utf8[MAX_TABLE_UTF8_LEN];
	char		new_table_utf8[MAX_TABLE_UTF8_LEN];
	dberr_t		ret;

	/* skip innodb_table_stats and innodb_index_stats themselves */
	if (strcmp(old_name, TABLE_STATS_NAME) == 0
	    || strcmp(old_name, INDEX_STATS_NAME) == 0
	    || strcmp(new_name, TABLE_STATS_NAME) == 0
	    || strcmp(new_name, INDEX_STATS_NAME) == 0) {

		return(DB_SUCCESS);
	}

	/* Split the filesystem-style 'db/table' names into the UTF-8
	database and table components under which the stats rows are
	keyed. */
	dict_fs2utf8(old_name, old_db_utf8, sizeof(old_db_utf8),
		     old_table_utf8, sizeof(old_table_utf8));

	dict_fs2utf8(new_name, new_db_utf8, sizeof(new_db_utf8),
		     new_table_utf8, sizeof(new_table_utf8));

	dict_sys_lock();

	/* First, rename the row in mysql.innodb_table_stats;
	retry up to 5 times on transient errors. */
	ulint	n_attempts = 0;
	do {
		n_attempts++;

		ret = dict_stats_rename_table_in_table_stats(
			old_db_utf8, old_table_utf8,
			new_db_utf8, new_table_utf8);

		if (ret == DB_DUPLICATE_KEY) {
			/* A stats row already exists under the new
			name; delete it so that the next retry of the
			rename can succeed. */
			dict_stats_delete_from_table_stats(
				new_db_utf8, new_table_utf8);
		}

		if (ret == DB_STATS_DO_NOT_EXIST) {
			/* No stats stored for the old name: nothing
			to rename, not an error. */
			ret = DB_SUCCESS;
		}

		if (ret != DB_SUCCESS) {
			/* Temporarily release dict_sys while sleeping
			so that other threads can make progress before
			the retry. */
			dict_sys_unlock();
			os_thread_sleep(200000 /* 0.2 sec */);
			dict_sys_lock();
		}
	} while ((ret == DB_DEADLOCK
		  || ret == DB_DUPLICATE_KEY
		  || ret == DB_LOCK_WAIT_TIMEOUT)
		 && n_attempts < 5);

	if (ret != DB_SUCCESS) {
		/* Tell the user how to complete the rename manually
		later with an UPDATE on the stats table. */
		snprintf(errstr, errstr_sz,
			 "Unable to rename statistics from"
			 " %s.%s to %s.%s in %s: %s."
			 " They can be renamed later using"

			 " UPDATE %s SET"
			 " database_name = '%s',"
			 " table_name = '%s'"
			 " WHERE"
			 " database_name = '%s' AND"
			 " table_name = '%s';",

			 old_db_utf8, old_table_utf8,
			 new_db_utf8, new_table_utf8,
			 TABLE_STATS_NAME_PRINT,
			 ut_strerr(ret),

			 TABLE_STATS_NAME_PRINT,
			 new_db_utf8, new_table_utf8,
			 old_db_utf8, old_table_utf8);
		dict_sys_unlock();
		return(ret);
	}
	/* else */

	/* Second, rename the rows in mysql.innodb_index_stats, with
	the same retry policy as above. */
	n_attempts = 0;
	do {
		n_attempts++;

		ret = dict_stats_rename_table_in_index_stats(
			old_db_utf8, old_table_utf8,
			new_db_utf8, new_table_utf8);

		if (ret == DB_DUPLICATE_KEY) {
			/* Rows already exist under the new name;
			delete them and retry. */
			dict_stats_delete_from_index_stats(
				new_db_utf8, new_table_utf8);
		}

		if (ret == DB_STATS_DO_NOT_EXIST) {
			/* Nothing stored for the old name: treat as
			success. */
			ret = DB_SUCCESS;
		}

		if (ret != DB_SUCCESS) {
			/* Same back-off as in the first loop. */
			dict_sys_unlock();
			os_thread_sleep(200000 /* 0.2 sec */);
			dict_sys_lock();
		}
	} while ((ret == DB_DEADLOCK
		  || ret == DB_DUPLICATE_KEY
		  || ret == DB_LOCK_WAIT_TIMEOUT)
		 && n_attempts < 5);

	dict_sys_unlock();

	if (ret != DB_SUCCESS) {
		/* The table_stats row was already renamed above; only
		the index_stats update needs manual follow-up. */
		snprintf(errstr, errstr_sz,
			 "Unable to rename statistics from"
			 " %s.%s to %s.%s in %s: %s."
			 " They can be renamed later using"

			 " UPDATE %s SET"
			 " database_name = '%s',"
			 " table_name = '%s'"
			 " WHERE"
			 " database_name = '%s' AND"
			 " table_name = '%s';",

			 old_db_utf8, old_table_utf8,
			 new_db_utf8, new_table_utf8,
			 INDEX_STATS_NAME_PRINT,
			 ut_strerr(ret),

			 INDEX_STATS_NAME_PRINT,
			 new_db_utf8, new_table_utf8,
			 old_db_utf8, old_table_utf8);
	}

	return(ret);
}
3858 
3859 /*********************************************************************//**
3860 Renames an index in InnoDB persistent stats storage.
3861 This function creates its own transaction and commits it.
3862 @return DB_SUCCESS or error code. DB_STATS_DO_NOT_EXIST will be returned
3863 if the persistent stats do not exist. */
3864 dberr_t
dict_stats_rename_index(const dict_table_t * table,const char * old_index_name,const char * new_index_name)3865 dict_stats_rename_index(
3866 /*====================*/
3867 	const dict_table_t*	table,		/*!< in: table whose index
3868 						is renamed */
3869 	const char*		old_index_name,	/*!< in: old index name */
3870 	const char*		new_index_name)	/*!< in: new index name */
3871 {
3872 	dict_sys_lock();
3873 
3874 	if (!dict_stats_persistent_storage_check(true)) {
3875 		dict_sys_unlock();
3876 		return(DB_STATS_DO_NOT_EXIST);
3877 	}
3878 
3879 	char	dbname_utf8[MAX_DB_UTF8_LEN];
3880 	char	tablename_utf8[MAX_TABLE_UTF8_LEN];
3881 
3882 	dict_fs2utf8(table->name.m_name, dbname_utf8, sizeof(dbname_utf8),
3883 		     tablename_utf8, sizeof(tablename_utf8));
3884 
3885 	pars_info_t*	pinfo;
3886 
3887 	pinfo = pars_info_create();
3888 
3889 	pars_info_add_str_literal(pinfo, "dbname_utf8", dbname_utf8);
3890 	pars_info_add_str_literal(pinfo, "tablename_utf8", tablename_utf8);
3891 	pars_info_add_str_literal(pinfo, "new_index_name", new_index_name);
3892 	pars_info_add_str_literal(pinfo, "old_index_name", old_index_name);
3893 
3894 	dberr_t	ret;
3895 
3896 	ret = dict_stats_exec_sql(
3897 		pinfo,
3898 		"PROCEDURE RENAME_INDEX_IN_INDEX_STATS () IS\n"
3899 		"BEGIN\n"
3900 		"UPDATE \"" INDEX_STATS_NAME "\" SET\n"
3901 		"index_name = :new_index_name\n"
3902 		"WHERE\n"
3903 		"database_name = :dbname_utf8 AND\n"
3904 		"table_name = :tablename_utf8 AND\n"
3905 		"index_name = :old_index_name;\n"
3906 		"END;\n", NULL);
3907 
3908 	dict_sys_unlock();
3909 
3910 	return(ret);
3911 }
3912 
3913 /* tests @{ */
3914 #ifdef UNIV_ENABLE_UNIT_TEST_DICT_STATS
3915 
3916 /* The following unit tests test some of the functions in this file
3917 individually, such testing cannot be performed by the mysql-test framework
3918 via SQL. */
3919 
3920 /* test_dict_table_schema_check() @{ */
3921 void
test_dict_table_schema_check()3922 test_dict_table_schema_check()
3923 {
3924 	/*
3925 	CREATE TABLE tcheck (
3926 		c01 VARCHAR(123),
3927 		c02 INT,
3928 		c03 INT NOT NULL,
3929 		c04 INT UNSIGNED,
3930 		c05 BIGINT,
3931 		c06 BIGINT UNSIGNED NOT NULL,
3932 		c07 TIMESTAMP
3933 	) ENGINE=INNODB;
3934 	*/
3935 	/* definition for the table 'test/tcheck' */
3936 	dict_col_meta_t	columns[] = {
3937 		{"c01", DATA_VARCHAR, 0, 123},
3938 		{"c02", DATA_INT, 0, 4},
3939 		{"c03", DATA_INT, DATA_NOT_NULL, 4},
3940 		{"c04", DATA_INT, DATA_UNSIGNED, 4},
3941 		{"c05", DATA_INT, 0, 8},
3942 		{"c06", DATA_INT, DATA_NOT_NULL | DATA_UNSIGNED, 8},
3943 		{"c07", DATA_INT, 0, 4},
3944 		{"c_extra", DATA_INT, 0, 4}
3945 	};
3946 	dict_table_schema_t	schema = {
3947 		"test/tcheck",
3948 		0 /* will be set individually for each test below */,
3949 		columns
3950 	};
3951 	char	errstr[512];
3952 
3953 	snprintf(errstr, sizeof(errstr), "Table not found");
3954 
3955 	/* prevent any data dictionary modifications while we are checking
3956 	the tables' structure */
3957 
3958 	mutex_enter(&dict_sys.mutex);
3959 
3960 	/* check that a valid table is reported as valid */
3961 	schema.n_cols = 7;
3962 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3963 	    == DB_SUCCESS) {
3964 		printf("OK: test.tcheck ok\n");
3965 	} else {
3966 		printf("ERROR: %s\n", errstr);
3967 		printf("ERROR: test.tcheck not present or corrupted\n");
3968 		goto test_dict_table_schema_check_end;
3969 	}
3970 
3971 	/* check columns with wrong length */
3972 	schema.columns[1].len = 8;
3973 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3974 	    != DB_SUCCESS) {
3975 		printf("OK: test.tcheck.c02 has different length and is"
3976 		       " reported as corrupted\n");
3977 	} else {
3978 		printf("OK: test.tcheck.c02 has different length but is"
3979 		       " reported as ok\n");
3980 		goto test_dict_table_schema_check_end;
3981 	}
3982 	schema.columns[1].len = 4;
3983 
3984 	/* request that c02 is NOT NULL while actually it does not have
3985 	this flag set */
3986 	schema.columns[1].prtype_mask |= DATA_NOT_NULL;
3987 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3988 	    != DB_SUCCESS) {
3989 		printf("OK: test.tcheck.c02 does not have NOT NULL while"
3990 		       " it should and is reported as corrupted\n");
3991 	} else {
3992 		printf("ERROR: test.tcheck.c02 does not have NOT NULL while"
3993 		       " it should and is not reported as corrupted\n");
3994 		goto test_dict_table_schema_check_end;
3995 	}
3996 	schema.columns[1].prtype_mask &= ~DATA_NOT_NULL;
3997 
3998 	/* check a table that contains some extra columns */
3999 	schema.n_cols = 6;
4000 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
4001 	    == DB_SUCCESS) {
4002 		printf("ERROR: test.tcheck has more columns but is not"
4003 		       " reported as corrupted\n");
4004 		goto test_dict_table_schema_check_end;
4005 	} else {
4006 		printf("OK: test.tcheck has more columns and is"
4007 		       " reported as corrupted\n");
4008 	}
4009 
4010 	/* check a table that has some columns missing */
4011 	schema.n_cols = 8;
4012 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
4013 	    != DB_SUCCESS) {
4014 		printf("OK: test.tcheck has missing columns and is"
4015 		       " reported as corrupted\n");
4016 	} else {
4017 		printf("ERROR: test.tcheck has missing columns but is"
4018 		       " reported as ok\n");
4019 		goto test_dict_table_schema_check_end;
4020 	}
4021 
4022 	/* check non-existent table */
4023 	schema.table_name = "test/tcheck_nonexistent";
4024 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
4025 	    != DB_SUCCESS) {
4026 		printf("OK: test.tcheck_nonexistent is not present\n");
4027 	} else {
4028 		printf("ERROR: test.tcheck_nonexistent is present!?\n");
4029 		goto test_dict_table_schema_check_end;
4030 	}
4031 
4032 test_dict_table_schema_check_end:
4033 
4034 	mutex_exit(&dict_sys.mutex);
4035 }
4036 /* @} */
4037 
4038 /* save/fetch aux macros @{ */
4039 #define TEST_DATABASE_NAME		"foobardb"
4040 #define TEST_TABLE_NAME			"test_dict_stats"
4041 
4042 #define TEST_N_ROWS			111
4043 #define TEST_CLUSTERED_INDEX_SIZE	222
4044 #define TEST_SUM_OF_OTHER_INDEX_SIZES	333
4045 
4046 #define TEST_IDX1_NAME			"tidx1"
4047 #define TEST_IDX1_COL1_NAME		"tidx1_col1"
4048 #define TEST_IDX1_INDEX_SIZE		123
4049 #define TEST_IDX1_N_LEAF_PAGES		234
4050 #define TEST_IDX1_N_DIFF1		50
4051 #define TEST_IDX1_N_DIFF1_SAMPLE_SIZE	500
4052 
4053 #define TEST_IDX2_NAME			"tidx2"
4054 #define TEST_IDX2_COL1_NAME		"tidx2_col1"
4055 #define TEST_IDX2_COL2_NAME		"tidx2_col2"
4056 #define TEST_IDX2_COL3_NAME		"tidx2_col3"
4057 #define TEST_IDX2_COL4_NAME		"tidx2_col4"
4058 #define TEST_IDX2_INDEX_SIZE		321
4059 #define TEST_IDX2_N_LEAF_PAGES		432
4060 #define TEST_IDX2_N_DIFF1		60
4061 #define TEST_IDX2_N_DIFF1_SAMPLE_SIZE	600
4062 #define TEST_IDX2_N_DIFF2		61
4063 #define TEST_IDX2_N_DIFF2_SAMPLE_SIZE	610
4064 #define TEST_IDX2_N_DIFF3		62
4065 #define TEST_IDX2_N_DIFF3_SAMPLE_SIZE	620
4066 #define TEST_IDX2_N_DIFF4		63
4067 #define TEST_IDX2_N_DIFF4_SAMPLE_SIZE	630
4068 /* @} */
4069 
/* test_dict_stats_save() @{ */

/** Builds a dummy dict_table_t with two indexes (1 and 4 unique
columns) entirely on the stack, fills its statistics fields with the
TEST_* constants, persists them via dict_stats_save() and prints
SELECT statements with which a human can verify the saved rows in
mysql.innodb_table_stats / mysql.innodb_index_stats. */
void
test_dict_stats_save()
{
	dict_table_t	table;
	dict_index_t	index1;
	dict_field_t	index1_fields[1];
	ib_uint64_t	index1_stat_n_diff_key_vals[1];
	ib_uint64_t	index1_stat_n_sample_sizes[1];
	dict_index_t	index2;
	dict_field_t	index2_fields[4];
	ib_uint64_t	index2_stat_n_diff_key_vals[4];
	ib_uint64_t	index2_stat_n_sample_sizes[4];
	dberr_t		ret;

	/* craft a dummy dict_table_t */
	table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
	table.stat_n_rows = TEST_N_ROWS;
	table.stat_clustered_index_size = TEST_CLUSTERED_INDEX_SIZE;
	table.stat_sum_of_other_index_sizes = TEST_SUM_OF_OTHER_INDEX_SIZES;
	UT_LIST_INIT(table.indexes, &dict_index_t::indexes);
#ifdef BTR_CUR_HASH_ADAPT
	UT_LIST_INIT(table.freed_indexes, &dict_index_t::indexes);
#endif /* BTR_CUR_HASH_ADAPT */
	UT_LIST_ADD_LAST(table.indexes, &index1);
	UT_LIST_ADD_LAST(table.indexes, &index2);
	/* magic_n fields satisfy the dict_table_check/dict_index_check
	debug assertions in the code under test */
	ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
	ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);

	/* index1: single-column index with one n_diff/sample slot */
	index1.name = TEST_IDX1_NAME;
	index1.table = &table;
	index1.cached = 1;
	index1.n_uniq = 1;
	index1.fields = index1_fields;
	index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
	index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
	index1.stat_index_size = TEST_IDX1_INDEX_SIZE;
	index1.stat_n_leaf_pages = TEST_IDX1_N_LEAF_PAGES;
	index1_fields[0].name = TEST_IDX1_COL1_NAME;
	index1_stat_n_diff_key_vals[0] = TEST_IDX1_N_DIFF1;
	index1_stat_n_sample_sizes[0] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE;

	/* index2: four-column index, one n_diff/sample slot per
	n-column prefix */
	ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
	index2.name = TEST_IDX2_NAME;
	index2.table = &table;
	index2.cached = 1;
	index2.n_uniq = 4;
	index2.fields = index2_fields;
	index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
	index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
	index2.stat_index_size = TEST_IDX2_INDEX_SIZE;
	index2.stat_n_leaf_pages = TEST_IDX2_N_LEAF_PAGES;
	index2_fields[0].name = TEST_IDX2_COL1_NAME;
	index2_fields[1].name = TEST_IDX2_COL2_NAME;
	index2_fields[2].name = TEST_IDX2_COL3_NAME;
	index2_fields[3].name = TEST_IDX2_COL4_NAME;
	index2_stat_n_diff_key_vals[0] = TEST_IDX2_N_DIFF1;
	index2_stat_n_diff_key_vals[1] = TEST_IDX2_N_DIFF2;
	index2_stat_n_diff_key_vals[2] = TEST_IDX2_N_DIFF3;
	index2_stat_n_diff_key_vals[3] = TEST_IDX2_N_DIFF4;
	index2_stat_n_sample_sizes[0] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE;
	index2_stat_n_sample_sizes[1] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE;
	index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE;
	index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE;

	ret = dict_stats_save(&table, NULL);

	ut_a(ret == DB_SUCCESS);

	/* Print verification SQL: one query for the table-level row
	and one per index for the index-level rows. */
	printf("\nOK: stats saved successfully, now go ahead and read"
	       " what's inside %s and %s:\n\n",
	       TABLE_STATS_NAME_PRINT,
	       INDEX_STATS_NAME_PRINT);

	printf("SELECT COUNT(*) = 1 AS table_stats_saved_successfully\n"
	       "FROM %s\n"
	       "WHERE\n"
	       "database_name = '%s' AND\n"
	       "table_name = '%s' AND\n"
	       "n_rows = %d AND\n"
	       "clustered_index_size = %d AND\n"
	       "sum_of_other_index_sizes = %d;\n"
	       "\n",
	       TABLE_STATS_NAME_PRINT,
	       TEST_DATABASE_NAME,
	       TEST_TABLE_NAME,
	       TEST_N_ROWS,
	       TEST_CLUSTERED_INDEX_SIZE,
	       TEST_SUM_OF_OTHER_INDEX_SIZES);

	/* index1 should have produced 3 rows: size, n_leaf_pages and
	one n_diff_pfx row */
	printf("SELECT COUNT(*) = 3 AS tidx1_stats_saved_successfully\n"
	       "FROM %s\n"
	       "WHERE\n"
	       "database_name = '%s' AND\n"
	       "table_name = '%s' AND\n"
	       "index_name = '%s' AND\n"
	       "(\n"
	       " (stat_name = 'size' AND stat_value = %d AND"
	       "  sample_size IS NULL) OR\n"
	       " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
	       "  sample_size IS NULL) OR\n"
	       " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s')\n"
	       ");\n"
	       "\n",
	       INDEX_STATS_NAME_PRINT,
	       TEST_DATABASE_NAME,
	       TEST_TABLE_NAME,
	       TEST_IDX1_NAME,
	       TEST_IDX1_INDEX_SIZE,
	       TEST_IDX1_N_LEAF_PAGES,
	       TEST_IDX1_N_DIFF1,
	       TEST_IDX1_N_DIFF1_SAMPLE_SIZE,
	       TEST_IDX1_COL1_NAME);

	/* index2 should have produced 6 rows: size, n_leaf_pages and
	four n_diff_pfx rows (one per column prefix) */
	printf("SELECT COUNT(*) = 6 AS tidx2_stats_saved_successfully\n"
	       "FROM %s\n"
	       "WHERE\n"
	       "database_name = '%s' AND\n"
	       "table_name = '%s' AND\n"
	       "index_name = '%s' AND\n"
	       "(\n"
	       " (stat_name = 'size' AND stat_value = %d AND"
	       "  sample_size IS NULL) OR\n"
	       " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
	       "  sample_size IS NULL) OR\n"
	       " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s') OR\n"
	       " (stat_name = 'n_diff_pfx02' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s,%s') OR\n"
	       " (stat_name = 'n_diff_pfx03' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s,%s,%s') OR\n"
	       " (stat_name = 'n_diff_pfx04' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s,%s,%s,%s')\n"
	       ");\n"
	       "\n",
	       INDEX_STATS_NAME_PRINT,
	       TEST_DATABASE_NAME,
	       TEST_TABLE_NAME,
	       TEST_IDX2_NAME,
	       TEST_IDX2_INDEX_SIZE,
	       TEST_IDX2_N_LEAF_PAGES,
	       TEST_IDX2_N_DIFF1,
	       TEST_IDX2_N_DIFF1_SAMPLE_SIZE, TEST_IDX2_COL1_NAME,
	       TEST_IDX2_N_DIFF2,
	       TEST_IDX2_N_DIFF2_SAMPLE_SIZE,
	       TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME,
	       TEST_IDX2_N_DIFF3,
	       TEST_IDX2_N_DIFF3_SAMPLE_SIZE,
	       TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
	       TEST_IDX2_N_DIFF4,
	       TEST_IDX2_N_DIFF4_SAMPLE_SIZE,
	       TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
	       TEST_IDX2_COL4_NAME);
}
/* @} */
4226 
/* test_dict_stats_fetch_from_ps() @{ */

/** Builds a dummy dict_table_t mirroring the one used in
test_dict_stats_save() (same name, same two indexes), reads the stats
back with dict_stats_fetch_from_ps() and asserts that every fetched
value equals the TEST_* constant previously saved. Must run after
test_dict_stats_save(). */
void
test_dict_stats_fetch_from_ps()
{
	dict_table_t	table;
	dict_index_t	index1;
	ib_uint64_t	index1_stat_n_diff_key_vals[1];
	ib_uint64_t	index1_stat_n_sample_sizes[1];
	dict_index_t	index2;
	ib_uint64_t	index2_stat_n_diff_key_vals[4];
	ib_uint64_t	index2_stat_n_sample_sizes[4];
	dberr_t		ret;

	/* craft a dummy dict_table_t */
	table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
	UT_LIST_INIT(table.indexes, &dict_index_t::indexes);
#ifdef BTR_CUR_HASH_ADAPT
	UT_LIST_INIT(table.freed_indexes, &dict_index_t::indexes);
#endif /* BTR_CUR_HASH_ADAPT */
	UT_LIST_ADD_LAST(table.indexes, &index1);
	UT_LIST_ADD_LAST(table.indexes, &index2);
	/* magic_n satisfies debug validation in the code under test */
	ut_d(table.magic_n = DICT_TABLE_MAGIC_N);

	/* index1: single-column index; the fetch fills the stat
	arrays below */
	index1.name = TEST_IDX1_NAME;
	ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
	index1.cached = 1;
	index1.n_uniq = 1;
	index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
	index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;

	/* index2: four-column index, one slot per column prefix */
	index2.name = TEST_IDX2_NAME;
	ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
	index2.cached = 1;
	index2.n_uniq = 4;
	index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
	index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;

	ret = dict_stats_fetch_from_ps(&table);

	ut_a(ret == DB_SUCCESS);

	/* table-level stats must match what test_dict_stats_save()
	stored */
	ut_a(table.stat_n_rows == TEST_N_ROWS);
	ut_a(table.stat_clustered_index_size == TEST_CLUSTERED_INDEX_SIZE);
	ut_a(table.stat_sum_of_other_index_sizes
	     == TEST_SUM_OF_OTHER_INDEX_SIZES);

	/* index1 stats */
	ut_a(index1.stat_index_size == TEST_IDX1_INDEX_SIZE);
	ut_a(index1.stat_n_leaf_pages == TEST_IDX1_N_LEAF_PAGES);
	ut_a(index1_stat_n_diff_key_vals[0] == TEST_IDX1_N_DIFF1);
	ut_a(index1_stat_n_sample_sizes[0] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE);

	/* index2 stats, all four column prefixes */
	ut_a(index2.stat_index_size == TEST_IDX2_INDEX_SIZE);
	ut_a(index2.stat_n_leaf_pages == TEST_IDX2_N_LEAF_PAGES);
	ut_a(index2_stat_n_diff_key_vals[0] == TEST_IDX2_N_DIFF1);
	ut_a(index2_stat_n_sample_sizes[0] == TEST_IDX2_N_DIFF1_SAMPLE_SIZE);
	ut_a(index2_stat_n_diff_key_vals[1] == TEST_IDX2_N_DIFF2);
	ut_a(index2_stat_n_sample_sizes[1] == TEST_IDX2_N_DIFF2_SAMPLE_SIZE);
	ut_a(index2_stat_n_diff_key_vals[2] == TEST_IDX2_N_DIFF3);
	ut_a(index2_stat_n_sample_sizes[2] == TEST_IDX2_N_DIFF3_SAMPLE_SIZE);
	ut_a(index2_stat_n_diff_key_vals[3] == TEST_IDX2_N_DIFF4);
	ut_a(index2_stat_n_sample_sizes[3] == TEST_IDX2_N_DIFF4_SAMPLE_SIZE);

	printf("OK: fetch successful\n");
}
/* @} */
4292 
/* test_dict_stats_all() @{ */

/** Runs all dict0stats unit tests. The order is significant:
test_dict_stats_save() stores the TEST_* values that
test_dict_stats_fetch_from_ps() then reads back and asserts on. */
void
test_dict_stats_all()
{
	test_dict_table_schema_check();

	test_dict_stats_save();

	test_dict_stats_fetch_from_ps();
}
/* @} */
4304 
4305 #endif /* UNIV_ENABLE_UNIT_TEST_DICT_STATS */
4306 /* @} */
4307