1 /*****************************************************************************
2 
3 Copyright (c) 2009, 2021, Oracle and/or its affiliates.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file dict/dict0stats.cc
29 Code used for calculating and manipulating table statistics.
30 
31 Created Jan 06, 2010 Vasil Dimov
32 *******************************************************/
33 
34 #ifndef UNIV_HOTBACKUP
35 
36 #include "univ.i"
37 
38 #include "ut0ut.h"
39 #include "ut0rnd.h"
40 #include "dyn0buf.h"
41 #include "row0sel.h"
42 #include "trx0trx.h"
43 #include "pars0pars.h"
44 #include "dict0stats.h"
45 #include "ha_prototypes.h"
46 #include "ut0new.h"
47 #include <mysql_com.h>
48 #include "row0mysql.h"
49 
50 #include <algorithm>
51 #include <map>
52 #include <vector>
53 
54 /* Sampling algorithm description @{
55 
56 The algorithm is controlled by one number - N_SAMPLE_PAGES(index),
57 let it be A, which is the number of leaf pages to analyze for a given index
58 for each n-prefix (if the index is on 3 columns, then 3*A leaf pages will be
59 analyzed).
60 
61 Let the total number of leaf pages in the table be T.
62 Level 0 - leaf pages, level H - root.
63 
64 Definition: N-prefix-boring record is a record on a non-leaf page that equals
65 the next (to the right, cross page boundaries, skipping the supremum and
66 infimum) record on the same level when looking at the fist n-prefix columns.
67 The last (user) record on a level is not boring (it does not match the
68 non-existent user record to the right). We call the records boring because all
69 the records on the page below a boring record are equal to that boring record.
70 
71 We avoid diving below boring records when searching for a leaf page to
72 estimate the number of distinct records because we know that such a leaf
73 page will have number of distinct records == 1.
74 
75 For each n-prefix: start from the root level and full scan subsequent lower
76 levels until a level that contains at least A*10 distinct records is found.
77 Lets call this level LA.
78 As an optimization the search is canceled if it has reached level 1 (never
79 descend to the level 0 (leaf)) and also if the next level to be scanned
80 would contain more than A pages. The latter is because the user has asked
81 to analyze A leaf pages and it does not make sense to scan much more than
82 A non-leaf pages with the sole purpose of finding a good sample of A leaf
83 pages.
84 
85 After finding the appropriate level LA with >A*10 distinct records (or less in
86 the exceptions described above), divide it into groups of equal records and
87 pick A such groups. Then pick the last record from each group. For example,
88 let the level be:
89 
90 index:  0,1,2,3,4,5,6,7,8,9,10
91 record: 1,1,1,2,2,7,7,7,7,7,9
92 
93 There are 4 groups of distinct records and if A=2 random ones are selected,
94 e.g. 1,1,1 and 7,7,7,7,7, then records with indexes 2 and 9 will be selected.
95 
96 After selecting A records as described above, dive below them to find A leaf
97 pages and analyze them, finding the total number of distinct records. The
98 dive to the leaf level is performed by selecting a non-boring record from
99 each page and diving below it.
100 
101 This way, a total of A leaf pages are analyzed for the given n-prefix.
102 
103 Let the number of different key values found in each leaf page i be Pi (i=1..A).
104 Let N_DIFF_AVG_LEAF be (P1 + P2 + ... + PA) / A.
105 Let the number of different key values on level LA be N_DIFF_LA.
106 Let the total number of records on level LA be TOTAL_LA.
107 Let R be N_DIFF_LA / TOTAL_LA, we assume this ratio is the same on the
108 leaf level.
109 Let the number of leaf pages be N.
110 Then the total number of different key values on the leaf level is:
111 N * R * N_DIFF_AVG_LEAF.
112 See REF01 for the implementation.
113 
114 The above describes how to calculate the cardinality of an index.
115 This algorithm is executed for each n-prefix of a multi-column index
116 where n=1..n_uniq.
117 @} */
118 
119 /* names of the tables from the persistent statistics storage */
120 #define TABLE_STATS_NAME	"mysql/innodb_table_stats"
121 #define TABLE_STATS_NAME_PRINT	"mysql.innodb_table_stats"
122 #define INDEX_STATS_NAME	"mysql/innodb_index_stats"
123 #define INDEX_STATS_NAME_PRINT	"mysql.innodb_index_stats"
124 
125 #ifdef UNIV_STATS_DEBUG
126 #define DEBUG_PRINTF(fmt, ...)	printf(fmt, ## __VA_ARGS__)
127 #else /* UNIV_STATS_DEBUG */
128 #define DEBUG_PRINTF(fmt, ...)	/* noop */
129 #endif /* UNIV_STATS_DEBUG */
130 
131 /* Gets the number of leaf pages to sample in persistent stats estimation */
132 #define N_SAMPLE_PAGES(index)					\
133 	static_cast<ib_uint64_t>(				\
134 		(index)->table->stats_sample_pages != 0		\
135 		? (index)->table->stats_sample_pages		\
136 		: srv_stats_persistent_sample_pages)
137 
138 /* number of distinct records on a given level that are required to stop
139 descending to lower levels and fetch N_SAMPLE_PAGES(index) records
140 from that level */
141 #define N_DIFF_REQUIRED(index)	(N_SAMPLE_PAGES(index) * 10)
142 
143 /* A dynamic array where we store the boundaries of each distinct group
144 of keys. For example if a btree level is:
145 index: 0,1,2,3,4,5,6,7,8,9,10,11,12
146 data:  b,b,b,b,b,b,g,g,j,j,j, x, y
147 then we would store 5,7,10,11,12 in the array. */
148 typedef std::vector<ib_uint64_t, ut_allocator<ib_uint64_t> >	boundaries_t;
149 
150 /** Allocator type used for index_map_t. */
151 typedef ut_allocator<std::pair<const char* const, dict_index_t*> >
152 	index_map_t_allocator;
153 
154 /** Auxiliary map used for sorting indexes by name in dict_stats_save(). */
155 typedef std::map<const char*, dict_index_t*, ut_strcmp_functor,
156 		index_map_t_allocator>	index_map_t;
157 
158 /*********************************************************************//**
159 Checks whether an index should be ignored in stats manipulations:
160 * stats fetch
161 * stats recalc
162 * stats save
163 @return true if exists and all tables are ok */
164 UNIV_INLINE
165 bool
dict_stats_should_ignore_index(const dict_index_t * index)166 dict_stats_should_ignore_index(
167 /*===========================*/
168 	const dict_index_t*	index)	/*!< in: index */
169 {
170 	return((index->type & DICT_FTS)
171 	       || dict_index_is_corrupted(index)
172 	       || dict_index_is_spatial(index)
173 	       || index->to_be_dropped
174 	       || !index->is_committed());
175 }
176 
/*********************************************************************//**
Checks whether the persistent statistics storage exists and that all
tables have the proper structure.
@return true if exists and all tables are ok */
static
bool
dict_stats_persistent_storage_check(
/*================================*/
	bool	caller_has_dict_sys_mutex)	/*!< in: true if the caller
						owns dict_sys->mutex */
{
	/* definition for the table TABLE_STATS_NAME */
	/* NOTE(review): the trailing numbers are byte lengths; 192 and
	597 presumably correspond to the columns' character lengths times
	the maximum bytes per character of their charsets (cf. the
	explicit 64*3 and 1024*3 below) -- confirm against the
	mysql.innodb_table_stats / innodb_index_stats definitions. */
	dict_col_meta_t	table_stats_columns[] = {
		{"database_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"table_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 597},

		{"last_update", DATA_FIXBINARY,
			DATA_NOT_NULL, 4},

		{"n_rows", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8},

		{"clustered_index_size", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8},

		{"sum_of_other_index_sizes", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8}
	};
	dict_table_schema_t	table_stats_schema = {
		TABLE_STATS_NAME,
		UT_ARR_SIZE(table_stats_columns),
		table_stats_columns,
		0 /* n_foreign */,
		0 /* n_referenced */
	};

	/* definition for the table INDEX_STATS_NAME */
	dict_col_meta_t	index_stats_columns[] = {
		{"database_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"table_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 597},

		{"index_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"last_update", DATA_FIXBINARY,
			DATA_NOT_NULL, 4},

		{"stat_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 64*3},

		{"stat_value", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8},

		/* sample_size is the only nullable column here (no
		DATA_NOT_NULL flag) */
		{"sample_size", DATA_INT,
			DATA_UNSIGNED, 8},

		{"stat_description", DATA_VARMYSQL,
			DATA_NOT_NULL, 1024*3}
	};
	dict_table_schema_t	index_stats_schema = {
		INDEX_STATS_NAME,
		UT_ARR_SIZE(index_stats_columns),
		index_stats_columns,
		0 /* n_foreign */,
		0 /* n_referenced */
	};

	char		errstr[512];
	dberr_t		ret;

	/* the schema checks below require dict_sys->mutex (asserted
	just after); acquire it here unless the caller already owns it */
	if (!caller_has_dict_sys_mutex) {
		mutex_enter(&dict_sys->mutex);
	}

	ut_ad(mutex_own(&dict_sys->mutex));

	/* first check table_stats */
	ret = dict_table_schema_check(&table_stats_schema, errstr,
				      sizeof(errstr));
	if (ret == DB_SUCCESS) {
		/* if it is ok, then check index_stats */
		ret = dict_table_schema_check(&index_stats_schema, errstr,
					      sizeof(errstr));
	}

	if (!caller_has_dict_sys_mutex) {
		mutex_exit(&dict_sys->mutex);
	}

	if (ret != DB_SUCCESS) {
		/* errstr was filled in by dict_table_schema_check() */
		ib::error() << errstr;
		return(false);
	}
	/* else */

	return(true);
}
280 
/** Executes a given SQL statement using the InnoDB internal SQL parser.
This function will free the pinfo object.
@param[in,out]	pinfo	pinfo to pass to que_eval_sql() must already
have any literals bound to it
@param[in]	sql	SQL string to execute
@param[in,out]	trx	in case of NULL the function will allocate and
free the trx object. If it is not NULL then it will be rolled back
only in the case of error, but not freed.
@return DB_SUCCESS or error code */
static
dberr_t
dict_stats_exec_sql(
	pars_info_t*	pinfo,
	const char*	sql,
	trx_t*		trx)
{
	dberr_t	err;
	bool	trx_started = false;	/* true iff we allocated trx here
					and thus own its lifetime */

	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
	ut_ad(mutex_own(&dict_sys->mutex));

	if (!dict_stats_persistent_storage_check(true)) {
		/* que_eval_sql() would normally consume pinfo; since we
		bail out before calling it, free pinfo ourselves to honor
		the "this function frees pinfo" contract */
		pars_info_free(pinfo);
		return(DB_STATS_DO_NOT_EXIST);
	}

	if (trx == NULL) {
		trx = trx_allocate_for_background();
		trx_started = true;

		if (srv_read_only_mode) {
			trx_start_internal_read_only(trx);
		} else {
			trx_start_internal(trx);
		}
	}

	err = que_eval_sql(pinfo, sql, FALSE, trx); /* pinfo is freed here */

	DBUG_EXECUTE_IF("stats_index_error",
		if (!trx_started) {
			err = DB_STATS_DO_NOT_EXIST;
			trx->error_state = DB_STATS_DO_NOT_EXIST;
		});

	if (!trx_started && err == DB_SUCCESS) {
		/* caller-provided trx and success: committing is the
		caller's responsibility, per the function contract */
		return(DB_SUCCESS);
	}

	if (err == DB_SUCCESS) {
		trx_commit_for_mysql(trx);
	} else {
		trx->op_info = "rollback of internal trx on stats tables";
		/* we hold dict_operation_lock in X mode (asserted above);
		record that in the trx around the rollback, presumably so
		the rollback code does not try to acquire it again --
		NOTE(review): confirm against trx_rollback_to_savepoint() */
		trx->dict_operation_lock_mode = RW_X_LATCH;
		trx_rollback_to_savepoint(trx, NULL);
		trx->dict_operation_lock_mode = 0;
		trx->op_info = "";
		ut_a(trx->error_state == DB_SUCCESS);
	}

	if (trx_started) {
		trx_free_for_background(trx);
	}

	return(err);
}
348 
/*********************************************************************//**
Duplicate a table object and its indexes.
This function creates a dummy dict_table_t object and initializes the
following table and index members:
dict_table_t::id (copied)
dict_table_t::heap (newly created)
dict_table_t::name (copied)
dict_table_t::corrupted (copied)
dict_table_t::indexes<> (newly created)
dict_table_t::magic_n
for each entry in dict_table_t::indexes, the following are initialized:
(indexes for which dict_stats_should_ignore_index() holds, e.g. those
with DICT_FTS set in index->type, are skipped)
dict_index_t::id (copied)
dict_index_t::name (copied)
dict_index_t::table_name (points to the copied table name)
dict_index_t::table (points to the above semi-initialized object)
dict_index_t::type (copied)
dict_index_t::to_be_dropped (set to 0)
dict_index_t::online_status (set to ONLINE_INDEX_COMPLETE)
dict_index_t::n_uniq (copied)
dict_index_t::fields[] (newly created, only first n_uniq, only fields[i].name)
dict_index_t::indexes<> (newly created)
dict_index_t::stat_n_diff_key_vals[] (only allocated, left uninitialized)
dict_index_t::stat_n_sample_sizes[] (only allocated, left uninitialized)
dict_index_t::stat_n_non_null_key_vals[] (only allocated, left uninitialized)
dict_index_t::magic_n
The returned object should be freed with dict_stats_table_clone_free()
when no longer needed.
@return incomplete table object */
static
dict_table_t*
dict_stats_table_clone_create(
/*==========================*/
	const dict_table_t*	table)	/*!< in: table whose stats to copy */
{
	size_t		heap_size;
	dict_index_t*	index;

	/* Estimate the size needed for the table and all of its indexes.
	This pass must visit exactly the same indexes and strings as the
	copying pass below, otherwise the heap estimate would be wrong. */

	heap_size = 0;
	heap_size += sizeof(dict_table_t);
	heap_size += strlen(table->name.m_name) + 1;

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_ibuf(index));

		ulint	n_uniq = dict_index_get_n_unique(index);

		heap_size += sizeof(dict_index_t);
		heap_size += strlen(index->name) + 1;
		heap_size += n_uniq * sizeof(index->fields[0]);
		for (ulint i = 0; i < n_uniq; i++) {
			heap_size += strlen(index->fields[i].name) + 1;
		}
		heap_size += n_uniq * sizeof(index->stat_n_diff_key_vals[0]);
		heap_size += n_uniq * sizeof(index->stat_n_sample_sizes[0]);
		heap_size += n_uniq * sizeof(index->stat_n_non_null_key_vals[0]);
	}

	/* Allocate the memory and copy the members */

	mem_heap_t*	heap;

	heap = mem_heap_create(heap_size);

	dict_table_t*	t;

	/* note: "t" itself is allocated from "heap", so freeing the heap
	frees the whole clone (see dict_stats_table_clone_free()) */
	t = (dict_table_t*) mem_heap_alloc(heap, sizeof(*t));

	UNIV_MEM_ASSERT_RW_ABORT(&table->id, sizeof(table->id));
	t->id = table->id;

	t->heap = heap;

	t->name.m_name = mem_heap_strdup(heap, table->name.m_name);

	t->corrupted = table->corrupted;

	/* This private object "t" is not shared with other threads, so
	we do not need the stats_latch (thus we pass false below). The
	dict_table_stats_lock()/unlock() routines will do nothing. */
	dict_table_stats_latch_create(t, false);

	UT_LIST_INIT(t->indexes, &dict_index_t::indexes);

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_ibuf(index));

		dict_index_t*	idx;

		idx = (dict_index_t*) mem_heap_alloc(heap, sizeof(*idx));

		UNIV_MEM_ASSERT_RW_ABORT(&index->id, sizeof(index->id));
		idx->id = index->id;

		idx->name = mem_heap_strdup(heap, index->name);

		idx->table_name = t->name.m_name;

		idx->table = t;

		idx->type = index->type;

		/* the clone is never scheduled for dropping and is always
		treated as a fully built, committed index, regardless of
		the source index's state */
		idx->to_be_dropped = 0;

		idx->online_status = ONLINE_INDEX_COMPLETE;
		idx->set_committed(true);

		idx->n_uniq = index->n_uniq;

		idx->fields = (dict_field_t*) mem_heap_alloc(
			heap, idx->n_uniq * sizeof(idx->fields[0]));

		/* only the field names are copied; the remaining
		dict_field_t members are left uninitialized */
		for (ulint i = 0; i < idx->n_uniq; i++) {
			idx->fields[i].name = mem_heap_strdup(
				heap, index->fields[i].name);
		}

		/* hook idx into t->indexes */
		UT_LIST_ADD_LAST(t->indexes, idx);

		/* the stats arrays are allocated but intentionally left
		uninitialized; the caller fills them in, e.g. via
		dict_stats_copy() (see dict_stats_snapshot_create()) */
		idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_diff_key_vals[0]));

		idx->stat_n_sample_sizes = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_sample_sizes[0]));

		idx->stat_n_non_null_key_vals = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_non_null_key_vals[0]));
		ut_d(idx->magic_n = DICT_INDEX_MAGIC_N);
	}

	ut_d(t->magic_n = DICT_TABLE_MAGIC_N);

	return(t);
}
503 
/*********************************************************************//**
Free the resources occupied by an object returned by
dict_stats_table_clone_create(). */
static
void
dict_stats_table_clone_free(
/*========================*/
	dict_table_t*	t)	/*!< in: dummy table object to free */
{
	/* destroy the latch before freeing the heap: the whole clone,
	including "t" itself, was allocated from t->heap by
	dict_stats_table_clone_create() */
	dict_table_stats_latch_destroy(t);
	mem_heap_free(t->heap);
}
516 
517 /*********************************************************************//**
518 Write all zeros (or 1 where it makes sense) into an index
519 statistics members. The resulting stats correspond to an empty index.
520 The caller must own index's table stats latch in X mode
521 (dict_table_stats_lock(table, RW_X_LATCH)) */
522 static
523 void
dict_stats_empty_index(dict_index_t * index)524 dict_stats_empty_index(
525 /*===================*/
526 	dict_index_t*	index)	/*!< in/out: index */
527 {
528 	ut_ad(!(index->type & DICT_FTS));
529 	ut_ad(!dict_index_is_ibuf(index));
530 
531 	ulint	n_uniq = index->n_uniq;
532 
533 	for (ulint i = 0; i < n_uniq; i++) {
534 		index->stat_n_diff_key_vals[i] = 0;
535 		index->stat_n_sample_sizes[i] = 1;
536 		index->stat_n_non_null_key_vals[i] = 0;
537 	}
538 
539 	index->stat_index_size = 1;
540 	index->stat_n_leaf_pages = 1;
541 }
542 
543 /*********************************************************************//**
544 Write all zeros (or 1 where it makes sense) into a table and its indexes'
545 statistics members. The resulting stats correspond to an empty table. */
546 static
547 void
dict_stats_empty_table(dict_table_t * table)548 dict_stats_empty_table(
549 /*===================*/
550 	dict_table_t*	table)	/*!< in/out: table */
551 {
552 	/* Zero the stats members */
553 
554 	dict_table_stats_lock(table, RW_X_LATCH);
555 
556 	table->stat_n_rows = 0;
557 	table->stat_clustered_index_size = 1;
558 	/* 1 page for each index, not counting the clustered */
559 	table->stat_sum_of_other_index_sizes
560 		= UT_LIST_GET_LEN(table->indexes) - 1;
561 	table->stat_modified_counter = 0;
562 
563 	dict_index_t*	index;
564 
565 	for (index = dict_table_get_first_index(table);
566 	     index != NULL;
567 	     index = dict_table_get_next_index(index)) {
568 
569 		if (index->type & DICT_FTS) {
570 			continue;
571 		}
572 
573 		ut_ad(!dict_index_is_ibuf(index));
574 
575 		dict_stats_empty_index(index);
576 	}
577 
578 	table->stat_initialized = TRUE;
579 
580 	dict_table_stats_unlock(table, RW_X_LATCH);
581 }
582 
/*********************************************************************//**
Check whether index's stats are initialized (assert if they are not). */
static
void
dict_stats_assert_initialized_index(
/*================================*/
	const dict_index_t*	index)	/*!< in: index */
{
	/* UNIV_MEM_ASSERT_RW_ABORT() checks that the given memory range
	is fully initialized ("defined"); presumably these checks are
	only active under memory-debugging builds (e.g. Valgrind) and
	compile to no-ops otherwise -- NOTE(review): confirm in univ.i */
	UNIV_MEM_ASSERT_RW_ABORT(
		index->stat_n_diff_key_vals,
		index->n_uniq * sizeof(index->stat_n_diff_key_vals[0]));

	UNIV_MEM_ASSERT_RW_ABORT(
		index->stat_n_sample_sizes,
		index->n_uniq * sizeof(index->stat_n_sample_sizes[0]));

	UNIV_MEM_ASSERT_RW_ABORT(
		index->stat_n_non_null_key_vals,
		index->n_uniq * sizeof(index->stat_n_non_null_key_vals[0]));

	UNIV_MEM_ASSERT_RW_ABORT(
		&index->stat_index_size,
		sizeof(index->stat_index_size));

	UNIV_MEM_ASSERT_RW_ABORT(
		&index->stat_n_leaf_pages,
		sizeof(index->stat_n_leaf_pages));
}
611 
/*********************************************************************//**
Check whether table's stats are initialized (assert if they are not). */
static
void
dict_stats_assert_initialized(
/*==========================*/
	const dict_table_t*	table)	/*!< in: table */
{
	ut_a(table->stat_initialized);

	/* check that every table-level stats member has been written
	(UNIV_MEM_ASSERT_RW_ABORT aborts on uninitialized memory under
	memory-debugging builds), then recurse into each index that the
	stats code actually maintains */
	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_last_recalc,
			   sizeof(table->stats_last_recalc));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_persistent,
			   sizeof(table->stat_persistent));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_auto_recalc,
			   sizeof(table->stats_auto_recalc));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_sample_pages,
			   sizeof(table->stats_sample_pages));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_n_rows,
			   sizeof(table->stat_n_rows));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_clustered_index_size,
			   sizeof(table->stat_clustered_index_size));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_sum_of_other_index_sizes,
			   sizeof(table->stat_sum_of_other_index_sizes));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_modified_counter,
			   sizeof(table->stat_modified_counter));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_bg_flag,
			   sizeof(table->stats_bg_flag));

	for (dict_index_t* index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		/* ignored indexes (FTS, spatial, corrupted, ...) are not
		maintained by the stats code, so they are not checked */
		if (!dict_stats_should_ignore_index(index)) {
			dict_stats_assert_initialized_index(index);
		}
	}
}
658 
/* Two index objects are considered the same iff both are non-NULL and
they agree on both the index id and the index name. */
#define INDEX_EQ(i1, i2) \
	((i1) != NULL \
	 && (i2) != NULL \
	 && (i1)->id == (i2)->id \
	 && strcmp((i1)->name, (i2)->name) == 0)
664 
/*********************************************************************//**
Copy table and index statistics from one table to another, including index
stats. Extra indexes in src are ignored and extra indexes in dst are
initialized to correspond to an empty index. */
static
void
dict_stats_copy(
/*============*/
	dict_table_t*		dst,	/*!< in/out: destination table */
	const dict_table_t*	src)	/*!< in: source table */
{
	dst->stats_last_recalc = src->stats_last_recalc;
	dst->stat_n_rows = src->stat_n_rows;
	dst->stat_clustered_index_size = src->stat_clustered_index_size;
	dst->stat_sum_of_other_index_sizes = src->stat_sum_of_other_index_sizes;
	dst->stat_modified_counter = src->stat_modified_counter;

	dict_index_t*	dst_idx;
	dict_index_t*	src_idx;

	/* Walk both index lists in parallel. The &&-expression in the
	increment part advances src_idx only while it is non-NULL, so
	src_idx simply stays NULL once the src list is exhausted. */
	for (dst_idx = dict_table_get_first_index(dst),
	     src_idx = dict_table_get_first_index(src);
	     dst_idx != NULL;
	     dst_idx = dict_table_get_next_index(dst_idx),
	     (src_idx != NULL
	      && (src_idx = dict_table_get_next_index(src_idx)))) {

		if (dict_stats_should_ignore_index(dst_idx)) {
			/* FTS indexes are skipped entirely; other ignored
			indexes still get empty stats written */
			if (!(dst_idx->type & DICT_FTS)) {
				dict_stats_empty_index(dst_idx);
			}
			continue;
		}

		ut_ad(!dict_index_is_ibuf(dst_idx));

		/* if the positionally corresponding src index does not
		match, search the whole src list for one with the same
		id and name */
		if (!INDEX_EQ(src_idx, dst_idx)) {
			for (src_idx = dict_table_get_first_index(src);
			     src_idx != NULL;
			     src_idx = dict_table_get_next_index(src_idx)) {

				if (INDEX_EQ(src_idx, dst_idx)) {
					break;
				}
			}
		}

		/* no matching index in src at all: initialize dst_idx
		as an empty index */
		if (!INDEX_EQ(src_idx, dst_idx)) {
			dict_stats_empty_index(dst_idx);
			continue;
		}

		ulint	n_copy_el;

		if (dst_idx->n_uniq > src_idx->n_uniq) {
			n_copy_el = src_idx->n_uniq;
			/* Since src is smaller some elements in dst
			will remain untouched by the following memmove(),
			thus we init all of them here. */
			dict_stats_empty_index(dst_idx);
		} else {
			n_copy_el = dst_idx->n_uniq;
		}

		memmove(dst_idx->stat_n_diff_key_vals,
			src_idx->stat_n_diff_key_vals,
			n_copy_el * sizeof(dst_idx->stat_n_diff_key_vals[0]));

		memmove(dst_idx->stat_n_sample_sizes,
			src_idx->stat_n_sample_sizes,
			n_copy_el * sizeof(dst_idx->stat_n_sample_sizes[0]));

		memmove(dst_idx->stat_n_non_null_key_vals,
			src_idx->stat_n_non_null_key_vals,
			n_copy_el * sizeof(dst_idx->stat_n_non_null_key_vals[0]));

		dst_idx->stat_index_size = src_idx->stat_index_size;

		dst_idx->stat_n_leaf_pages = src_idx->stat_n_leaf_pages;
	}

	dst->stat_initialized = TRUE;
}
748 
/** Duplicate the stats of a table and its indexes.
This function creates a dummy dict_table_t object and copies the input
table's stats into it. The returned table object is not in the dictionary
cache and cannot be accessed by any other threads. In addition to the
members copied in dict_stats_table_clone_create() this function initializes
the following:
dict_table_t::stat_initialized
dict_table_t::stat_persistent
dict_table_t::stat_n_rows
dict_table_t::stat_clustered_index_size
dict_table_t::stat_sum_of_other_index_sizes
dict_table_t::stat_modified_counter
dict_index_t::stat_n_diff_key_vals[]
dict_index_t::stat_n_sample_sizes[]
dict_index_t::stat_n_non_null_key_vals[]
dict_index_t::stat_index_size
dict_index_t::stat_n_leaf_pages
The returned object should be freed with dict_stats_snapshot_free()
when no longer needed.
@param[in]	table	table whose stats to copy
@return incomplete table object */
static
dict_table_t*
dict_stats_snapshot_create(
	dict_table_t*	table)
{
	/* latch order: dict_sys->mutex first, then the table's stats
	latch in S mode (we only read from "table") */
	mutex_enter(&dict_sys->mutex);

	dict_table_stats_lock(table, RW_S_LATCH);

	dict_stats_assert_initialized(table);

	dict_table_t*	t;

	t = dict_stats_table_clone_create(table);

	dict_stats_copy(t, table);

	/* these members are not handled by dict_stats_copy(), so copy
	them explicitly */
	t->stat_persistent = table->stat_persistent;
	t->stats_auto_recalc = table->stats_auto_recalc;
	t->stats_sample_pages = table->stats_sample_pages;
	t->stats_bg_flag = table->stats_bg_flag;

	dict_table_stats_unlock(table, RW_S_LATCH);

	mutex_exit(&dict_sys->mutex);

	return(t);
}
798 
/*********************************************************************//**
Free the resources occupied by an object returned by
dict_stats_snapshot_create(). */
static
void
dict_stats_snapshot_free(
/*=====================*/
	dict_table_t*	t)	/*!< in: dummy table object to free */
{
	/* a snapshot is just a table clone with stats copied in, so the
	clone destructor does all the work */
	dict_stats_table_clone_free(t);
}
810 
/*********************************************************************//**
Calculates new estimates for index statistics. This function is
relatively quick and is used to calculate transient statistics that
are not saved on disk. This was the only way to calculate statistics
before the Persistent Statistics feature was introduced. */
static
void
dict_stats_update_transient_for_index(
/*==================================*/
	dict_index_t*	index)	/*!< in/out: index */
{
	if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
	    && (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO
		|| !dict_index_is_clust(index))) {
		/* If we have set a high innodb_force_recovery
		level, do not calculate statistics, as a badly
		corrupted index can cause a crash in it.
		Initialize some bogus index cardinality
		statistics, so that the data can be queried in
		various means, also via secondary indexes. */
		dict_stats_empty_index(index);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
	} else if (ibuf_debug && !dict_index_is_clust(index)) {
		/* with insert-buffer debugging enabled, secondary index
		stats are deliberately left as empty-index values */
		dict_stats_empty_index(index);
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
	} else {
		mtr_t	mtr;
		ulint	size;

		mtr_start(&mtr);
		dict_disable_redo_if_temporary(index->table, &mtr);

		/* S-latch the index tree for the duration of the size
		queries */
		mtr_s_lock(dict_index_get_lock(index), &mtr);

		size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);

		if (size != ULINT_UNDEFINED) {
			index->stat_index_size = size;

			/* only query the leaf-page count if the total
			size could be determined; otherwise "size" stays
			ULINT_UNDEFINED for the switch below */
			size = btr_get_size(
				index, BTR_N_LEAF_PAGES, &mtr);
		}

		mtr_commit(&mtr);

		switch (size) {
		case ULINT_UNDEFINED:
			/* btr_get_size() failed; fall back to
			empty-index stats */
			dict_stats_empty_index(index);
			return;
		case 0:
			/* The root node of the tree is a leaf */
			size = 1;
		}

		index->stat_n_leaf_pages = size;

		/* We don't handle the return value since it will be false
		only when some thread is dropping the table and we don't
		have to empty the statistics of the to be dropped index */
		btr_estimate_number_of_different_key_vals(index);
	}
}
873 
/*********************************************************************//**
Calculates new estimates for table and index statistics. This function
is relatively quick and is used to calculate transient statistics that
are not saved on disk.
This was the only way to calculate statistics before the
Persistent Statistics feature was introduced. */
void
dict_stats_update_transient(
/*========================*/
	dict_table_t*	table)	/*!< in/out: table */
{
	dict_index_t*	index;
	ulint		sum_of_index_sizes	= 0;

	/* serialize concurrent index analysis on this table */
	dict_table_analyze_index_lock(table);

	/* Find out the sizes of the indexes and how many different values
	for the key they approximately have */

	index = dict_table_get_first_index(table);

	if (dict_table_is_discarded(table)) {
		/* Nothing to do. */
		dict_stats_empty_table(table);
		dict_table_analyze_index_unlock(table);
		return;
	} else if (index == NULL) {
		/* Table definition is corrupt */

		ib::warn() << "Table " << table->name
			<< " has no indexes. Cannot calculate statistics.";
		dict_stats_empty_table(table);
		dict_table_analyze_index_unlock(table);
		return;
	}

	for (; index != NULL; index = dict_table_get_next_index(index)) {

		ut_ad(!dict_index_is_ibuf(index));

		/* fulltext and spatial indexes get no cardinality stats
		at all (they are not even reset to empty values) */
		if (index->type & DICT_FTS || dict_index_is_spatial(index)) {
			continue;
		}

		dict_stats_empty_index(index);

		/* corrupted, to-be-dropped or uncommitted indexes keep
		the empty values just written */
		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		dict_stats_update_transient_for_index(index);

		sum_of_index_sizes += index->stat_index_size;
	}

	/* update the table-level members under the stats latch */
	dict_table_stats_lock(table, RW_X_LATCH);

	/* the first index is presumably the clustered index (InnoDB
	convention); the number of distinct values of its full unique
	key prefix approximates the row count */
	index = dict_table_get_first_index(table);

	table->stat_n_rows = index->stat_n_diff_key_vals[
		dict_index_get_n_unique(index) - 1];

	table->stat_clustered_index_size = index->stat_index_size;

	table->stat_sum_of_other_index_sizes = sum_of_index_sizes
		- index->stat_index_size;

	table->stats_last_recalc = ut_time_monotonic();

	table->stat_modified_counter = 0;

	table->stat_initialized = TRUE;

	dict_table_stats_unlock(table, RW_X_LATCH);

	dict_table_analyze_index_unlock(table);

}
952 
953 /* @{ Pseudo code about the relation between the following functions
954 
955 let N = N_SAMPLE_PAGES(index)
956 
957 dict_stats_analyze_index()
958   for each n_prefix
959     search for good enough level:
960       dict_stats_analyze_index_level() // only called if level has <= N pages
961         // full scan of the level in one mtr
962         collect statistics about the given level
963       if we are not satisfied with the level, search next lower level
964     we have found a good enough level here
965     dict_stats_analyze_index_for_n_prefix(that level, stats collected above)
966       // full scan of the level in one mtr
967       dive below some records and analyze the leaf page there:
968       dict_stats_analyze_index_below_cur()
969 @} */
970 
/*********************************************************************//**
Find the total number and the number of distinct keys on a given level in
an index. Each of the 1..n_uniq prefixes are looked up and the results are
saved in the array n_diff[0] .. n_diff[n_uniq - 1]. The total number of
records on the level is saved in total_recs.
Also, the index of the last record in each group of equal records is saved
in n_diff_boundaries[0..n_uniq - 1], records indexing starts from the leftmost
record on the level and continues across page boundaries, counting from 0. */
static
void
dict_stats_analyze_index_level(
/*===========================*/
	dict_index_t*	index,		/*!< in: index */
	ulint		level,		/*!< in: level */
	ib_uint64_t*	n_diff,		/*!< out: array for number of
					distinct keys for all prefixes */
	ib_uint64_t*	total_recs,	/*!< out: total number of records */
	ib_uint64_t*	total_pages,	/*!< out: total number of pages */
	boundaries_t*	n_diff_boundaries,/*!< out: boundaries of the groups
					of distinct keys */
	mtr_t*		mtr)		/*!< in/out: mini-transaction */
{
	ulint		n_uniq;
	mem_heap_t*	heap;
	btr_pcur_t	pcur;
	const page_t*	page;
	const rec_t*	rec;
	const rec_t*	prev_rec;
	bool		prev_rec_is_copied;
	byte*		prev_rec_buf = NULL;	/* owned by this function,
						freed before returning */
	ulint		prev_rec_buf_size = 0;
	ulint*		rec_offsets;
	ulint*		prev_rec_offsets;
	ulint		i;

	DEBUG_PRINTF("    %s(table=%s, index=%s, level=%lu)\n", __func__,
		     index->table->name, index->name, level);

	/* The caller must hold the index SX-latch in this mtr. */
	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_SX_LOCK));

	n_uniq = dict_index_get_n_unique(index);

	/* elements in the n_diff array are 0..n_uniq-1 (inclusive) */
	memset(n_diff, 0x0, n_uniq * sizeof(n_diff[0]));

	/* Allocate space for the offsets header (the allocation size at
	offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
	so that this will never be less than the size calculated in
	rec_get_offsets_func(). */
	i = (REC_OFFS_HEADER_SIZE + 1 + 1) + index->n_fields;

	heap = mem_heap_create((2 * sizeof *rec_offsets) * i);
	rec_offsets = static_cast<ulint*>(
		mem_heap_alloc(heap, i * sizeof *rec_offsets));
	prev_rec_offsets = static_cast<ulint*>(
		mem_heap_alloc(heap, i * sizeof *prev_rec_offsets));
	rec_offs_set_n_alloc(rec_offsets, i);
	rec_offs_set_n_alloc(prev_rec_offsets, i);

	/* reset the dynamic arrays n_diff_boundaries[0..n_uniq-1] */
	if (n_diff_boundaries != NULL) {
		for (i = 0; i < n_uniq; i++) {
			n_diff_boundaries[i].erase(
				n_diff_boundaries[i].begin(),
				n_diff_boundaries[i].end());
		}
	}

	/* Position pcur on the leftmost record on the leftmost page
	on the desired level. */

	btr_pcur_open_at_index_side(
		true, index, BTR_SEARCH_TREE | BTR_ALREADY_S_LATCHED,
		&pcur, true, level, mtr);
	btr_pcur_move_to_next_on_page(&pcur);

	page = btr_pcur_get_page(&pcur);

	/* The page must not be empty, except when
	it is the root page (and the whole index is empty). */
	ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page));
	ut_ad(btr_pcur_get_rec(&pcur)
	      == page_rec_get_next_const(page_get_infimum_rec(page)));

	/* check that we are indeed on the desired level */
	ut_a(btr_page_get_level(page, mtr) == level);

	/* there should not be any pages on the left */
	ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);

	/* check whether the first record on the leftmost page is marked
	as such, if we are on a non-leaf level */
	ut_a((level == 0)
	     == !(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
			  btr_pcur_get_rec(&pcur), page_is_comp(page))));

	prev_rec = NULL;
	prev_rec_is_copied = false;

	/* no records by default */
	*total_recs = 0;

	*total_pages = 0;

	/* iterate over all user records on this level
	and compare each two adjacent ones, even the last on page
	X and the first on page X+1 */
	for (;
	     btr_pcur_is_on_user_rec(&pcur);
	     btr_pcur_move_to_next_user_rec(&pcur, mtr)) {

		bool	rec_is_last_on_page;

		rec = btr_pcur_get_rec(&pcur);

		/* If rec and prev_rec are on different pages, then prev_rec
		must have been copied, because we hold latch only on the page
		where rec resides. */
		if (prev_rec != NULL
		    && page_align(rec) != page_align(prev_rec)) {

			ut_a(prev_rec_is_copied);
		}

		rec_is_last_on_page =
			page_rec_is_supremum(page_rec_get_next_const(rec));

		/* increment the pages counter at the end of each page */
		if (rec_is_last_on_page) {

			(*total_pages)++;
		}

		/* Skip delete-marked records on the leaf level. If we
		do not skip them, then ANALYZE quickly after DELETE
		could count them or not (purge may have already wiped
		them away) which brings non-determinism. We skip only
		leaf-level delete marks because delete marks on
		non-leaf level do not make sense. Note that when
		srv_stats_include_delete_marked is set the skipping
		is disabled and delete-marked records are counted. */

		if (level == 0 && (srv_stats_include_delete_marked ? 0:
		    rec_get_deleted_flag(
			    rec,
			    page_is_comp(btr_pcur_get_page(&pcur))))) {

			if (rec_is_last_on_page
			    && !prev_rec_is_copied
			    && prev_rec != NULL) {
				/* Copy prev_rec into the local buffer:
				the skipped rec is the last user record
				on this page, so the cursor will move to
				the next page and the latch on the page
				holding prev_rec will be released. */

				prev_rec_offsets = rec_get_offsets(
					prev_rec, index, prev_rec_offsets,
					n_uniq, &heap);

				prev_rec = rec_copy_prefix_to_buf(
					prev_rec, index,
					rec_offs_n_fields(prev_rec_offsets),
					&prev_rec_buf, &prev_rec_buf_size);

				prev_rec_is_copied = true;
			}

			continue;
		}
		rec_offsets = rec_get_offsets(
			rec, index, rec_offsets, n_uniq, &heap);

		(*total_recs)++;

		if (prev_rec != NULL) {
			ulint	matched_fields;

			prev_rec_offsets = rec_get_offsets(
				prev_rec, index, prev_rec_offsets,
				n_uniq, &heap);

			/* Count how many leading fields rec shares with
			its predecessor; every shorter shared prefix
			starts a new distinct-key group. */
			cmp_rec_rec_with_match(rec,
					       prev_rec,
					       rec_offsets,
					       prev_rec_offsets,
					       index,
					       false,
					       false,
					       &matched_fields);

			for (i = matched_fields; i < n_uniq; i++) {

				if (n_diff_boundaries != NULL) {
					/* push the index of the previous
					record, that is - the last one from
					a group of equal keys */

					ib_uint64_t	idx;

					/* the index of the current record
					is total_recs - 1, the index of the
					previous record is total_recs - 2;
					we know that idx is not going to
					become negative here because if we
					are in this branch then there is a
					previous record and thus
					total_recs >= 2 */
					idx = *total_recs - 2;

					n_diff_boundaries[i].push_back(idx);
				}

				/* increment the number of different keys
				for n_prefix=i+1 (e.g. if i=0 then we increment
				for n_prefix=1 which is stored in n_diff[0]) */
				n_diff[i]++;
			}
		} else {
			/* this is the first non-delete marked record */
			for (i = 0; i < n_uniq; i++) {
				n_diff[i] = 1;
			}
		}

		if (rec_is_last_on_page) {
			/* end of a page has been reached */

			/* we need to copy the record instead of assigning
			like prev_rec = rec; because when we traverse the
			records on this level at some point we will jump from
			one page to the next and then rec and prev_rec will
			be on different pages and
			btr_pcur_move_to_next_user_rec() will release the
			latch on the page that prev_rec is on */
			prev_rec = rec_copy_prefix_to_buf(
				rec, index, rec_offs_n_fields(rec_offsets),
				&prev_rec_buf, &prev_rec_buf_size);
			prev_rec_is_copied = true;

		} else {
			/* still on the same page, the next call to
			btr_pcur_move_to_next_user_rec() will not jump
			on the next page, we can simply assign pointers
			instead of copying the records like above */

			prev_rec = rec;
			prev_rec_is_copied = false;
		}
	}

	/* if *total_pages is left untouched then the above loop was not
	entered at all and there is one page in the whole tree which is
	empty or the loop was entered but this is level 0, contains one page
	and all records are delete-marked */
	if (*total_pages == 0) {

		ut_ad(level == 0);
		ut_ad(*total_recs == 0);

		*total_pages = 1;
	}

	/* if there are records on this level and boundaries
	should be saved */
	if (*total_recs > 0 && n_diff_boundaries != NULL) {

		/* remember the index of the last record on the level as the
		last one from the last group of equal keys; this holds for
		all possible prefixes */
		for (i = 0; i < n_uniq; i++) {
			ib_uint64_t	idx;

			idx = *total_recs - 1;

			n_diff_boundaries[i].push_back(idx);
		}
	}

	/* now in n_diff_boundaries[i] there are exactly n_diff[i] integers,
	for i=0..n_uniq-1 */

#ifdef UNIV_STATS_DEBUG
	for (i = 0; i < n_uniq; i++) {

		DEBUG_PRINTF("    %s(): total recs: " UINT64PF
			     ", total pages: " UINT64PF
			     ", n_diff[%lu]: " UINT64PF "\n",
			     __func__, *total_recs,
			     *total_pages,
			     i, n_diff[i]);

#if 0
		if (n_diff_boundaries != NULL) {
			ib_uint64_t	j;

			DEBUG_PRINTF("    %s(): boundaries[%lu]: ",
				     __func__, i);

			for (j = 0; j < n_diff[i]; j++) {
				ib_uint64_t	idx;

				idx = n_diff_boundaries[i][j];

				DEBUG_PRINTF(UINT64PF "=" UINT64PF ", ",
					     j, idx);
			}
			DEBUG_PRINTF("\n");
		}
#endif
	}
#endif /* UNIV_STATS_DEBUG */

	/* Release the latch on the last page, because that is not done by
	btr_pcur_close(). This function works also for non-leaf pages. */
	btr_leaf_page_release(btr_pcur_get_block(&pcur), BTR_SEARCH_LEAF, mtr);

	btr_pcur_close(&pcur);
	ut_free(prev_rec_buf);
	mem_heap_free(heap);
}
1287 
/* aux enum for controlling the behavior of dict_stats_scan_page() @{ */
enum page_scan_method_t {
	COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED,/* scan all records on
				the given page and count the number of
				distinct ones, also ignore delete marked
				records */
	QUIT_ON_FIRST_NON_BORING,/* quit when the first record that differs
				from its right neighbor is found; with this
				method the reported count is at most 2 */
	COUNT_ALL_NON_BORING_INCLUDE_DEL_MARKED/* scan all records on
				the given page and count the number of
				distinct ones, include delete marked
				records */
};
/* @} */
1302 
1303 /** Scan a page, reading records from left to right and counting the number
1304 of distinct records (looking only at the first n_prefix
1305 columns) and the number of external pages pointed by records from this page.
1306 If scan_method is QUIT_ON_FIRST_NON_BORING then the function
1307 will return as soon as it finds a record that does not match its neighbor
1308 to the right, which means that in the case of QUIT_ON_FIRST_NON_BORING the
1309 returned n_diff can either be 0 (empty page), 1 (the whole page has all keys
1310 equal) or 2 (the function found a non-boring record and returned).
1311 @param[out]	out_rec			record, or NULL
1312 @param[out]	offsets1		rec_get_offsets() working space (must
1313 be big enough)
1314 @param[out]	offsets2		rec_get_offsets() working space (must
1315 be big enough)
1316 @param[in]	index			index of the page
1317 @param[in]	page			the page to scan
1318 @param[in]	n_prefix		look at the first n_prefix columns
1319 @param[in]	scan_method		scan to the end of the page or not
1320 @param[out]	n_diff			number of distinct records encountered
1321 @param[out]	n_external_pages	if this is non-NULL then it will be set
1322 to the number of externally stored pages which were encountered
1323 @return offsets1 or offsets2 (the offsets of *out_rec),
1324 or NULL if the page is empty and does not contain user records. */
1325 UNIV_INLINE
1326 ulint*
dict_stats_scan_page(const rec_t ** out_rec,ulint * offsets1,ulint * offsets2,const dict_index_t * index,const page_t * page,ulint n_prefix,page_scan_method_t scan_method,ib_uint64_t * n_diff,ib_uint64_t * n_external_pages)1327 dict_stats_scan_page(
1328 	const rec_t**		out_rec,
1329 	ulint*			offsets1,
1330 	ulint*			offsets2,
1331 	const dict_index_t*	index,
1332 	const page_t*		page,
1333 	ulint			n_prefix,
1334 	page_scan_method_t	scan_method,
1335 	ib_uint64_t*		n_diff,
1336 	ib_uint64_t*		n_external_pages)
1337 {
1338 	ulint*		offsets_rec		= offsets1;
1339 	ulint*		offsets_next_rec	= offsets2;
1340 	const rec_t*	rec;
1341 	const rec_t*	next_rec;
1342 	/* A dummy heap, to be passed to rec_get_offsets().
1343 	Because offsets1,offsets2 should be big enough,
1344 	this memory heap should never be used. */
1345 	mem_heap_t*	heap			= NULL;
1346 	const rec_t*	(*get_next)(const rec_t*);
1347 
1348 	if (scan_method == COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED) {
1349 		get_next = page_rec_get_next_non_del_marked;
1350 	} else {
1351 		get_next = page_rec_get_next_const;
1352 	}
1353 
1354 	const bool	should_count_external_pages = n_external_pages != NULL;
1355 
1356 	if (should_count_external_pages) {
1357 		*n_external_pages = 0;
1358 	}
1359 
1360 	rec = get_next(page_get_infimum_rec(page));
1361 
1362 	if (page_rec_is_supremum(rec)) {
1363 		/* the page is empty or contains only delete-marked records */
1364 		*n_diff = 0;
1365 		*out_rec = NULL;
1366 		return(NULL);
1367 	}
1368 
1369 	offsets_rec = rec_get_offsets(rec, index, offsets_rec,
1370 				      ULINT_UNDEFINED, &heap);
1371 
1372 	if (should_count_external_pages) {
1373 		*n_external_pages += btr_rec_get_externally_stored_len(
1374 			rec, offsets_rec);
1375 	}
1376 
1377 	next_rec = get_next(rec);
1378 
1379 	*n_diff = 1;
1380 
1381 	while (!page_rec_is_supremum(next_rec)) {
1382 
1383 		ulint	matched_fields;
1384 
1385 		offsets_next_rec = rec_get_offsets(next_rec, index,
1386 						   offsets_next_rec,
1387 						   ULINT_UNDEFINED,
1388 						   &heap);
1389 
1390 		/* check whether rec != next_rec when looking at
1391 		the first n_prefix fields */
1392 		cmp_rec_rec_with_match(rec, next_rec,
1393 				       offsets_rec, offsets_next_rec,
1394 				       index, false, false, &matched_fields);
1395 
1396 		if (matched_fields < n_prefix) {
1397 			/* rec != next_rec, => rec is non-boring */
1398 
1399 			(*n_diff)++;
1400 
1401 			if (scan_method == QUIT_ON_FIRST_NON_BORING) {
1402 				break;
1403 			}
1404 		}
1405 
1406 		rec = next_rec;
1407 		{
1408 			/* Assign offsets_rec = offsets_next_rec
1409 			so that offsets_rec matches with rec which
1410 			was just assigned rec = next_rec above.
1411 			Also need to point offsets_next_rec to the
1412 			place where offsets_rec was pointing before
1413 			because we have just 2 placeholders where
1414 			data is actually stored:
1415 			offsets1 and offsets2 and we
1416 			are using them in circular fashion
1417 			(offsets[_next]_rec are just pointers to
1418 			those placeholders). */
1419 			ulint*	offsets_tmp;
1420 			offsets_tmp = offsets_rec;
1421 			offsets_rec = offsets_next_rec;
1422 			offsets_next_rec = offsets_tmp;
1423 		}
1424 
1425 		if (should_count_external_pages) {
1426 			*n_external_pages += btr_rec_get_externally_stored_len(
1427 				rec, offsets_rec);
1428 		}
1429 
1430 		next_rec = get_next(next_rec);
1431 	}
1432 
1433 	/* offsets1,offsets2 should have been big enough */
1434 	ut_a(heap == NULL);
1435 	*out_rec = rec;
1436 	return(offsets_rec);
1437 }
1438 
/** Dive below the current position of a cursor and calculate the number of
distinct records on the leaf page, when looking at the first n_prefix
columns. Also calculate the number of external pages pointed by records
on the leaf page. The results are returned through the n_diff and
n_external_pages output parameters.
@param[in]	cur			cursor
@param[in]	n_prefix		look at the first n_prefix columns
when comparing records
@param[out]	n_diff			number of distinct records
@param[out]	n_external_pages	number of external pages */
static
void
dict_stats_analyze_index_below_cur(
	const btr_cur_t*	cur,
	ulint			n_prefix,
	ib_uint64_t*		n_diff,
	ib_uint64_t*		n_external_pages)
{
	dict_index_t*	index;
	buf_block_t*	block;
	const page_t*	page;
	mem_heap_t*	heap;
	const rec_t*	rec;
	ulint*		offsets1;
	ulint*		offsets2;
	ulint*		offsets_rec;
	ulint		size;
	mtr_t		mtr;

	index = btr_cur_get_index(cur);

	/* Allocate offsets for the record and the node pointer, for
	node pointer records. In a secondary index, the node pointer
	record will consist of all index fields followed by a child
	page number.
	Allocate space for the offsets header (the allocation size at
	offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
	so that this will never be less than the size calculated in
	rec_get_offsets_func(). */
	size = (1 + REC_OFFS_HEADER_SIZE) + 1 + dict_index_get_n_fields(index);

	heap = mem_heap_create(size * (sizeof *offsets1 + sizeof *offsets2));

	offsets1 = static_cast<ulint*>(mem_heap_alloc(
			heap, size * sizeof *offsets1));

	offsets2 = static_cast<ulint*>(mem_heap_alloc(
			heap, size * sizeof *offsets2));

	rec_offs_set_n_alloc(offsets1, size);
	rec_offs_set_n_alloc(offsets2, size);

	rec = btr_cur_get_rec(cur);

	offsets_rec = rec_get_offsets(rec, index, offsets1,
				      ULINT_UNDEFINED, &heap);

	/* Start the descent from the child page pointed to by the
	node pointer record under the cursor. */
	page_id_t		page_id(dict_index_get_space(index),
					btr_node_ptr_get_child_page_no(
						rec, offsets_rec));
	const page_size_t	page_size(dict_table_page_size(index->table));

	/* assume no external pages by default - in case we quit from this
	function without analyzing any leaf pages */
	*n_external_pages = 0;

	mtr_start(&mtr);

	/* descend to the leaf level on the B-tree */
	for (;;) {

		block = buf_page_get_gen(page_id, page_size, RW_S_LATCH,
					 NULL /* no guessed block */,
					 BUF_GET, __FILE__, __LINE__, &mtr);

		page = buf_block_get_frame(block);

		if (btr_page_get_level(page, mtr) == 0) {
			/* leaf level */
			break;
		}
		/* else */

		/* search for the first non-boring record on the page */
		offsets_rec = dict_stats_scan_page(
			&rec, offsets1, offsets2, index, page, n_prefix,
			QUIT_ON_FIRST_NON_BORING, n_diff, NULL);

		/* pages on level > 0 are not allowed to be empty */
		ut_a(offsets_rec != NULL);
		/* if page is not empty (offsets_rec != NULL) then n_diff must
		be > 0, otherwise there is a bug in dict_stats_scan_page() */
		ut_a(*n_diff > 0);

		if (*n_diff == 1) {
			mtr_commit(&mtr);

			/* page has all keys equal and the end of the page
			was reached by dict_stats_scan_page(), no need to
			descend to the leaf level */
			mem_heap_free(heap);
			/* can't get an estimate for n_external_pages here
			because we do not dive to the leaf level, assume no
			external pages (*n_external_pages was assigned to 0
			above). */
			return;
		}
		/* else */

		/* when we instruct dict_stats_scan_page() to quit on the
		first non-boring record it finds, then the returned n_diff
		can either be 0 (empty page), 1 (page has all keys equal) or
		2 (non-boring record was found) */
		ut_a(*n_diff == 2);

		/* we have a non-boring record in rec, descend below it */

		page_id.set_page_no(
			btr_node_ptr_get_child_page_no(rec, offsets_rec));
	}

	/* make sure we got a leaf page as a result from the above loop */
	ut_ad(btr_page_get_level(page, &mtr) == 0);

	/* scan the leaf page and find the number of distinct keys,
	when looking only at the first n_prefix columns; also estimate
	the number of externally stored pages pointed by records on this
	page */

	offsets_rec = dict_stats_scan_page(
		&rec, offsets1, offsets2, index, page, n_prefix,
		srv_stats_include_delete_marked ?
		COUNT_ALL_NON_BORING_INCLUDE_DEL_MARKED:
		COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED, n_diff,
		n_external_pages);

#if 0
	DEBUG_PRINTF("      %s(): n_diff below page_no=%lu: " UINT64PF "\n",
		     __func__, page_no, n_diff);
#endif

	mtr_commit(&mtr);
	mem_heap_free(heap);
}
1583 
/** Input data that is used to calculate dict_index_t::stat_n_diff_key_vals[]
for each n-columns prefix (n from 1 to n_uniq). */
struct n_diff_data_t {
	/** Index of the level on which the descent through the btree
	stopped. level 0 is the leaf level. This is >= 1 because we
	avoid scanning the leaf level because it may contain too many
	pages and doing so is useless when combined with the random dives -
	if we are to scan the leaf level, this means a full scan and we can
	simply do that instead of fiddling with picking random records higher
	in the tree and to dive below them. At the start of the analyzing
	we may decide to do full scan of the leaf level, but then this
	structure is not used in that code path. */
	ulint		level;

	/** Number of records on the level where the descent through the
	btree stopped. When we scan the btree from the root, we stop at some
	mid level, choose some records from it and dive below them towards a
	leaf page to analyze. */
	ib_uint64_t	n_recs_on_level;

	/** Number of different key values that were found on the mid level. */
	ib_uint64_t	n_diff_on_level;

	/** Number of leaf pages that are analyzed. This is also the same as
	the number of records that we pick from the mid level and dive below
	them. */
	ib_uint64_t	n_leaf_pages_to_analyze;

	/** Cumulative sum of the number of different key values that were
	found on all analyzed pages. */
	ib_uint64_t	n_diff_all_analyzed_pages;

	/** Cumulative sum of the number of external pages (stored outside of
	the btree but in the same file segment). */
	ib_uint64_t	n_external_pages_sum;
};
1620 
/** Estimate the number of different key values in an index when looking at
the first n_prefix columns. For a given level in an index select
n_diff_data->n_leaf_pages_to_analyze records from that level and dive below
them to the corresponding leaf pages, then scan those leaf pages and save the
sampling results in n_diff_data->n_diff_all_analyzed_pages.
@param[in]	index			index
@param[in]	n_prefix		look at first 'n_prefix' columns when
comparing records
@param[in]	boundaries		a vector that contains
n_diff_data->n_diff_on_level integers each of which represents the index (on
level 'level', counting from left/smallest to right/biggest from 0) of the
last record from each group of distinct keys
@param[in,out]	n_diff_data		n_diff_all_analyzed_pages and
n_external_pages_sum in this structure will be set by this function. The
members level, n_diff_on_level and n_leaf_pages_to_analyze must be set by the
caller in advance - they are used by some calculations inside this function
@param[in,out]	mtr			mini-transaction */
static
void
dict_stats_analyze_index_for_n_prefix(
	dict_index_t*		index,
	ulint			n_prefix,
	const boundaries_t*	boundaries,
	n_diff_data_t*		n_diff_data,
	mtr_t*			mtr)
{
	btr_pcur_t	pcur;
	const page_t*	page;
	ib_uint64_t	rec_idx;
	ib_uint64_t	i;

#if 0
	DEBUG_PRINTF("    %s(table=%s, index=%s, level=%lu, n_prefix=%lu,"
		     " n_diff_on_level=" UINT64PF ")\n",
		     __func__, index->table->name, index->name, level,
		     n_prefix, n_diff_data->n_diff_on_level);
#endif

	/* The caller must hold the index SX-latch in this mtr. */
	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_SX_LOCK));

	/* Position pcur on the leftmost record on the leftmost page
	on the desired level. */

	btr_pcur_open_at_index_side(
		true, index, BTR_SEARCH_TREE | BTR_ALREADY_S_LATCHED,
		&pcur, true, n_diff_data->level, mtr);
	btr_pcur_move_to_next_on_page(&pcur);

	page = btr_pcur_get_page(&pcur);

	const rec_t*	first_rec = btr_pcur_get_rec(&pcur);

	/* We shouldn't be scanning the leaf level. The caller of this function
	should have stopped the descend on level 1 or higher. */
	ut_ad(n_diff_data->level > 0);
	ut_ad(!page_is_leaf(page));

	/* The page must not be empty, except when
	it is the root page (and the whole index is empty). */
	ut_ad(btr_pcur_is_on_user_rec(&pcur));
	ut_ad(first_rec == page_rec_get_next_const(page_get_infimum_rec(page)));

	/* check that we are indeed on the desired level */
	ut_a(btr_page_get_level(page, mtr) == n_diff_data->level);

	/* there should not be any pages on the left */
	ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);

	/* check whether the first record on the leftmost page is marked
	as such; we are on a non-leaf level */
	ut_a(rec_get_info_bits(first_rec, page_is_comp(page))
	     & REC_INFO_MIN_REC_FLAG);

	/* the index (on the level) of the last record from the last
	group of equal keys; no sampled boundary may exceed this */
	const ib_uint64_t	last_idx_on_level = boundaries->at(
		static_cast<unsigned>(n_diff_data->n_diff_on_level - 1));

	rec_idx = 0;

	n_diff_data->n_diff_all_analyzed_pages = 0;
	n_diff_data->n_external_pages_sum = 0;

	for (i = 0; i < n_diff_data->n_leaf_pages_to_analyze; i++) {
		/* there are n_diff_on_level elements
		in 'boundaries' and we divide those elements
		into n_leaf_pages_to_analyze segments, for example:

		let n_diff_on_level=100, n_leaf_pages_to_analyze=4, then:
		segment i=0:  [0, 24]
		segment i=1: [25, 49]
		segment i=2: [50, 74]
		segment i=3: [75, 99] or

		let n_diff_on_level=1, n_leaf_pages_to_analyze=1, then:
		segment i=0: [0, 0] or

		let n_diff_on_level=2, n_leaf_pages_to_analyze=2, then:
		segment i=0: [0, 0]
		segment i=1: [1, 1] or

		let n_diff_on_level=13, n_leaf_pages_to_analyze=7, then:
		segment i=0:  [0,  0]
		segment i=1:  [1,  2]
		segment i=2:  [3,  4]
		segment i=3:  [5,  6]
		segment i=4:  [7,  8]
		segment i=5:  [9, 10]
		segment i=6: [11, 12]

		then we select a random record from each segment and dive
		below it */
		const ib_uint64_t	n_diff = n_diff_data->n_diff_on_level;
		const ib_uint64_t	n_pick
			= n_diff_data->n_leaf_pages_to_analyze;

		const ib_uint64_t	left = n_diff * i / n_pick;
		const ib_uint64_t	right = n_diff * (i + 1) / n_pick - 1;

		ut_a(left <= right);
		ut_a(right <= last_idx_on_level);

		/* we do not pass (left, right) because we do not want to ask
		ut_rnd_interval() to work with too big numbers since
		ib_uint64_t could be bigger than ulint */
		const ulint	rnd = ut_rnd_interval(
			0, static_cast<ulint>(right - left));

		const ib_uint64_t	dive_below_idx
			= boundaries->at(static_cast<unsigned>(left + rnd));

#if 0
		DEBUG_PRINTF("    %s(): dive below record with index="
			     UINT64PF "\n", __func__, dive_below_idx);
#endif

		/* seek to the record with index dive_below_idx; the
		cursor only moves forward because dive_below_idx grows
		monotonically across iterations */
		while (rec_idx < dive_below_idx
		       && btr_pcur_is_on_user_rec(&pcur)) {

			btr_pcur_move_to_next_user_rec(&pcur, mtr);
			rec_idx++;
		}

		/* if the level has finished before the record we are
		searching for, this means that the B-tree has changed in
		the meantime, quit our sampling and use whatever stats
		we have collected so far */
		if (rec_idx < dive_below_idx) {

			ut_ad(!btr_pcur_is_on_user_rec(&pcur));
			break;
		}

		/* it could be that the tree has changed in such a way that
		the record under dive_below_idx is the supremum record, in
		this case rec_idx == dive_below_idx and pcur is positioned
		on the supremum, we do not want to dive below it */
		if (!btr_pcur_is_on_user_rec(&pcur)) {
			break;
		}

		ut_a(rec_idx == dive_below_idx);

		ib_uint64_t	n_diff_on_leaf_page;
		ib_uint64_t	n_external_pages;

		dict_stats_analyze_index_below_cur(btr_pcur_get_btr_cur(&pcur),
						   n_prefix,
						   &n_diff_on_leaf_page,
						   &n_external_pages);

		/* We adjust n_diff_on_leaf_page here to avoid counting
		one value twice - once as the last on some page and once
		as the first on another page. Consider the following example:
		Leaf level:
		page: (2,2,2,2,3,3)
		... many pages like (3,3,3,3,3,3) ...
		page: (3,3,3,3,5,5)
		... many pages like (5,5,5,5,5,5) ...
		page: (5,5,5,5,8,8)
		page: (8,8,8,8,9,9)
		our algo would (correctly) get an estimate that there are
		2 distinct records per page (average). Having 4 pages below
		non-boring records, it would (wrongly) estimate the number
		of distinct records to 8. */
		if (n_diff_on_leaf_page > 0) {
			n_diff_on_leaf_page--;
		}

		n_diff_data->n_diff_all_analyzed_pages += n_diff_on_leaf_page;

		n_diff_data->n_external_pages_sum += n_external_pages;
	}

	btr_pcur_close(&pcur);
}
1817 
1818 /** Set dict_index_t::stat_n_diff_key_vals[] and stat_n_sample_sizes[].
1819 @param[in]	n_diff_data	input data to use to derive the results
1820 @param[in,out]	index		index whose stat_n_diff_key_vals[] to set */
1821 UNIV_INLINE
1822 void
dict_stats_index_set_n_diff(const n_diff_data_t * n_diff_data,dict_index_t * index)1823 dict_stats_index_set_n_diff(
1824 	const n_diff_data_t*	n_diff_data,
1825 	dict_index_t*		index)
1826 {
1827 	for (ulint n_prefix = dict_index_get_n_unique(index);
1828 	     n_prefix >= 1;
1829 	     n_prefix--) {
1830 		/* n_diff_all_analyzed_pages can be 0 here if
1831 		all the leaf pages sampled contained only
1832 		delete-marked records. In this case we should assign
1833 		0 to index->stat_n_diff_key_vals[n_prefix - 1], which
1834 		the formula below does. */
1835 
1836 		const n_diff_data_t*	data = &n_diff_data[n_prefix - 1];
1837 
1838 		ut_ad(data->n_leaf_pages_to_analyze > 0);
1839 		ut_ad(data->n_recs_on_level > 0);
1840 
1841 		ib_uint64_t	n_ordinary_leaf_pages;
1842 
1843 		if (data->level == 1) {
1844 			/* If we know the number of records on level 1, then
1845 			this number is the same as the number of pages on
1846 			level 0 (leaf). */
1847 			n_ordinary_leaf_pages = data->n_recs_on_level;
1848 		} else {
1849 			/* If we analyzed D ordinary leaf pages and found E
1850 			external pages in total linked from those D ordinary
1851 			leaf pages, then this means that the ratio
1852 			ordinary/external is D/E. Then the ratio ordinary/total
1853 			is D / (D + E). Knowing that the total number of pages
1854 			is T (including ordinary and external) then we estimate
1855 			that the total number of ordinary leaf pages is
1856 			T * D / (D + E). */
1857 			n_ordinary_leaf_pages
1858 				= index->stat_n_leaf_pages
1859 				* data->n_leaf_pages_to_analyze
1860 				/ (data->n_leaf_pages_to_analyze
1861 				   + data->n_external_pages_sum);
1862 		}
1863 
1864 		/* See REF01 for an explanation of the algorithm */
1865 		index->stat_n_diff_key_vals[n_prefix - 1]
1866 			= n_ordinary_leaf_pages
1867 
1868 			* data->n_diff_on_level
1869 			/ data->n_recs_on_level
1870 
1871 			* data->n_diff_all_analyzed_pages
1872 			/ data->n_leaf_pages_to_analyze;
1873 
1874 		index->stat_n_sample_sizes[n_prefix - 1]
1875 			= data->n_leaf_pages_to_analyze;
1876 
1877 		DEBUG_PRINTF("    %s(): n_diff=" UINT64PF " for n_prefix=%lu"
1878 			     " (%lu"
1879 			     " * " UINT64PF " / " UINT64PF
1880 			     " * " UINT64PF " / " UINT64PF ")\n",
1881 			     __func__,
1882 			     index->stat_n_diff_key_vals[n_prefix - 1],
1883 			     n_prefix,
1884 			     index->stat_n_leaf_pages,
1885 			     data->n_diff_on_level,
1886 			     data->n_recs_on_level,
1887 			     data->n_diff_all_analyzed_pages,
1888 			     data->n_leaf_pages_to_analyze);
1889 	}
1890 }
1891 
1892 /*********************************************************************//**
1893 Calculates new statistics for a given index and saves them to the index
1894 members stat_n_diff_key_vals[], stat_n_sample_sizes[], stat_index_size and
1895 stat_n_leaf_pages. This function could be slow. */
1896 static
1897 void
dict_stats_analyze_index(dict_index_t * index)1898 dict_stats_analyze_index(
1899 /*=====================*/
1900 	dict_index_t*	index)	/*!< in/out: index to analyze */
1901 {
1902 	ulint		root_level;
1903 	ulint		level;
1904 	bool		level_is_analyzed;
1905 	ulint		n_uniq;
1906 	ulint		n_prefix;
1907 	ib_uint64_t	total_recs;
1908 	ib_uint64_t	total_pages;
1909 	mtr_t		mtr;
1910 	ulint		size;
1911 	DBUG_ENTER("dict_stats_analyze_index");
1912 
1913 	/* stats_latch is created on 1st lock. */
1914 	ut_ad(!(index->table->stats_latch_created) ||
1915 		!rw_lock_own(index->table->stats_latch, RW_X_LATCH));
1916 
1917 	DBUG_PRINT("info", ("index: %s, online status: %d", index->name(),
1918 			    dict_index_get_online_status(index)));
1919 
1920 	/* Disable update statistic for Rtree */
1921 	if (dict_index_is_spatial(index)) {
1922 		DBUG_VOID_RETURN;
1923 	}
1924 
1925 	DEBUG_PRINTF("  %s(index=%s)\n", __func__, index->name());
1926 
1927 	dict_stats_empty_index(index);
1928 
1929 	mtr_start(&mtr);
1930 
1931 	mtr_s_lock(dict_index_get_lock(index), &mtr);
1932 
1933 	size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
1934 
1935 	if (size != ULINT_UNDEFINED) {
1936 		index->stat_index_size = size;
1937 		size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
1938 	}
1939 
1940 	/* Release the X locks on the root page taken by btr_get_size() */
1941 	mtr_commit(&mtr);
1942 
1943 	switch (size) {
1944 	case ULINT_UNDEFINED:
1945 		dict_stats_assert_initialized_index(index);
1946 		DBUG_VOID_RETURN;
1947 	case 0:
1948 		/* The root node of the tree is a leaf */
1949 		size = 1;
1950 	}
1951 
1952 	index->stat_n_leaf_pages = size;
1953 
1954 	mtr_start(&mtr);
1955 
1956 	mtr_sx_lock(dict_index_get_lock(index), &mtr);
1957 
1958 	root_level = btr_height_get(index, &mtr);
1959 
1960 	n_uniq = dict_index_get_n_unique(index);
1961 
1962 	/* If the tree has just one level (and one page) or if the user
1963 	has requested to sample too many pages then do full scan.
1964 
1965 	For each n-column prefix (for n=1..n_uniq) N_SAMPLE_PAGES(index)
1966 	will be sampled, so in total N_SAMPLE_PAGES(index) * n_uniq leaf
1967 	pages will be sampled. If that number is bigger than the total
1968 	number of leaf pages then do full scan of the leaf level instead
1969 	since it will be faster and will give better results. */
1970 
1971 	if (root_level == 0
1972 	    || N_SAMPLE_PAGES(index) * n_uniq > index->stat_n_leaf_pages) {
1973 
1974 		if (root_level == 0) {
1975 			DEBUG_PRINTF("  %s(): just one page,"
1976 				     " doing full scan\n", __func__);
1977 		} else {
1978 			DEBUG_PRINTF("  %s(): too many pages requested for"
1979 				     " sampling, doing full scan\n", __func__);
1980 		}
1981 
1982 		/* do full scan of level 0; save results directly
1983 		into the index */
1984 
1985 		dict_stats_analyze_index_level(index,
1986 					       0 /* leaf level */,
1987 					       index->stat_n_diff_key_vals,
1988 					       &total_recs,
1989 					       &total_pages,
1990 					       NULL /* boundaries not needed */,
1991 					       &mtr);
1992 
1993 		for (ulint i = 0; i < n_uniq; i++) {
1994 			index->stat_n_sample_sizes[i] = total_pages;
1995 		}
1996 
1997 		mtr_commit(&mtr);
1998 
1999 		dict_stats_assert_initialized_index(index);
2000 		DBUG_VOID_RETURN;
2001 	}
2002 
2003 	/* For each level that is being scanned in the btree, this contains the
2004 	number of different key values for all possible n-column prefixes. */
2005 	ib_uint64_t*	n_diff_on_level = UT_NEW_ARRAY(
2006 		ib_uint64_t, n_uniq, mem_key_dict_stats_n_diff_on_level);
2007 
2008 	/* For each level that is being scanned in the btree, this contains the
2009 	index of the last record from each group of equal records (when
2010 	comparing only the first n columns, n=1..n_uniq). */
2011 	boundaries_t*	n_diff_boundaries = UT_NEW_ARRAY_NOKEY(boundaries_t,
2012 							       n_uniq);
2013 
2014 	/* For each n-column prefix this array contains the input data that is
2015 	used to calculate dict_index_t::stat_n_diff_key_vals[]. */
2016 	n_diff_data_t*	n_diff_data = UT_NEW_ARRAY_NOKEY(n_diff_data_t, n_uniq);
2017 
2018 	/* total_recs is also used to estimate the number of pages on one
2019 	level below, so at the start we have 1 page (the root) */
2020 	total_recs = 1;
2021 
2022 	/* Here we use the following optimization:
2023 	If we find that level L is the first one (searching from the
2024 	root) that contains at least D distinct keys when looking at
2025 	the first n_prefix columns, then:
2026 	if we look at the first n_prefix-1 columns then the first
2027 	level that contains D distinct keys will be either L or a
2028 	lower one.
2029 	So if we find that the first level containing D distinct
2030 	keys (on n_prefix columns) is L, we continue from L when
2031 	searching for D distinct keys on n_prefix-1 columns. */
2032 	level = root_level;
2033 	level_is_analyzed = false;
2034 
2035 	for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) {
2036 
2037 		DEBUG_PRINTF("  %s(): searching level with >=%llu"
2038 			     " distinct records, n_prefix=%lu\n",
2039 			     __func__, N_DIFF_REQUIRED(index), n_prefix);
2040 
2041 		/* Commit the mtr to release the tree S lock to allow
2042 		other threads to do some work too. */
2043 		mtr_commit(&mtr);
2044 		mtr_start(&mtr);
2045 		mtr_sx_lock(dict_index_get_lock(index), &mtr);
2046 		if (root_level != btr_height_get(index, &mtr)) {
2047 			/* Just quit if the tree has changed beyond
2048 			recognition here. The old stats from previous
2049 			runs will remain in the values that we have
2050 			not calculated yet. Initially when the index
2051 			object is created the stats members are given
2052 			some sensible values so leaving them untouched
2053 			here even the first time will not cause us to
2054 			read uninitialized memory later. */
2055 			break;
2056 		}
2057 
2058 		/* check whether we should pick the current level;
2059 		we pick level 1 even if it does not have enough
2060 		distinct records because we do not want to scan the
2061 		leaf level because it may contain too many records */
2062 		if (level_is_analyzed
2063 		    && (n_diff_on_level[n_prefix - 1] >= N_DIFF_REQUIRED(index)
2064 			|| level == 1)) {
2065 
2066 			goto found_level;
2067 		}
2068 
2069 		/* search for a level that contains enough distinct records */
2070 
2071 		if (level_is_analyzed && level > 1) {
2072 
2073 			/* if this does not hold we should be on
2074 			"found_level" instead of here */
2075 			ut_ad(n_diff_on_level[n_prefix - 1]
2076 			      < N_DIFF_REQUIRED(index));
2077 
2078 			level--;
2079 			level_is_analyzed = false;
2080 		}
2081 
2082 		/* descend into the tree, searching for "good enough" level */
2083 		for (;;) {
2084 
2085 			/* make sure we do not scan the leaf level
2086 			accidentally, it may contain too many pages */
2087 			ut_ad(level > 0);
2088 
2089 			/* scanning the same level twice is an optimization
2090 			bug */
2091 			ut_ad(!level_is_analyzed);
2092 
2093 			/* Do not scan if this would read too many pages.
2094 			Here we use the following fact:
2095 			the number of pages on level L equals the number
2096 			of records on level L+1, thus we deduce that the
2097 			following call would scan total_recs pages, because
2098 			total_recs is left from the previous iteration when
2099 			we scanned one level upper or we have not scanned any
2100 			levels yet in which case total_recs is 1. */
2101 			if (total_recs > N_SAMPLE_PAGES(index)) {
2102 
2103 				/* if the above cond is true then we are
2104 				not at the root level since on the root
2105 				level total_recs == 1 (set before we
2106 				enter the n-prefix loop) and cannot
2107 				be > N_SAMPLE_PAGES(index) */
2108 				ut_a(level != root_level);
2109 
2110 				/* step one level back and be satisfied with
2111 				whatever it contains */
2112 				level++;
2113 				level_is_analyzed = true;
2114 
2115 				break;
2116 			}
2117 
2118 			dict_stats_analyze_index_level(index,
2119 						       level,
2120 						       n_diff_on_level,
2121 						       &total_recs,
2122 						       &total_pages,
2123 						       n_diff_boundaries,
2124 						       &mtr);
2125 
2126 			level_is_analyzed = true;
2127 
2128 			if (level == 1
2129 			    || n_diff_on_level[n_prefix - 1]
2130 			    >= N_DIFF_REQUIRED(index)) {
2131 				/* we have reached the last level we could scan
2132 				or we found a good level with many distinct
2133 				records */
2134 				break;
2135 			}
2136 
2137 			level--;
2138 			level_is_analyzed = false;
2139 		}
2140 found_level:
2141 
2142 		DEBUG_PRINTF("  %s(): found level %lu that has " UINT64PF
2143 			     " distinct records for n_prefix=%lu\n",
2144 			     __func__, level, n_diff_on_level[n_prefix - 1],
2145 			     n_prefix);
2146 		/* here we are either on level 1 or the level that we are on
2147 		contains >= N_DIFF_REQUIRED distinct keys or we did not scan
2148 		deeper levels because they would contain too many pages */
2149 
2150 		ut_ad(level > 0);
2151 
2152 		ut_ad(level_is_analyzed);
2153 
2154 		/* if any of these is 0 then there is exactly one page in the
2155 		B-tree and it is empty and we should have done full scan and
2156 		should not be here */
2157 		ut_ad(total_recs > 0);
2158 		ut_ad(n_diff_on_level[n_prefix - 1] > 0);
2159 
2160 		ut_ad(N_SAMPLE_PAGES(index) > 0);
2161 
2162 		n_diff_data_t*	data = &n_diff_data[n_prefix - 1];
2163 
2164 		data->level = level;
2165 
2166 		data->n_recs_on_level = total_recs;
2167 
2168 		data->n_diff_on_level = n_diff_on_level[n_prefix - 1];
2169 
2170 		data->n_leaf_pages_to_analyze = std::min(
2171 			N_SAMPLE_PAGES(index),
2172 			n_diff_on_level[n_prefix - 1]);
2173 
2174 		/* pick some records from this level and dive below them for
2175 		the given n_prefix */
2176 
2177 		dict_stats_analyze_index_for_n_prefix(
2178 			index, n_prefix, &n_diff_boundaries[n_prefix - 1],
2179 			data, &mtr);
2180 	}
2181 
2182 	mtr_commit(&mtr);
2183 
2184 	UT_DELETE_ARRAY(n_diff_boundaries);
2185 
2186 	UT_DELETE_ARRAY(n_diff_on_level);
2187 
2188 	/* n_prefix == 0 means that the above loop did not end up prematurely
2189 	due to tree being changed and so n_diff_data[] is set up. */
2190 	if (n_prefix == 0) {
2191 		dict_stats_index_set_n_diff(n_diff_data, index);
2192 	}
2193 
2194 	UT_DELETE_ARRAY(n_diff_data);
2195 
2196 	dict_stats_assert_initialized_index(index);
2197 	DBUG_VOID_RETURN;
2198 }
2199 
2200 /*********************************************************************//**
2201 Calculates new estimates for table and index statistics. This function
2202 is relatively slow and is used to calculate persistent statistics that
2203 will be saved on disk.
2204 @return DB_SUCCESS or error code */
2205 static
2206 dberr_t
dict_stats_update_persistent(dict_table_t * table)2207 dict_stats_update_persistent(
2208 /*=========================*/
2209 	dict_table_t*	table)		/*!< in/out: table */
2210 {
2211 	dict_index_t*	index;
2212 
2213 	DEBUG_PRINTF("%s(table=%s)\n", __func__, table->name);
2214 
2215 	dict_table_analyze_index_lock(table);
2216 
2217 	DEBUG_SYNC_C("innodb_dict_stats_update_persistent");
2218 
2219 	/* analyze the clustered index first */
2220 
2221 	index = dict_table_get_first_index(table);
2222 
2223 	if (index == NULL
2224 	    || dict_index_is_corrupted(index)
2225 	    || (index->type | DICT_UNIQUE) != (DICT_CLUSTERED | DICT_UNIQUE)) {
2226 
2227 		/* Table definition is corrupt */
2228 		dict_stats_empty_table(table);
2229 		dict_table_analyze_index_unlock(table);
2230 
2231 		return(DB_CORRUPTION);
2232 	}
2233 
2234 	ut_ad(!dict_index_is_ibuf(index));
2235 
2236 	dict_stats_analyze_index(index);
2237 
2238 	ulint	n_unique = dict_index_get_n_unique(index);
2239 
2240 	ib_uint64_t stat_n_rows_tmp = index->stat_n_diff_key_vals[n_unique - 1];
2241 
2242 	ib_uint64_t stat_clustered_index_size_tmp = index->stat_index_size;
2243 
2244 	/* analyze other indexes from the table, if any */
2245 
2246 	ib_uint64_t stat_sum_of_other_index_sizes_tmp = 0;
2247 
2248 	for (index = dict_table_get_next_index(index);
2249 	     index != NULL;
2250 	     index = dict_table_get_next_index(index)) {
2251 
2252 		ut_ad(!dict_index_is_ibuf(index));
2253 
2254 		if (index->type & DICT_FTS || dict_index_is_spatial(index)) {
2255 			continue;
2256 		}
2257 
2258 		dict_stats_empty_index(index);
2259 
2260 		if (dict_stats_should_ignore_index(index)) {
2261 			continue;
2262 		}
2263 
2264 		if (!(table->stats_bg_flag & BG_STAT_SHOULD_QUIT)) {
2265 			dict_stats_analyze_index(index);
2266 		}
2267 
2268 		stat_sum_of_other_index_sizes_tmp
2269 			+= index->stat_index_size;
2270 	}
2271 
2272 	dict_table_stats_lock(table, RW_X_LATCH);
2273 
2274 	table->stat_n_rows = stat_n_rows_tmp;
2275 
2276 	table->stat_clustered_index_size = stat_clustered_index_size_tmp;
2277 
2278 	table->stat_sum_of_other_index_sizes = stat_sum_of_other_index_sizes_tmp;
2279 
2280 	table->stats_last_recalc = ut_time_monotonic();
2281 
2282 	table->stat_modified_counter = 0;
2283 
2284 	table->stat_initialized = TRUE;
2285 
2286 	dict_stats_assert_initialized(table);
2287 
2288 	dict_table_stats_unlock(table, RW_X_LATCH);
2289 
2290 	dict_table_analyze_index_unlock(table);
2291 
2292 	return(DB_SUCCESS);
2293 }
2294 
2295 #include "mysql_com.h"
/** Save an individual index's statistic into the persistent statistics
storage.
Caller must hold dict_operation_lock in X mode and dict_sys->mutex (see
the assertions below).
@param[in]	index			index to be updated
@param[in]	last_update		timestamp of the stat
@param[in]	stat_name		name of the stat
@param[in]	stat_value		value of the stat
@param[in]	sample_size		n pages sampled or NULL
@param[in]	stat_description	description of the stat
@param[in,out]	trx			in case of NULL the function will
allocate and free the trx object. If it is not NULL then it will be
rolled back only in the case of error, but not freed.
@return DB_SUCCESS or error code */
static
dberr_t
dict_stats_save_index_stat(
	dict_index_t*	index,
	lint		last_update,
	const char*	stat_name,
	ib_uint64_t	stat_value,
	ib_uint64_t*	sample_size,
	const char*	stat_description,
	trx_t*		trx)
{
	dberr_t		ret;
	pars_info_t*	pinfo;
	char		db_utf8[MAX_DB_UTF8_LEN];
	char		table_utf8[MAX_TABLE_UTF8_LEN];

	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
	ut_ad(mutex_own(&dict_sys->mutex));

	/* Split "db/table" into UTF-8 database and table name buffers. */
	dict_fs2utf8(index->table->name.m_name, db_utf8, sizeof(db_utf8),
		     table_utf8, sizeof(table_utf8));

	/* Bind all literals for the SQL procedure below. The
	UNIV_MEM_ASSERT_RW_ABORT calls verify the bound memory is
	initialized (valgrind instrumentation). */
	pinfo = pars_info_create();
	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
	pars_info_add_str_literal(pinfo, "index_name", index->name);
	UNIV_MEM_ASSERT_RW_ABORT(&last_update, 4);
	pars_info_add_int4_literal(pinfo, "last_update", last_update);
	UNIV_MEM_ASSERT_RW_ABORT(stat_name, strlen(stat_name));
	pars_info_add_str_literal(pinfo, "stat_name", stat_name);
	UNIV_MEM_ASSERT_RW_ABORT(&stat_value, 8);
	pars_info_add_ull_literal(pinfo, "stat_value", stat_value);
	if (sample_size != NULL) {
		UNIV_MEM_ASSERT_RW_ABORT(sample_size, 8);
		pars_info_add_ull_literal(pinfo, "sample_size", *sample_size);
	} else {
		/* No sample size for this stat: bind an SQL NULL. */
		pars_info_add_literal(pinfo, "sample_size", NULL,
				      UNIV_SQL_NULL, DATA_FIXBINARY, 0);
	}
	UNIV_MEM_ASSERT_RW_ABORT(stat_description, strlen(stat_description));
	pars_info_add_str_literal(pinfo, "stat_description",
				  stat_description);

	/* DELETE + INSERT implements an upsert of the single stats row. */
	ret = dict_stats_exec_sql(
		pinfo,
		"PROCEDURE INDEX_STATS_SAVE () IS\n"
		"BEGIN\n"

		"DELETE FROM \"" INDEX_STATS_NAME "\"\n"
		"WHERE\n"
		"database_name = :database_name AND\n"
		"table_name = :table_name AND\n"
		"index_name = :index_name AND\n"
		"stat_name = :stat_name;\n"

		"INSERT INTO \"" INDEX_STATS_NAME "\"\n"
		"VALUES\n"
		"(\n"
		":database_name,\n"
		":table_name,\n"
		":index_name,\n"
		":last_update,\n"
		":stat_name,\n"
		":stat_value,\n"
		":sample_size,\n"
		":stat_description\n"
		");\n"
		"END;", trx);

	if (ret != DB_SUCCESS) {
		ib::error() << "Cannot save index statistics for table "
			<< index->table->name
			<< ", index " << index->name
			<< ", stat name \"" << stat_name << "\": "
			<< ut_strerr(ret);
	}

	return(ret);
}
2387 
/** Save the table's statistics into the persistent statistics storage.
Works on a read-only snapshot of the table's stats so the live table is
not latched while the dictionary SQL runs.
@param[in]	table_orig	table whose stats to save
@param[in]	only_for_index	if this is non-NULL, then stats for indexes
that are not equal to it will not be saved, if NULL, then all indexes' stats
are saved
@return DB_SUCCESS or error code */
static
dberr_t
dict_stats_save(
	dict_table_t*		table_orig,
	const index_id_t*	only_for_index)
{
	pars_info_t*	pinfo;
	lint		now;
	dberr_t		ret;
	dict_table_t*	table;
	char		db_utf8[MAX_DB_UTF8_LEN];
	char		table_utf8[MAX_TABLE_UTF8_LEN];

	table = dict_stats_snapshot_create(table_orig);

	dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8),
		     table_utf8, sizeof(table_utf8));

	rw_lock_x_lock(dict_operation_lock);
	mutex_enter(&dict_sys->mutex);

	/* MySQL's timestamp is 4 byte, so we use
	pars_info_add_int4_literal() which takes a lint arg, so "now" is
	lint */
	now = (lint) ut_time();

	pinfo = pars_info_create();

	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
	pars_info_add_int4_literal(pinfo, "last_update", now);
	pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows);
	pars_info_add_ull_literal(pinfo, "clustered_index_size",
		table->stat_clustered_index_size);
	pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes",
		table->stat_sum_of_other_index_sizes);

	/* DELETE + INSERT implements an upsert of the table-stats row. */
	ret = dict_stats_exec_sql(
		pinfo,
		"PROCEDURE TABLE_STATS_SAVE () IS\n"
		"BEGIN\n"

		"DELETE FROM \"" TABLE_STATS_NAME "\"\n"
		"WHERE\n"
		"database_name = :database_name AND\n"
		"table_name = :table_name;\n"

		"INSERT INTO \"" TABLE_STATS_NAME "\"\n"
		"VALUES\n"
		"(\n"
		":database_name,\n"
		":table_name,\n"
		":last_update,\n"
		":n_rows,\n"
		":clustered_index_size,\n"
		":sum_of_other_index_sizes\n"
		");\n"
		"END;", NULL);

	if (ret != DB_SUCCESS) {
		ib::error() << "Cannot save table statistics for table "
			<< table->name << ": " << ut_strerr(ret);

		mutex_exit(&dict_sys->mutex);
		rw_lock_x_unlock(dict_operation_lock);

		dict_stats_snapshot_free(table);

		return(ret);
	}

	/* All per-index rows below are written in one transaction. */
	trx_t*	trx = trx_allocate_for_background();

	if (srv_read_only_mode) {
		trx_start_internal_read_only(trx);
	} else {
		trx_start_internal(trx);
	}

	dict_index_t*	index;
	index_map_t	indexes(
		(ut_strcmp_functor()),
		index_map_t_allocator(mem_key_dict_stats_index_map_t));

	/* Below we do all the modifications in innodb_index_stats in a single
	transaction for performance reasons. Modifying more than one row in a
	single transaction may deadlock with other transactions if they
	lock the rows in different order. Other transaction could be for
	example when we DROP a table and do
	DELETE FROM innodb_index_stats WHERE database_name = '...'
	AND table_name = '...'; which will affect more than one row. To
	prevent deadlocks we always lock the rows in the same order - the
	order of the PK, which is (database_name, table_name, index_name,
	stat_name). This is why below we sort the indexes by name and then
	for each index, do the mods ordered by stat_name. */

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		indexes[index->name] = index;
	}

	index_map_t::const_iterator	it;

	for (it = indexes.begin(); it != indexes.end(); ++it) {

		index = it->second;

		if (only_for_index != NULL && index->id != *only_for_index) {
			continue;
		}

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_ibuf(index));

		/* One "n_diff_pfxNN" row per n-column unique prefix. */
		for (ulint i = 0; i < index->n_uniq; i++) {

			char	stat_name[16];
			char	stat_description[1024];
			ulint	j;

			ut_snprintf(stat_name, sizeof(stat_name),
				    "n_diff_pfx%02lu", i + 1);

			/* craft a string that contains the column names */
			ut_snprintf(stat_description,
				    sizeof(stat_description),
				    "%s", index->fields[0].name());
			for (j = 1; j <= i; j++) {
				size_t	len;

				len = strlen(stat_description);

				ut_snprintf(stat_description + len,
					    sizeof(stat_description) - len,
					    ",%s", index->fields[j].name());
			}

			ret = dict_stats_save_index_stat(
				index, now, stat_name,
				index->stat_n_diff_key_vals[i],
				&index->stat_n_sample_sizes[i],
				stat_description, trx);

			if (ret != DB_SUCCESS) {
				goto end;
			}
		}

		ret = dict_stats_save_index_stat(index, now, "n_leaf_pages",
						 index->stat_n_leaf_pages,
						 NULL,
						 "Number of leaf pages "
						 "in the index", trx);
		if (ret != DB_SUCCESS) {
			goto end;
		}

		ret = dict_stats_save_index_stat(index, now, "size",
						 index->stat_index_size,
						 NULL,
						 "Number of pages "
						 "in the index", trx);
		if (ret != DB_SUCCESS) {
			goto end;
		}
	}

	trx_commit_for_mysql(trx);

end:
	/* Error paths jump here with ret set; on error the trx has already
	been rolled back by dict_stats_exec_sql() (see
	dict_stats_save_index_stat() contract), so it only needs freeing. */
	trx_free_for_background(trx);

	mutex_exit(&dict_sys->mutex);
	rw_lock_x_unlock(dict_operation_lock);

	dict_stats_snapshot_free(table);

	return(ret);
}
2578 
2579 /*********************************************************************//**
2580 Called for the row that is selected by
2581 SELECT ... FROM mysql.innodb_table_stats WHERE table='...'
2582 The second argument is a pointer to the table and the fetched stats are
2583 written to it.
2584 @return non-NULL dummy */
2585 static
2586 ibool
dict_stats_fetch_table_stats_step(void * node_void,void * table_void)2587 dict_stats_fetch_table_stats_step(
2588 /*==============================*/
2589 	void*	node_void,	/*!< in: select node */
2590 	void*	table_void)	/*!< out: table */
2591 {
2592 	sel_node_t*	node = (sel_node_t*) node_void;
2593 	dict_table_t*	table = (dict_table_t*) table_void;
2594 	que_common_t*	cnode;
2595 	int		i;
2596 
2597 	/* this should loop exactly 3 times - for
2598 	n_rows,clustered_index_size,sum_of_other_index_sizes */
2599 	for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
2600 	     cnode != NULL;
2601 	     cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
2602 	     i++) {
2603 
2604 		const byte*	data;
2605 		dfield_t*	dfield = que_node_get_val(cnode);
2606 		dtype_t*	type = dfield_get_type(dfield);
2607 		ulint		len = dfield_get_len(dfield);
2608 
2609 		data = static_cast<const byte*>(dfield_get_data(dfield));
2610 
2611 		switch (i) {
2612 		case 0: /* mysql.innodb_table_stats.n_rows */
2613 
2614 			ut_a(dtype_get_mtype(type) == DATA_INT);
2615 			ut_a(len == 8);
2616 
2617 			table->stat_n_rows = mach_read_from_8(data);
2618 
2619 			break;
2620 
2621 		case 1: /* mysql.innodb_table_stats.clustered_index_size */
2622 
2623 			ut_a(dtype_get_mtype(type) == DATA_INT);
2624 			ut_a(len == 8);
2625 
2626 			table->stat_clustered_index_size
2627 				= (ulint) mach_read_from_8(data);
2628 
2629 			break;
2630 
2631 		case 2: /* mysql.innodb_table_stats.sum_of_other_index_sizes */
2632 
2633 			ut_a(dtype_get_mtype(type) == DATA_INT);
2634 			ut_a(len == 8);
2635 
2636 			table->stat_sum_of_other_index_sizes
2637 				= (ulint) mach_read_from_8(data);
2638 
2639 			break;
2640 
2641 		default:
2642 
2643 			/* someone changed SELECT
2644 			n_rows,clustered_index_size,sum_of_other_index_sizes
2645 			to select more columns from innodb_table_stats without
2646 			adjusting here */
2647 			ut_error;
2648 		}
2649 	}
2650 
2651 	/* if i < 3 this means someone changed the
2652 	SELECT n_rows,clustered_index_size,sum_of_other_index_sizes
2653 	to select less columns from innodb_table_stats without adjusting here;
2654 	if i > 3 we would have ut_error'ed earlier */
2655 	ut_a(i == 3 /*n_rows,clustered_index_size,sum_of_other_index_sizes*/);
2656 
2657 	/* XXX this is not used but returning non-NULL is necessary */
2658 	return(TRUE);
2659 }
2660 
/** Aux struct used to pass a table and a boolean to
dict_stats_fetch_index_stats_step() through its single void* argument. */
struct index_fetch_t {
	dict_table_t*	table;	/*!< table whose indexes are to be modified */
	bool		stats_were_modified; /*!< will be set to true if at
				least one index stats were modified */
};
2668 
2669 /*********************************************************************//**
2670 Called for the rows that are selected by
2671 SELECT ... FROM mysql.innodb_index_stats WHERE table='...'
2672 The second argument is a pointer to the table and the fetched stats are
2673 written to its indexes.
2674 Let a table has N indexes and each index has Ui unique columns for i=1..N,
2675 then mysql.innodb_index_stats will have SUM(Ui) i=1..N rows for that table.
2676 So this function will be called SUM(Ui) times where SUM(Ui) is of magnitude
2677 N*AVG(Ui). In each call it searches for the currently fetched index into
2678 table->indexes linearly, assuming this list is not sorted. Thus, overall,
2679 fetching all indexes' stats from mysql.innodb_index_stats is O(N^2) where N
2680 is the number of indexes.
2681 This can be improved if we sort table->indexes in a temporary area just once
2682 and then search in that sorted list. Then the complexity will be O(N*log(N)).
2683 We assume a table will not have more than 100 indexes, so we go with the
2684 simpler N^2 algorithm.
2685 @return non-NULL dummy */
static
ibool
dict_stats_fetch_index_stats_step(
/*==============================*/
	void*	node_void,	/*!< in: select node */
	void*	arg_void)	/*!< out: table + a flag that tells if we
				modified anything */
{
	sel_node_t*	node = (sel_node_t*) node_void;
	index_fetch_t*	arg = (index_fetch_t*) arg_void;
	dict_table_t*	table = arg->table;
	dict_index_t*	index = NULL;
	que_common_t*	cnode;
	const char*	stat_name = NULL;
	ulint		stat_name_len = ULINT_UNDEFINED;
	ib_uint64_t	stat_value = UINT64_UNDEFINED;
	ib_uint64_t	sample_size = UINT64_UNDEFINED;
	int		i;

	/* this should loop exactly 4 times - for the columns that
	were selected: index_name,stat_name,stat_value,sample_size */
	for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
	     cnode != NULL;
	     cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
	     i++) {

		const byte*	data;
		dfield_t*	dfield = que_node_get_val(cnode);
		dtype_t*	type = dfield_get_type(dfield);
		ulint		len = dfield_get_len(dfield);

		data = static_cast<const byte*>(dfield_get_data(dfield));

		/* the case labels below must mirror the column order of the
		SELECT in dict_stats_fetch_from_ps() */
		switch (i) {
		case 0: /* mysql.innodb_index_stats.index_name */

			ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);

			/* search for index in table's indexes whose name
			matches data; the fetched index name is in data,
			has no terminating '\0' and has length len */
			for (index = dict_table_get_first_index(table);
			     index != NULL;
			     index = dict_table_get_next_index(index)) {

				if (index->is_committed()
				    && strlen(index->name) == len
				    && memcmp(index->name, data, len) == 0) {
					/* the corresponding index was found */
					break;
				}
			}

			/* if index is NULL here this means that
			mysql.innodb_index_stats contains more rows than the
			number of indexes in the table; this is ok, we just
			return ignoring those extra rows; in other words
			dict_stats_fetch_index_stats_step() has been called
			for a row from index_stats with unknown index_name
			column */
			if (index == NULL) {

				return(TRUE);
			}

			break;

		case 1: /* mysql.innodb_index_stats.stat_name */

			ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);

			ut_a(index != NULL);

			/* like index_name, stat_name is not NUL-terminated;
			stat_name_len carries its length */
			stat_name = (const char*) data;
			stat_name_len = len;

			break;

		case 2: /* mysql.innodb_index_stats.stat_value */

			ut_a(dtype_get_mtype(type) == DATA_INT);
			ut_a(len == 8);

			ut_a(index != NULL);
			ut_a(stat_name != NULL);
			ut_a(stat_name_len != ULINT_UNDEFINED);

			stat_value = mach_read_from_8(data);

			break;

		case 3: /* mysql.innodb_index_stats.sample_size */

			ut_a(dtype_get_mtype(type) == DATA_INT);
			ut_a(len == 8 || len == UNIV_SQL_NULL);

			ut_a(index != NULL);
			ut_a(stat_name != NULL);
			ut_a(stat_name_len != ULINT_UNDEFINED);
			ut_a(stat_value != UINT64_UNDEFINED);

			/* sample_size is nullable; leave it as
			UINT64_UNDEFINED when the column is SQL NULL */
			if (len == UNIV_SQL_NULL) {
				break;
			}
			/* else */

			sample_size = mach_read_from_8(data);

			break;

		default:

			/* someone changed
			SELECT index_name,stat_name,stat_value,sample_size
			to select more columns from innodb_index_stats without
			adjusting here */
			ut_error;
		}
	}

	/* if i < 4 this means someone changed the
	SELECT index_name,stat_name,stat_value,sample_size
	to select less columns from innodb_index_stats without adjusting here;
	if i > 4 we would have ut_error'ed earlier */
	ut_a(i == 4 /* index_name,stat_name,stat_value,sample_size */);

	ut_a(index != NULL);
	ut_a(stat_name != NULL);
	ut_a(stat_name_len != ULINT_UNDEFINED);
	ut_a(stat_value != UINT64_UNDEFINED);
	/* sample_size could be UINT64_UNDEFINED here, if it is NULL */

/* Prefix of the "n_diff_pfxNN" stat names; the two digits NN encode the
1-based number of indexed columns the distinct-values estimate refers to
(must be in [1, index->n_uniq], checked below). */
#define PFX	"n_diff_pfx"
#define PFX_LEN	10

	if (stat_name_len == 4 /* strlen("size") */
	    && native_strncasecmp("size", stat_name, stat_name_len) == 0) {
		index->stat_index_size = (ulint) stat_value;
		arg->stats_were_modified = true;
	} else if (stat_name_len == 12 /* strlen("n_leaf_pages") */
		   && native_strncasecmp("n_leaf_pages", stat_name, stat_name_len)
		   == 0) {
		index->stat_n_leaf_pages = (ulint) stat_value;
		arg->stats_were_modified = true;
	} else if (stat_name_len > PFX_LEN /* e.g. stat_name=="n_diff_pfx01" */
		   && native_strncasecmp(PFX, stat_name, PFX_LEN) == 0) {

		const char*	num_ptr;
		unsigned long	n_pfx;

		/* point num_ptr into "1" from "n_diff_pfx12..." */
		num_ptr = stat_name + PFX_LEN;

		/* stat_name should have exactly 2 chars appended to PFX
		and they should be digits */
		if (stat_name_len != PFX_LEN + 2
		    || num_ptr[0] < '0' || num_ptr[0] > '9'
		    || num_ptr[1] < '0' || num_ptr[1] > '9') {

			char	db_utf8[MAX_DB_UTF8_LEN];
			char	table_utf8[MAX_TABLE_UTF8_LEN];

			dict_fs2utf8(table->name.m_name,
				     db_utf8, sizeof(db_utf8),
				     table_utf8, sizeof(table_utf8));

			ib::info	out;
			out << "Ignoring strange row from "
				<< INDEX_STATS_NAME_PRINT << " WHERE"
				" database_name = '" << db_utf8
				<< "' AND table_name = '" << table_utf8
				<< "' AND index_name = '" << index->name()
				<< "' AND stat_name = '";
			out.write(stat_name, stat_name_len);
			out << "'; because stat_name is malformed";
			return(TRUE);
		}
		/* else */

		/* extract 12 from "n_diff_pfx12..." into n_pfx
		note that stat_name does not have a terminating '\0' */
		n_pfx = (num_ptr[0] - '0') * 10 + (num_ptr[1] - '0');

		ulint	n_uniq = index->n_uniq;

		if (n_pfx == 0 || n_pfx > n_uniq) {

			char	db_utf8[MAX_DB_UTF8_LEN];
			char	table_utf8[MAX_TABLE_UTF8_LEN];

			dict_fs2utf8(table->name.m_name,
				     db_utf8, sizeof(db_utf8),
				     table_utf8, sizeof(table_utf8));

			ib::info	out;
			out << "Ignoring strange row from "
				<< INDEX_STATS_NAME_PRINT << " WHERE"
				" database_name = '" << db_utf8
				<< "' AND table_name = '" << table_utf8
				<< "' AND index_name = '" << index->name()
				<< "' AND stat_name = '";
			out.write(stat_name, stat_name_len);
			out << "'; because stat_name is out of range, the index"
				" has " << n_uniq << " unique columns";

			return(TRUE);
		}
		/* else */

		index->stat_n_diff_key_vals[n_pfx - 1] = stat_value;

		if (sample_size != UINT64_UNDEFINED) {
			index->stat_n_sample_sizes[n_pfx - 1] = sample_size;
		} else {
			/* hmm, strange... the user must have UPDATEd the
			table manually and SET sample_size = NULL */
			index->stat_n_sample_sizes[n_pfx - 1] = 0;
		}

		/* the SELECT above does not fetch non-NULL counts, so
		reset the corresponding slot */
		index->stat_n_non_null_key_vals[n_pfx - 1] = 0;

		arg->stats_were_modified = true;
	} else {
		/* silently ignore rows with unknown stat_name, the
		user may have developed her own stats */
	}

	/* XXX this is not used but returning non-NULL is necessary */
	return(TRUE);
}
2916 
2917 /*********************************************************************//**
2918 Read table's statistics from the persistent statistics storage.
2919 @return DB_SUCCESS or error code */
2920 static
2921 dberr_t
dict_stats_fetch_from_ps(dict_table_t * table)2922 dict_stats_fetch_from_ps(
2923 /*=====================*/
2924 	dict_table_t*	table)	/*!< in/out: table */
2925 {
2926 	index_fetch_t	index_fetch_arg;
2927 	trx_t*		trx;
2928 	pars_info_t*	pinfo;
2929 	dberr_t		ret;
2930 	char		db_utf8[MAX_DB_UTF8_LEN];
2931 	char		table_utf8[MAX_TABLE_UTF8_LEN];
2932 
2933 	ut_ad(!mutex_own(&dict_sys->mutex));
2934 
2935 	/* Initialize all stats to dummy values before fetching because if
2936 	the persistent storage contains incomplete stats (e.g. missing stats
2937 	for some index) then we would end up with (partially) uninitialized
2938 	stats. */
2939 	dict_stats_empty_table(table);
2940 
2941 	trx = trx_allocate_for_background();
2942 
2943 	/* Use 'read-uncommitted' so that the SELECTs we execute
2944 	do not get blocked in case some user has locked the rows we
2945 	are SELECTing */
2946 
2947 	trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
2948 
2949 	if (srv_read_only_mode) {
2950 		trx_start_internal_read_only(trx);
2951 	} else {
2952 		trx_start_internal(trx);
2953 	}
2954 
2955 	dict_fs2utf8(table->name.m_name, db_utf8, sizeof(db_utf8),
2956 		     table_utf8, sizeof(table_utf8));
2957 
2958 	pinfo = pars_info_create();
2959 
2960 	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
2961 
2962 	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
2963 
2964 	pars_info_bind_function(pinfo,
2965 			       "fetch_table_stats_step",
2966 			       dict_stats_fetch_table_stats_step,
2967 			       table);
2968 
2969 	index_fetch_arg.table = table;
2970 	index_fetch_arg.stats_were_modified = false;
2971 	pars_info_bind_function(pinfo,
2972 			        "fetch_index_stats_step",
2973 			        dict_stats_fetch_index_stats_step,
2974 			        &index_fetch_arg);
2975 
2976 	ret = que_eval_sql(pinfo,
2977 			   "PROCEDURE FETCH_STATS () IS\n"
2978 			   "found INT;\n"
2979 			   "DECLARE FUNCTION fetch_table_stats_step;\n"
2980 			   "DECLARE FUNCTION fetch_index_stats_step;\n"
2981 			   "DECLARE CURSOR table_stats_cur IS\n"
2982 			   "  SELECT\n"
2983 			   /* if you change the selected fields, be
2984 			   sure to adjust
2985 			   dict_stats_fetch_table_stats_step() */
2986 			   "  n_rows,\n"
2987 			   "  clustered_index_size,\n"
2988 			   "  sum_of_other_index_sizes\n"
2989 			   "  FROM \"" TABLE_STATS_NAME "\"\n"
2990 			   "  WHERE\n"
2991 			   "  database_name = :database_name AND\n"
2992 			   "  table_name = :table_name;\n"
2993 			   "DECLARE CURSOR index_stats_cur IS\n"
2994 			   "  SELECT\n"
2995 			   /* if you change the selected fields, be
2996 			   sure to adjust
2997 			   dict_stats_fetch_index_stats_step() */
2998 			   "  index_name,\n"
2999 			   "  stat_name,\n"
3000 			   "  stat_value,\n"
3001 			   "  sample_size\n"
3002 			   "  FROM \"" INDEX_STATS_NAME "\"\n"
3003 			   "  WHERE\n"
3004 			   "  database_name = :database_name AND\n"
3005 			   "  table_name = :table_name;\n"
3006 
3007 			   "BEGIN\n"
3008 
3009 			   "OPEN table_stats_cur;\n"
3010 			   "FETCH table_stats_cur INTO\n"
3011 			   "  fetch_table_stats_step();\n"
3012 			   "IF (SQL % NOTFOUND) THEN\n"
3013 			   "  CLOSE table_stats_cur;\n"
3014 			   "  RETURN;\n"
3015 			   "END IF;\n"
3016 			   "CLOSE table_stats_cur;\n"
3017 
3018 			   "OPEN index_stats_cur;\n"
3019 			   "found := 1;\n"
3020 			   "WHILE found = 1 LOOP\n"
3021 			   "  FETCH index_stats_cur INTO\n"
3022 			   "    fetch_index_stats_step();\n"
3023 			   "  IF (SQL % NOTFOUND) THEN\n"
3024 			   "    found := 0;\n"
3025 			   "  END IF;\n"
3026 			   "END LOOP;\n"
3027 			   "CLOSE index_stats_cur;\n"
3028 
3029 			   "END;",
3030 			   TRUE, trx);
3031 	/* pinfo is freed by que_eval_sql() */
3032 
3033 	trx_commit_for_mysql(trx);
3034 
3035 	trx_free_for_background(trx);
3036 
3037 	if (!index_fetch_arg.stats_were_modified) {
3038 		return(DB_STATS_DO_NOT_EXIST);
3039 	}
3040 
3041 	return(ret);
3042 }
3043 
3044 /*********************************************************************//**
3045 Fetches or calculates new estimates for index statistics. */
3046 void
dict_stats_update_for_index(dict_index_t * index)3047 dict_stats_update_for_index(
3048 /*========================*/
3049 	dict_index_t*	index)	/*!< in/out: index */
3050 {
3051 	DBUG_ENTER("dict_stats_update_for_index");
3052 
3053 	ut_ad(!mutex_own(&dict_sys->mutex));
3054 
3055 	if (dict_stats_is_persistent_enabled(index->table)) {
3056 
3057 		if (dict_stats_persistent_storage_check(false)) {
3058 			dict_table_analyze_index_lock(index->table);
3059 			dict_stats_analyze_index(index);
3060 			ulint stat_sum_of_other_index_sizes_tmp = index->stat_index_size;
3061 			dict_table_stats_lock(index->table, RW_X_LATCH);
3062 			index->table->stat_sum_of_other_index_sizes += stat_sum_of_other_index_sizes_tmp;
3063 			dict_table_stats_unlock(index->table, RW_X_LATCH);
3064 			dict_table_analyze_index_unlock(index->table);
3065 			dict_stats_save(index->table, &index->id);
3066 			DBUG_VOID_RETURN;
3067 		}
3068 		/* else */
3069 
3070 		/* Fall back to transient stats since the persistent
3071 		storage is not present or is corrupted */
3072 
3073 		ib::info() << "Recalculation of persistent statistics"
3074 			" requested for table " << index->table->name
3075 			<< " index " << index->name
3076 			<< " but the required"
3077 			" persistent statistics storage is not present or is"
3078 			" corrupted. Using transient stats instead.";
3079 	}
3080 
3081 	dict_table_stats_lock(index->table, RW_X_LATCH);
3082 	dict_stats_update_transient_for_index(index);
3083 	dict_table_stats_unlock(index->table, RW_X_LATCH);
3084 
3085 	DBUG_VOID_RETURN;
3086 }
3087 
3088 /*********************************************************************//**
3089 Calculates new estimates for table and index statistics. The statistics
3090 are used in query optimization.
3091 @return DB_SUCCESS or error code */
3092 dberr_t
dict_stats_update(dict_table_t * table,dict_stats_upd_option_t stats_upd_option)3093 dict_stats_update(
3094 /*==============*/
3095 	dict_table_t*		table,	/*!< in/out: table */
3096 	dict_stats_upd_option_t	stats_upd_option)
3097 					/*!< in: whether to (re) calc
3098 					the stats or to fetch them from
3099 					the persistent statistics
3100 					storage */
3101 {
3102 	ut_ad(!mutex_own(&dict_sys->mutex));
3103 
3104 	if (table->ibd_file_missing) {
3105 
3106 		ib::warn() << "Cannot calculate statistics for table "
3107 			<< table->name
3108 			<< " because the .ibd file is missing. "
3109 			<< TROUBLESHOOTING_MSG;
3110 
3111 		dict_stats_empty_table(table);
3112 		return(DB_TABLESPACE_DELETED);
3113 	} else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
3114 		/* If we have set a high innodb_force_recovery level, do
3115 		not calculate statistics, as a badly corrupted index can
3116 		cause a crash in it. */
3117 		dict_stats_empty_table(table);
3118 		return(DB_SUCCESS);
3119 	}
3120 
3121 	switch (stats_upd_option) {
3122 	case DICT_STATS_RECALC_PERSISTENT:
3123 
3124 		if (srv_read_only_mode) {
3125 			goto transient;
3126 		}
3127 
3128 		/* wakes the last purge batch for exact recalculation */
3129 		if (trx_sys->rseg_history_len > 0) {
3130 			srv_wake_purge_thread_if_not_active();
3131 		}
3132 
3133 		/* Persistent recalculation requested, called from
3134 		1) ANALYZE TABLE, or
3135 		2) the auto recalculation background thread, or
3136 		3) open table if stats do not exist on disk and auto recalc
3137 		   is enabled */
3138 
3139 		/* InnoDB internal tables (e.g. SYS_TABLES) cannot have
3140 		persistent stats enabled */
3141 		ut_a(strchr(table->name.m_name, '/') != NULL);
3142 
3143 		/* check if the persistent statistics storage exists
3144 		before calling the potentially slow function
3145 		dict_stats_update_persistent(); that is a
3146 		prerequisite for dict_stats_save() succeeding */
3147 		if (dict_stats_persistent_storage_check(false)) {
3148 
3149 			dberr_t	err;
3150 
3151 			err = dict_stats_update_persistent(table);
3152 
3153 			if (err != DB_SUCCESS) {
3154 				return(err);
3155 			}
3156 
3157 			err = dict_stats_save(table, NULL);
3158 
3159 			return(err);
3160 		}
3161 
3162 		/* Fall back to transient stats since the persistent
3163 		storage is not present or is corrupted */
3164 
3165 		ib::warn() << "Recalculation of persistent statistics"
3166 			" requested for table "
3167 			<< table->name
3168 			<< " but the required persistent"
3169 			" statistics storage is not present or is corrupted."
3170 			" Using transient stats instead.";
3171 
3172 		goto transient;
3173 
3174 	case DICT_STATS_RECALC_TRANSIENT:
3175 
3176 		goto transient;
3177 
3178 	case DICT_STATS_EMPTY_TABLE:
3179 
3180 		dict_stats_empty_table(table);
3181 
3182 		/* If table is using persistent stats,
3183 		then save the stats on disk */
3184 
3185 		if (dict_stats_is_persistent_enabled(table)) {
3186 
3187 			if (dict_stats_persistent_storage_check(false)) {
3188 
3189 				return(dict_stats_save(table, NULL));
3190 			}
3191 
3192 			return(DB_STATS_DO_NOT_EXIST);
3193 		}
3194 
3195 		return(DB_SUCCESS);
3196 
3197 	case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY:
3198 
3199 		/* fetch requested, either fetch from persistent statistics
3200 		storage or use the old method */
3201 
3202 		if (table->stat_initialized) {
3203 			return(DB_SUCCESS);
3204 		}
3205 
3206 		/* InnoDB internal tables (e.g. SYS_TABLES) cannot have
3207 		persistent stats enabled */
3208 		ut_a(strchr(table->name.m_name, '/') != NULL);
3209 
3210 		if (!dict_stats_persistent_storage_check(false)) {
3211 			/* persistent statistics storage does not exist
3212 			or is corrupted, calculate the transient stats */
3213 
3214 			ib::error() << "Fetch of persistent statistics"
3215 				" requested for table "
3216 				<< table->name
3217 				<< " but the required system tables "
3218 				<< TABLE_STATS_NAME_PRINT
3219 				<< " and " << INDEX_STATS_NAME_PRINT
3220 				<< " are not present or have unexpected"
3221 				" structure. Using transient stats instead.";
3222 
3223 			goto transient;
3224 		}
3225 
3226 		dict_table_t*	t;
3227 
3228 		/* Create a dummy table object with the same name and
3229 		indexes, suitable for fetching the stats into it. */
3230 		t = dict_stats_table_clone_create(table);
3231 
3232 		dberr_t	err = dict_stats_fetch_from_ps(t);
3233 
3234 		t->stats_last_recalc = table->stats_last_recalc;
3235 		t->stat_modified_counter = 0;
3236 
3237 		switch (err) {
3238 		case DB_SUCCESS:
3239 
3240 			dict_table_stats_lock(table, RW_X_LATCH);
3241 
3242 			dict_stats_copy(table, t);
3243 
3244 			dict_stats_assert_initialized(table);
3245 
3246 			dict_table_stats_unlock(table, RW_X_LATCH);
3247 
3248 			dict_stats_table_clone_free(t);
3249 
3250 			return(DB_SUCCESS);
3251 		case DB_STATS_DO_NOT_EXIST:
3252 
3253 			dict_stats_table_clone_free(t);
3254 
3255 			if (srv_read_only_mode) {
3256 				goto transient;
3257 			}
3258 
3259 			if (dict_stats_auto_recalc_is_enabled(table)) {
3260 				return(dict_stats_update(
3261 						table,
3262 						DICT_STATS_RECALC_PERSISTENT));
3263 			}
3264 
3265 			ib::info() << "Trying to use table " << table->name
3266 				<< " which has persistent statistics enabled,"
3267 				" but auto recalculation turned off and the"
3268 				" statistics do not exist in "
3269 				TABLE_STATS_NAME_PRINT
3270 				" and " INDEX_STATS_NAME_PRINT
3271 				". Please either run \"ANALYZE TABLE "
3272 				<< table->name << ";\" manually or enable the"
3273 				" auto recalculation with \"ALTER TABLE "
3274 				<< table->name << " STATS_AUTO_RECALC=1;\"."
3275 				" InnoDB will now use transient statistics for "
3276 				<< table->name << ".";
3277 
3278 			goto transient;
3279 		default:
3280 
3281 			dict_stats_table_clone_free(t);
3282 
3283 			ib::error() << "Error fetching persistent statistics"
3284 				" for table "
3285 				<< table->name
3286 				<< " from " TABLE_STATS_NAME_PRINT " and "
3287 				INDEX_STATS_NAME_PRINT ": " << ut_strerr(err)
3288 				<< ". Using transient stats method instead.";
3289 
3290 			goto transient;
3291 		}
3292 	/* no "default:" in order to produce a compilation warning
3293 	about unhandled enumeration value */
3294 	}
3295 
3296 transient:
3297 
3298 	dict_stats_update_transient(table);
3299 
3300 	return(DB_SUCCESS);
3301 }
3302 
3303 /*********************************************************************//**
3304 Removes the information for a particular index's stats from the persistent
3305 storage if it exists and if there is data stored for this index.
3306 This function creates its own trx and commits it.
3307 A note from Marko why we cannot edit user and sys_* tables in one trx:
3308 marko: The problem is that ibuf merges should be disabled while we are
3309 rolling back dict transactions.
3310 marko: If ibuf merges are not disabled, we need to scan the *.ibd files.
3311 But we shouldn't open *.ibd files before we have rolled back dict
3312 transactions and opened the SYS_* records for the *.ibd files.
3313 @return DB_SUCCESS or error code */
3314 dberr_t
dict_stats_drop_index(const char * db_and_table,const char * iname,char * errstr,ulint errstr_sz)3315 dict_stats_drop_index(
3316 /*==================*/
3317 	const char*	db_and_table,/*!< in: db and table, e.g. 'db/table' */
3318 	const char*	iname,	/*!< in: index name */
3319 	char*		errstr, /*!< out: error message if != DB_SUCCESS
3320 				is returned */
3321 	ulint		errstr_sz)/*!< in: size of the errstr buffer */
3322 {
3323 	char		db_utf8[MAX_DB_UTF8_LEN];
3324 	char		table_utf8[MAX_TABLE_UTF8_LEN];
3325 	pars_info_t*	pinfo;
3326 	dberr_t		ret;
3327 
3328 	ut_ad(!mutex_own(&dict_sys->mutex));
3329 
3330 	/* skip indexes whose table names do not contain a database name
3331 	e.g. if we are dropping an index from SYS_TABLES */
3332 	if (strchr(db_and_table, '/') == NULL) {
3333 
3334 		return(DB_SUCCESS);
3335 	}
3336 
3337 	dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
3338 		     table_utf8, sizeof(table_utf8));
3339 
3340 	pinfo = pars_info_create();
3341 
3342 	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
3343 
3344 	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
3345 
3346 	pars_info_add_str_literal(pinfo, "index_name", iname);
3347 
3348 	rw_lock_x_lock(dict_operation_lock);
3349 	mutex_enter(&dict_sys->mutex);
3350 
3351 	ret = dict_stats_exec_sql(
3352 		pinfo,
3353 		"PROCEDURE DROP_INDEX_STATS () IS\n"
3354 		"BEGIN\n"
3355 		"DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
3356 		"database_name = :database_name AND\n"
3357 		"table_name = :table_name AND\n"
3358 		"index_name = :index_name;\n"
3359 		"END;\n", NULL);
3360 
3361 	mutex_exit(&dict_sys->mutex);
3362 	rw_lock_x_unlock(dict_operation_lock);
3363 
3364 	if (ret == DB_STATS_DO_NOT_EXIST) {
3365 		ret = DB_SUCCESS;
3366 	}
3367 
3368 	if (ret != DB_SUCCESS) {
3369 		ut_snprintf(errstr, errstr_sz,
3370 			    "Unable to delete statistics for index %s"
3371 			    " from %s%s: %s. They can be deleted later using"
3372 			    " DELETE FROM %s WHERE"
3373 			    " database_name = '%s' AND"
3374 			    " table_name = '%s' AND"
3375 			    " index_name = '%s';",
3376 			    iname,
3377 			    INDEX_STATS_NAME_PRINT,
3378 			    (ret == DB_LOCK_WAIT_TIMEOUT
3379 			     ? " because the rows are locked"
3380 			     : ""),
3381 			    ut_strerr(ret),
3382 			    INDEX_STATS_NAME_PRINT,
3383 			    db_utf8,
3384 			    table_utf8,
3385 			    iname);
3386 
3387 		ut_print_timestamp(stderr);
3388 		fprintf(stderr, " InnoDB: %s\n", errstr);
3389 	}
3390 
3391 	return(ret);
3392 }
3393 
3394 /*********************************************************************//**
3395 Executes
3396 DELETE FROM mysql.innodb_table_stats
3397 WHERE database_name = '...' AND table_name = '...';
3398 Creates its own transaction and commits it.
3399 @return DB_SUCCESS or error code */
3400 UNIV_INLINE
3401 dberr_t
dict_stats_delete_from_table_stats(const char * database_name,const char * table_name)3402 dict_stats_delete_from_table_stats(
3403 /*===============================*/
3404 	const char*	database_name,	/*!< in: database name, e.g. 'db' */
3405 	const char*	table_name)	/*!< in: table name, e.g. 'table' */
3406 {
3407 	pars_info_t*	pinfo;
3408 	dberr_t		ret;
3409 
3410 	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3411 	ut_ad(mutex_own(&dict_sys->mutex));
3412 
3413 	pinfo = pars_info_create();
3414 
3415 	pars_info_add_str_literal(pinfo, "database_name", database_name);
3416 	pars_info_add_str_literal(pinfo, "table_name", table_name);
3417 
3418 	ret = dict_stats_exec_sql(
3419 		pinfo,
3420 		"PROCEDURE DELETE_FROM_TABLE_STATS () IS\n"
3421 		"BEGIN\n"
3422 		"DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n"
3423 		"database_name = :database_name AND\n"
3424 		"table_name = :table_name;\n"
3425 		"END;\n", NULL);
3426 
3427 	return(ret);
3428 }
3429 
3430 /*********************************************************************//**
3431 Executes
3432 DELETE FROM mysql.innodb_index_stats
3433 WHERE database_name = '...' AND table_name = '...';
3434 Creates its own transaction and commits it.
3435 @return DB_SUCCESS or error code */
3436 UNIV_INLINE
3437 dberr_t
dict_stats_delete_from_index_stats(const char * database_name,const char * table_name)3438 dict_stats_delete_from_index_stats(
3439 /*===============================*/
3440 	const char*	database_name,	/*!< in: database name, e.g. 'db' */
3441 	const char*	table_name)	/*!< in: table name, e.g. 'table' */
3442 {
3443 	pars_info_t*	pinfo;
3444 	dberr_t		ret;
3445 
3446 	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3447 	ut_ad(mutex_own(&dict_sys->mutex));
3448 
3449 	pinfo = pars_info_create();
3450 
3451 	pars_info_add_str_literal(pinfo, "database_name", database_name);
3452 	pars_info_add_str_literal(pinfo, "table_name", table_name);
3453 
3454 	ret = dict_stats_exec_sql(
3455 		pinfo,
3456 		"PROCEDURE DELETE_FROM_INDEX_STATS () IS\n"
3457 		"BEGIN\n"
3458 		"DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
3459 		"database_name = :database_name AND\n"
3460 		"table_name = :table_name;\n"
3461 		"END;\n", NULL);
3462 
3463 	return(ret);
3464 }
3465 
3466 /*********************************************************************//**
3467 Removes the statistics for a table and all of its indexes from the
3468 persistent statistics storage if it exists and if there is data stored for
3469 the table. This function creates its own transaction and commits it.
3470 @return DB_SUCCESS or error code */
3471 dberr_t
dict_stats_drop_table(const char * db_and_table,char * errstr,ulint errstr_sz)3472 dict_stats_drop_table(
3473 /*==================*/
3474 	const char*	db_and_table,	/*!< in: db and table, e.g. 'db/table' */
3475 	char*		errstr,		/*!< out: error message
3476 					if != DB_SUCCESS is returned */
3477 	ulint		errstr_sz)	/*!< in: size of errstr buffer */
3478 {
3479 	char		db_utf8[MAX_DB_UTF8_LEN];
3480 	char		table_utf8[MAX_TABLE_UTF8_LEN];
3481 	dberr_t		ret;
3482 
3483 	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3484 	ut_ad(mutex_own(&dict_sys->mutex));
3485 
3486 	/* skip tables that do not contain a database name
3487 	e.g. if we are dropping SYS_TABLES */
3488 	if (strchr(db_and_table, '/') == NULL) {
3489 
3490 		return(DB_SUCCESS);
3491 	}
3492 
3493 	/* skip innodb_table_stats and innodb_index_stats themselves */
3494 	if (strcmp(db_and_table, TABLE_STATS_NAME) == 0
3495 	    || strcmp(db_and_table, INDEX_STATS_NAME) == 0) {
3496 
3497 		return(DB_SUCCESS);
3498 	}
3499 
3500 	dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
3501 		     table_utf8, sizeof(table_utf8));
3502 
3503 	ret = dict_stats_delete_from_table_stats(db_utf8, table_utf8);
3504 
3505 	if (ret == DB_SUCCESS) {
3506 		ret = dict_stats_delete_from_index_stats(db_utf8, table_utf8);
3507 	}
3508 
3509 	if (ret == DB_STATS_DO_NOT_EXIST) {
3510 		ret = DB_SUCCESS;
3511 	}
3512 
3513 	if (ret != DB_SUCCESS) {
3514 
3515 		ut_snprintf(errstr, errstr_sz,
3516 			    "Unable to delete statistics for table %s.%s: %s."
3517 			    " They can be deleted later using"
3518 
3519 			    " DELETE FROM %s WHERE"
3520 			    " database_name = '%s' AND"
3521 			    " table_name = '%s';"
3522 
3523 			    " DELETE FROM %s WHERE"
3524 			    " database_name = '%s' AND"
3525 			    " table_name = '%s';",
3526 
3527 			    db_utf8, table_utf8,
3528 			    ut_strerr(ret),
3529 
3530 			    INDEX_STATS_NAME_PRINT,
3531 			    db_utf8, table_utf8,
3532 
3533 			    TABLE_STATS_NAME_PRINT,
3534 			    db_utf8, table_utf8);
3535 	}
3536 
3537 	return(ret);
3538 }
3539 
3540 /*********************************************************************//**
3541 Executes
3542 UPDATE mysql.innodb_table_stats SET
3543 database_name = '...', table_name = '...'
3544 WHERE database_name = '...' AND table_name = '...';
3545 Creates its own transaction and commits it.
3546 @return DB_SUCCESS or error code */
3547 UNIV_INLINE
3548 dberr_t
dict_stats_rename_table_in_table_stats(const char * old_dbname_utf8,const char * old_tablename_utf8,const char * new_dbname_utf8,const char * new_tablename_utf8)3549 dict_stats_rename_table_in_table_stats(
3550 /*===================================*/
3551 	const char*	old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
3552 	const char*	old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
3553 	const char*	new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
3554 	const char*	new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
3555 {
3556 	pars_info_t*	pinfo;
3557 	dberr_t		ret;
3558 
3559 	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3560 	ut_ad(mutex_own(&dict_sys->mutex));
3561 
3562 	pinfo = pars_info_create();
3563 
3564 	pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
3565 	pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
3566 	pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
3567 	pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
3568 
3569 	ret = dict_stats_exec_sql(
3570 		pinfo,
3571 		"PROCEDURE RENAME_TABLE_IN_TABLE_STATS () IS\n"
3572 		"BEGIN\n"
3573 		"UPDATE \"" TABLE_STATS_NAME "\" SET\n"
3574 		"database_name = :new_dbname_utf8,\n"
3575 		"table_name = :new_tablename_utf8\n"
3576 		"WHERE\n"
3577 		"database_name = :old_dbname_utf8 AND\n"
3578 		"table_name = :old_tablename_utf8;\n"
3579 		"END;\n", NULL);
3580 
3581 	return(ret);
3582 }
3583 
3584 /*********************************************************************//**
3585 Executes
3586 UPDATE mysql.innodb_index_stats SET
3587 database_name = '...', table_name = '...'
3588 WHERE database_name = '...' AND table_name = '...';
3589 Creates its own transaction and commits it.
3590 @return DB_SUCCESS or error code */
3591 UNIV_INLINE
3592 dberr_t
dict_stats_rename_table_in_index_stats(const char * old_dbname_utf8,const char * old_tablename_utf8,const char * new_dbname_utf8,const char * new_tablename_utf8)3593 dict_stats_rename_table_in_index_stats(
3594 /*===================================*/
3595 	const char*	old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
3596 	const char*	old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
3597 	const char*	new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
3598 	const char*	new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
3599 {
3600 	pars_info_t*	pinfo;
3601 	dberr_t		ret;
3602 
3603 	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
3604 	ut_ad(mutex_own(&dict_sys->mutex));
3605 
3606 	pinfo = pars_info_create();
3607 
3608 	pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
3609 	pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
3610 	pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
3611 	pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
3612 
3613 	ret = dict_stats_exec_sql(
3614 		pinfo,
3615 		"PROCEDURE RENAME_TABLE_IN_INDEX_STATS () IS\n"
3616 		"BEGIN\n"
3617 		"UPDATE \"" INDEX_STATS_NAME "\" SET\n"
3618 		"database_name = :new_dbname_utf8,\n"
3619 		"table_name = :new_tablename_utf8\n"
3620 		"WHERE\n"
3621 		"database_name = :old_dbname_utf8 AND\n"
3622 		"table_name = :old_tablename_utf8;\n"
3623 		"END;\n", NULL);
3624 
3625 	return(ret);
3626 }
3627 
3628 /*********************************************************************//**
3629 Renames a table in InnoDB persistent stats storage.
3630 This function creates its own transaction and commits it.
3631 @return DB_SUCCESS or error code */
3632 dberr_t
dict_stats_rename_table(bool dict_locked,const char * old_name,const char * new_name,char * errstr,size_t errstr_sz)3633 dict_stats_rename_table(
3634 /*====================*/
3635 	bool		dict_locked,	/*!< in: true if dict_sys mutex
3636 					and dict_operation_lock are held,
3637 					otherwise false*/
3638 	const char*	old_name,	/*!< in: old name, e.g. 'db/table' */
3639 	const char*	new_name,	/*!< in: new name, e.g. 'db/table' */
3640 	char*		errstr,		/*!< out: error string if != DB_SUCCESS
3641 					is returned */
3642 	size_t		errstr_sz)	/*!< in: errstr size */
3643 {
3644 	char		old_db_utf8[MAX_DB_UTF8_LEN];
3645 	char		new_db_utf8[MAX_DB_UTF8_LEN];
3646 	char		old_table_utf8[MAX_TABLE_UTF8_LEN];
3647 	char		new_table_utf8[MAX_TABLE_UTF8_LEN];
3648 	dberr_t		ret;
3649 
3650 	if (!dict_locked) {
3651 		ut_ad(!rw_lock_own(dict_operation_lock, RW_LOCK_X));
3652 		ut_ad(!mutex_own(&dict_sys->mutex));
3653 	}
3654 	/* skip innodb_table_stats and innodb_index_stats themselves */
3655 	if (strcmp(old_name, TABLE_STATS_NAME) == 0
3656 	    || strcmp(old_name, INDEX_STATS_NAME) == 0
3657 	    || strcmp(new_name, TABLE_STATS_NAME) == 0
3658 	    || strcmp(new_name, INDEX_STATS_NAME) == 0) {
3659 
3660 		return(DB_SUCCESS);
3661 	}
3662 
3663 	dict_fs2utf8(old_name, old_db_utf8, sizeof(old_db_utf8),
3664 		     old_table_utf8, sizeof(old_table_utf8));
3665 
3666 	dict_fs2utf8(new_name, new_db_utf8, sizeof(new_db_utf8),
3667 		     new_table_utf8, sizeof(new_table_utf8));
3668 
3669 	if (!dict_locked) {
3670 		rw_lock_x_lock(dict_operation_lock);
3671 		mutex_enter(&dict_sys->mutex);
3672 	}
3673 	ulint	n_attempts = 0;
3674 	do {
3675 		n_attempts++;
3676 
3677 		ret = dict_stats_rename_table_in_table_stats(
3678 			old_db_utf8, old_table_utf8,
3679 			new_db_utf8, new_table_utf8);
3680 
3681 		if (ret == DB_DUPLICATE_KEY) {
3682 			dict_stats_delete_from_table_stats(
3683 				new_db_utf8, new_table_utf8);
3684 		}
3685 
3686 		if (ret == DB_STATS_DO_NOT_EXIST) {
3687 			ret = DB_SUCCESS;
3688 		}
3689 		DBUG_EXECUTE_IF("rename_stats",
3690 				mutex_exit(&dict_sys->mutex);
3691 				rw_lock_x_unlock(dict_operation_lock);
3692 				os_thread_sleep(20000000);
3693 				DEBUG_SYNC_C("rename_stats");
3694 				rw_lock_x_lock(dict_operation_lock);
3695 				mutex_enter(&dict_sys->mutex););
3696 
3697 		if (ret != DB_SUCCESS) {
3698 			mutex_exit(&dict_sys->mutex);
3699 			rw_lock_x_unlock(dict_operation_lock);
3700 			os_thread_sleep(200000 /* 0.2 sec */);
3701 			rw_lock_x_lock(dict_operation_lock);
3702 			mutex_enter(&dict_sys->mutex);
3703 		}
3704 	} while ((ret == DB_DEADLOCK
3705 		  || ret == DB_DUPLICATE_KEY
3706 		  || ret == DB_LOCK_WAIT_TIMEOUT)
3707 		 && n_attempts < 5);
3708 
3709 	if (ret != DB_SUCCESS) {
3710 		ut_snprintf(errstr, errstr_sz,
3711 			    "Unable to rename statistics from"
3712 			    " %s.%s to %s.%s in %s: %s."
3713 			    " They can be renamed later using"
3714 
3715 			    " UPDATE %s SET"
3716 			    " database_name = '%s',"
3717 			    " table_name = '%s'"
3718 			    " WHERE"
3719 			    " database_name = '%s' AND"
3720 			    " table_name = '%s';",
3721 
3722 			    old_db_utf8, old_table_utf8,
3723 			    new_db_utf8, new_table_utf8,
3724 			    TABLE_STATS_NAME_PRINT,
3725 			    ut_strerr(ret),
3726 
3727 			    TABLE_STATS_NAME_PRINT,
3728 			    new_db_utf8, new_table_utf8,
3729 			    old_db_utf8, old_table_utf8);
3730 		mutex_exit(&dict_sys->mutex);
3731 		rw_lock_x_unlock(dict_operation_lock);
3732 		return(ret);
3733 	}
3734 	/* else */
3735 
3736 	n_attempts = 0;
3737 	do {
3738 		n_attempts++;
3739 
3740 		ret = dict_stats_rename_table_in_index_stats(
3741 			old_db_utf8, old_table_utf8,
3742 			new_db_utf8, new_table_utf8);
3743 
3744 		if (ret == DB_DUPLICATE_KEY) {
3745 			dict_stats_delete_from_index_stats(
3746 				new_db_utf8, new_table_utf8);
3747 		}
3748 
3749 		if (ret == DB_STATS_DO_NOT_EXIST) {
3750 			ret = DB_SUCCESS;
3751 		}
3752 
3753 		if (ret != DB_SUCCESS) {
3754 			mutex_exit(&dict_sys->mutex);
3755 			rw_lock_x_unlock(dict_operation_lock);
3756 			os_thread_sleep(200000 /* 0.2 sec */);
3757 			rw_lock_x_lock(dict_operation_lock);
3758 			mutex_enter(&dict_sys->mutex);
3759 		}
3760 	} while ((ret == DB_DEADLOCK
3761 		  || ret == DB_DUPLICATE_KEY
3762 		  || ret == DB_LOCK_WAIT_TIMEOUT)
3763 		 && n_attempts < 5);
3764 
3765 	if(!dict_locked) {
3766 		mutex_exit(&dict_sys->mutex);
3767 		rw_lock_x_unlock(dict_operation_lock);
3768 	}
3769 	if (ret != DB_SUCCESS) {
3770 		ut_snprintf(errstr, errstr_sz,
3771 			    "Unable to rename statistics from"
3772 			    " %s.%s to %s.%s in %s: %s."
3773 			    " They can be renamed later using"
3774 
3775 			    " UPDATE %s SET"
3776 			    " database_name = '%s',"
3777 			    " table_name = '%s'"
3778 			    " WHERE"
3779 			    " database_name = '%s' AND"
3780 			    " table_name = '%s';",
3781 
3782 			    old_db_utf8, old_table_utf8,
3783 			    new_db_utf8, new_table_utf8,
3784 			    INDEX_STATS_NAME_PRINT,
3785 			    ut_strerr(ret),
3786 
3787 			    INDEX_STATS_NAME_PRINT,
3788 			    new_db_utf8, new_table_utf8,
3789 			    old_db_utf8, old_table_utf8);
3790 	}
3791 
3792 	return(ret);
3793 }
3794 
3795 /*********************************************************************//**
3796 Renames an index in InnoDB persistent stats storage.
3797 This function creates its own transaction and commits it.
3798 @return DB_SUCCESS or error code. DB_STATS_DO_NOT_EXIST will be returned
3799 if the persistent stats do not exist. */
3800 dberr_t
dict_stats_rename_index(const dict_table_t * table,const char * old_index_name,const char * new_index_name)3801 dict_stats_rename_index(
3802 /*====================*/
3803 	const dict_table_t*	table,		/*!< in: table whose index
3804 						is renamed */
3805 	const char*		old_index_name,	/*!< in: old index name */
3806 	const char*		new_index_name)	/*!< in: new index name */
3807 {
3808 	rw_lock_x_lock(dict_operation_lock);
3809 	mutex_enter(&dict_sys->mutex);
3810 
3811 	if (!dict_stats_persistent_storage_check(true)) {
3812 		mutex_exit(&dict_sys->mutex);
3813 		rw_lock_x_unlock(dict_operation_lock);
3814 		return(DB_STATS_DO_NOT_EXIST);
3815 	}
3816 
3817 	char	dbname_utf8[MAX_DB_UTF8_LEN];
3818 	char	tablename_utf8[MAX_TABLE_UTF8_LEN];
3819 
3820 	dict_fs2utf8(table->name.m_name, dbname_utf8, sizeof(dbname_utf8),
3821 		     tablename_utf8, sizeof(tablename_utf8));
3822 
3823 	pars_info_t*	pinfo;
3824 
3825 	pinfo = pars_info_create();
3826 
3827 	pars_info_add_str_literal(pinfo, "dbname_utf8", dbname_utf8);
3828 	pars_info_add_str_literal(pinfo, "tablename_utf8", tablename_utf8);
3829 	pars_info_add_str_literal(pinfo, "new_index_name", new_index_name);
3830 	pars_info_add_str_literal(pinfo, "old_index_name", old_index_name);
3831 
3832 	dberr_t	ret;
3833 
3834 	ret = dict_stats_exec_sql(
3835 		pinfo,
3836 		"PROCEDURE RENAME_INDEX_IN_INDEX_STATS () IS\n"
3837 		"BEGIN\n"
3838 		"UPDATE \"" INDEX_STATS_NAME "\" SET\n"
3839 		"index_name = :new_index_name\n"
3840 		"WHERE\n"
3841 		"database_name = :dbname_utf8 AND\n"
3842 		"table_name = :tablename_utf8 AND\n"
3843 		"index_name = :old_index_name;\n"
3844 		"END;\n", NULL);
3845 
3846 	mutex_exit(&dict_sys->mutex);
3847 	rw_lock_x_unlock(dict_operation_lock);
3848 
3849 	return(ret);
3850 }
3851 
3852 /* tests @{ */
3853 #ifdef UNIV_ENABLE_UNIT_TEST_DICT_STATS
3854 
3855 /* The following unit tests test some of the functions in this file
3856 individually, such testing cannot be performed by the mysql-test framework
3857 via SQL. */
3858 
3859 /* test_dict_table_schema_check() @{ */
3860 void
test_dict_table_schema_check()3861 test_dict_table_schema_check()
3862 {
3863 	/*
3864 	CREATE TABLE tcheck (
3865 		c01 VARCHAR(123),
3866 		c02 INT,
3867 		c03 INT NOT NULL,
3868 		c04 INT UNSIGNED,
3869 		c05 BIGINT,
3870 		c06 BIGINT UNSIGNED NOT NULL,
3871 		c07 TIMESTAMP
3872 	) ENGINE=INNODB;
3873 	*/
3874 	/* definition for the table 'test/tcheck' */
3875 	dict_col_meta_t	columns[] = {
3876 		{"c01", DATA_VARCHAR, 0, 123},
3877 		{"c02", DATA_INT, 0, 4},
3878 		{"c03", DATA_INT, DATA_NOT_NULL, 4},
3879 		{"c04", DATA_INT, DATA_UNSIGNED, 4},
3880 		{"c05", DATA_INT, 0, 8},
3881 		{"c06", DATA_INT, DATA_NOT_NULL | DATA_UNSIGNED, 8},
3882 		{"c07", DATA_INT, 0, 4},
3883 		{"c_extra", DATA_INT, 0, 4}
3884 	};
3885 	dict_table_schema_t	schema = {
3886 		"test/tcheck",
3887 		0 /* will be set individually for each test below */,
3888 		columns
3889 	};
3890 	char	errstr[512];
3891 
3892 	ut_snprintf(errstr, sizeof(errstr), "Table not found");
3893 
3894 	/* prevent any data dictionary modifications while we are checking
3895 	the tables' structure */
3896 
3897 	mutex_enter(&dict_sys->mutex);
3898 
3899 	/* check that a valid table is reported as valid */
3900 	schema.n_cols = 7;
3901 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3902 	    == DB_SUCCESS) {
3903 		printf("OK: test.tcheck ok\n");
3904 	} else {
3905 		printf("ERROR: %s\n", errstr);
3906 		printf("ERROR: test.tcheck not present or corrupted\n");
3907 		goto test_dict_table_schema_check_end;
3908 	}
3909 
3910 	/* check columns with wrong length */
3911 	schema.columns[1].len = 8;
3912 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3913 	    != DB_SUCCESS) {
3914 		printf("OK: test.tcheck.c02 has different length and is"
3915 		       " reported as corrupted\n");
3916 	} else {
3917 		printf("OK: test.tcheck.c02 has different length but is"
3918 		       " reported as ok\n");
3919 		goto test_dict_table_schema_check_end;
3920 	}
3921 	schema.columns[1].len = 4;
3922 
3923 	/* request that c02 is NOT NULL while actually it does not have
3924 	this flag set */
3925 	schema.columns[1].prtype_mask |= DATA_NOT_NULL;
3926 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3927 	    != DB_SUCCESS) {
3928 		printf("OK: test.tcheck.c02 does not have NOT NULL while"
3929 		       " it should and is reported as corrupted\n");
3930 	} else {
3931 		printf("ERROR: test.tcheck.c02 does not have NOT NULL while"
3932 		       " it should and is not reported as corrupted\n");
3933 		goto test_dict_table_schema_check_end;
3934 	}
3935 	schema.columns[1].prtype_mask &= ~DATA_NOT_NULL;
3936 
3937 	/* check a table that contains some extra columns */
3938 	schema.n_cols = 6;
3939 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3940 	    == DB_SUCCESS) {
3941 		printf("ERROR: test.tcheck has more columns but is not"
3942 		       " reported as corrupted\n");
3943 		goto test_dict_table_schema_check_end;
3944 	} else {
3945 		printf("OK: test.tcheck has more columns and is"
3946 		       " reported as corrupted\n");
3947 	}
3948 
3949 	/* check a table that has some columns missing */
3950 	schema.n_cols = 8;
3951 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3952 	    != DB_SUCCESS) {
3953 		printf("OK: test.tcheck has missing columns and is"
3954 		       " reported as corrupted\n");
3955 	} else {
3956 		printf("ERROR: test.tcheck has missing columns but is"
3957 		       " reported as ok\n");
3958 		goto test_dict_table_schema_check_end;
3959 	}
3960 
3961 	/* check non-existent table */
3962 	schema.table_name = "test/tcheck_nonexistent";
3963 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3964 	    != DB_SUCCESS) {
3965 		printf("OK: test.tcheck_nonexistent is not present\n");
3966 	} else {
3967 		printf("ERROR: test.tcheck_nonexistent is present!?\n");
3968 		goto test_dict_table_schema_check_end;
3969 	}
3970 
3971 test_dict_table_schema_check_end:
3972 
3973 	mutex_exit(&dict_sys->mutex);
3974 }
3975 /* @} */
3976 
3977 /* save/fetch aux macros @{ */
3978 #define TEST_DATABASE_NAME		"foobardb"
3979 #define TEST_TABLE_NAME			"test_dict_stats"
3980 
3981 #define TEST_N_ROWS			111
3982 #define TEST_CLUSTERED_INDEX_SIZE	222
3983 #define TEST_SUM_OF_OTHER_INDEX_SIZES	333
3984 
3985 #define TEST_IDX1_NAME			"tidx1"
3986 #define TEST_IDX1_COL1_NAME		"tidx1_col1"
3987 #define TEST_IDX1_INDEX_SIZE		123
3988 #define TEST_IDX1_N_LEAF_PAGES		234
3989 #define TEST_IDX1_N_DIFF1		50
3990 #define TEST_IDX1_N_DIFF1_SAMPLE_SIZE	500
3991 
3992 #define TEST_IDX2_NAME			"tidx2"
3993 #define TEST_IDX2_COL1_NAME		"tidx2_col1"
3994 #define TEST_IDX2_COL2_NAME		"tidx2_col2"
3995 #define TEST_IDX2_COL3_NAME		"tidx2_col3"
3996 #define TEST_IDX2_COL4_NAME		"tidx2_col4"
3997 #define TEST_IDX2_INDEX_SIZE		321
3998 #define TEST_IDX2_N_LEAF_PAGES		432
3999 #define TEST_IDX2_N_DIFF1		60
4000 #define TEST_IDX2_N_DIFF1_SAMPLE_SIZE	600
4001 #define TEST_IDX2_N_DIFF2		61
4002 #define TEST_IDX2_N_DIFF2_SAMPLE_SIZE	610
4003 #define TEST_IDX2_N_DIFF3		62
4004 #define TEST_IDX2_N_DIFF3_SAMPLE_SIZE	620
4005 #define TEST_IDX2_N_DIFF4		63
4006 #define TEST_IDX2_N_DIFF4_SAMPLE_SIZE	630
4007 /* @} */
4008 
4009 /* test_dict_stats_save() @{ */
/* Unit test for dict_stats_save(): builds a dummy table with two
indexes entirely on the stack, saves its statistics to the persistent
stats tables, and prints SELECT statements that a human can run to
verify the rows that were written. */
void
test_dict_stats_save()
{
	dict_table_t	table;
	dict_index_t	index1;
	dict_field_t	index1_fields[1];
	ib_uint64_t	index1_stat_n_diff_key_vals[1];
	ib_uint64_t	index1_stat_n_sample_sizes[1];
	dict_index_t	index2;
	dict_field_t	index2_fields[4];
	ib_uint64_t	index2_stat_n_diff_key_vals[4];
	ib_uint64_t	index2_stat_n_sample_sizes[4];
	dberr_t		ret;

	/* craft a dummy dict_table_t */
	table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
	table.stat_n_rows = TEST_N_ROWS;
	table.stat_clustered_index_size = TEST_CLUSTERED_INDEX_SIZE;
	table.stat_sum_of_other_index_sizes = TEST_SUM_OF_OTHER_INDEX_SIZES;
	UT_LIST_INIT(table.indexes, &dict_index_t::indexes);
	UT_LIST_ADD_LAST(table.indexes, &index1);
	UT_LIST_ADD_LAST(table.indexes, &index2);
	/* magic_n fields are only present in debug builds */
	ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
	ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);

	/* index1: single-column index with one n_diff statistic */
	index1.name = TEST_IDX1_NAME;
	index1.table = &table;
	index1.cached = 1;
	index1.n_uniq = 1;
	index1.fields = index1_fields;
	index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
	index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
	index1.stat_index_size = TEST_IDX1_INDEX_SIZE;
	index1.stat_n_leaf_pages = TEST_IDX1_N_LEAF_PAGES;
	index1_fields[0].name = TEST_IDX1_COL1_NAME;
	index1_stat_n_diff_key_vals[0] = TEST_IDX1_N_DIFF1;
	index1_stat_n_sample_sizes[0] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE;

	/* index2: four-column index with n_diff statistics for each
	of the four key prefixes */
	ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
	index2.name = TEST_IDX2_NAME;
	index2.table = &table;
	index2.cached = 1;
	index2.n_uniq = 4;
	index2.fields = index2_fields;
	index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
	index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
	index2.stat_index_size = TEST_IDX2_INDEX_SIZE;
	index2.stat_n_leaf_pages = TEST_IDX2_N_LEAF_PAGES;
	index2_fields[0].name = TEST_IDX2_COL1_NAME;
	index2_fields[1].name = TEST_IDX2_COL2_NAME;
	index2_fields[2].name = TEST_IDX2_COL3_NAME;
	index2_fields[3].name = TEST_IDX2_COL4_NAME;
	index2_stat_n_diff_key_vals[0] = TEST_IDX2_N_DIFF1;
	index2_stat_n_diff_key_vals[1] = TEST_IDX2_N_DIFF2;
	index2_stat_n_diff_key_vals[2] = TEST_IDX2_N_DIFF3;
	index2_stat_n_diff_key_vals[3] = TEST_IDX2_N_DIFF4;
	index2_stat_n_sample_sizes[0] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE;
	index2_stat_n_sample_sizes[1] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE;
	index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE;
	index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE;

	/* write the crafted statistics to the persistent stats tables */
	ret = dict_stats_save(&table, NULL);

	ut_a(ret == DB_SUCCESS);

	/* print verification queries for manual inspection; these are
	consumed by test_dict_stats_fetch_from_ps() below, which reads
	the same values back programmatically */
	printf("\nOK: stats saved successfully, now go ahead and read"
	       " what's inside %s and %s:\n\n",
	       TABLE_STATS_NAME_PRINT,
	       INDEX_STATS_NAME_PRINT);

	printf("SELECT COUNT(*) = 1 AS table_stats_saved_successfully\n"
	       "FROM %s\n"
	       "WHERE\n"
	       "database_name = '%s' AND\n"
	       "table_name = '%s' AND\n"
	       "n_rows = %d AND\n"
	       "clustered_index_size = %d AND\n"
	       "sum_of_other_index_sizes = %d;\n"
	       "\n",
	       TABLE_STATS_NAME_PRINT,
	       TEST_DATABASE_NAME,
	       TEST_TABLE_NAME,
	       TEST_N_ROWS,
	       TEST_CLUSTERED_INDEX_SIZE,
	       TEST_SUM_OF_OTHER_INDEX_SIZES);

	printf("SELECT COUNT(*) = 3 AS tidx1_stats_saved_successfully\n"
	       "FROM %s\n"
	       "WHERE\n"
	       "database_name = '%s' AND\n"
	       "table_name = '%s' AND\n"
	       "index_name = '%s' AND\n"
	       "(\n"
	       " (stat_name = 'size' AND stat_value = %d AND"
	       "  sample_size IS NULL) OR\n"
	       " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
	       "  sample_size IS NULL) OR\n"
	       " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s')\n"
	       ");\n"
	       "\n",
	       INDEX_STATS_NAME_PRINT,
	       TEST_DATABASE_NAME,
	       TEST_TABLE_NAME,
	       TEST_IDX1_NAME,
	       TEST_IDX1_INDEX_SIZE,
	       TEST_IDX1_N_LEAF_PAGES,
	       TEST_IDX1_N_DIFF1,
	       TEST_IDX1_N_DIFF1_SAMPLE_SIZE,
	       TEST_IDX1_COL1_NAME);

	printf("SELECT COUNT(*) = 6 AS tidx2_stats_saved_successfully\n"
	       "FROM %s\n"
	       "WHERE\n"
	       "database_name = '%s' AND\n"
	       "table_name = '%s' AND\n"
	       "index_name = '%s' AND\n"
	       "(\n"
	       " (stat_name = 'size' AND stat_value = %d AND"
	       "  sample_size IS NULL) OR\n"
	       " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
	       "  sample_size IS NULL) OR\n"
	       " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s') OR\n"
	       " (stat_name = 'n_diff_pfx02' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s,%s') OR\n"
	       " (stat_name = 'n_diff_pfx03' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s,%s,%s') OR\n"
	       " (stat_name = 'n_diff_pfx04' AND stat_value = %d AND"
	       "  sample_size = '%d' AND stat_description = '%s,%s,%s,%s')\n"
	       ");\n"
	       "\n",
	       INDEX_STATS_NAME_PRINT,
	       TEST_DATABASE_NAME,
	       TEST_TABLE_NAME,
	       TEST_IDX2_NAME,
	       TEST_IDX2_INDEX_SIZE,
	       TEST_IDX2_N_LEAF_PAGES,
	       TEST_IDX2_N_DIFF1,
	       TEST_IDX2_N_DIFF1_SAMPLE_SIZE, TEST_IDX2_COL1_NAME,
	       TEST_IDX2_N_DIFF2,
	       TEST_IDX2_N_DIFF2_SAMPLE_SIZE,
	       TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME,
	       TEST_IDX2_N_DIFF3,
	       TEST_IDX2_N_DIFF3_SAMPLE_SIZE,
	       TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
	       TEST_IDX2_N_DIFF4,
	       TEST_IDX2_N_DIFF4_SAMPLE_SIZE,
	       TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
	       TEST_IDX2_COL4_NAME);
}
4161 /* @} */
4162 
4163 /* test_dict_stats_fetch_from_ps() @{ */
/* Unit test for dict_stats_fetch_from_ps(): builds a dummy table with
two empty indexes, fetches statistics from the persistent stats tables
into it, and asserts that every fetched value equals what
test_dict_stats_save() wrote. Must run after test_dict_stats_save(). */
void
test_dict_stats_fetch_from_ps()
{
	dict_table_t	table;
	dict_index_t	index1;
	ib_uint64_t	index1_stat_n_diff_key_vals[1];
	ib_uint64_t	index1_stat_n_sample_sizes[1];
	dict_index_t	index2;
	ib_uint64_t	index2_stat_n_diff_key_vals[4];
	ib_uint64_t	index2_stat_n_sample_sizes[4];
	dberr_t		ret;

	/* craft a dummy dict_table_t */
	table.name.m_name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
	UT_LIST_INIT(table.indexes, &dict_index_t::indexes);
	UT_LIST_ADD_LAST(table.indexes, &index1);
	UT_LIST_ADD_LAST(table.indexes, &index2);
	/* magic_n fields are only present in debug builds */
	ut_d(table.magic_n = DICT_TABLE_MAGIC_N);

	/* index1: single-column index; stats arrays are filled in by
	the fetch below */
	index1.name = TEST_IDX1_NAME;
	ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
	index1.cached = 1;
	index1.n_uniq = 1;
	index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
	index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;

	/* index2: four-column index; stats arrays are filled in by
	the fetch below */
	index2.name = TEST_IDX2_NAME;
	ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
	index2.cached = 1;
	index2.n_uniq = 4;
	index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
	index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;

	/* read the persisted statistics back into the dummy table */
	ret = dict_stats_fetch_from_ps(&table);

	ut_a(ret == DB_SUCCESS);

	/* table-level stats must match what was saved */
	ut_a(table.stat_n_rows == TEST_N_ROWS);
	ut_a(table.stat_clustered_index_size == TEST_CLUSTERED_INDEX_SIZE);
	ut_a(table.stat_sum_of_other_index_sizes
	     == TEST_SUM_OF_OTHER_INDEX_SIZES);

	/* index1 stats must match what was saved */
	ut_a(index1.stat_index_size == TEST_IDX1_INDEX_SIZE);
	ut_a(index1.stat_n_leaf_pages == TEST_IDX1_N_LEAF_PAGES);
	ut_a(index1_stat_n_diff_key_vals[0] == TEST_IDX1_N_DIFF1);
	ut_a(index1_stat_n_sample_sizes[0] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE);

	/* index2 stats must match what was saved, for each of the
	four key prefixes */
	ut_a(index2.stat_index_size == TEST_IDX2_INDEX_SIZE);
	ut_a(index2.stat_n_leaf_pages == TEST_IDX2_N_LEAF_PAGES);
	ut_a(index2_stat_n_diff_key_vals[0] == TEST_IDX2_N_DIFF1);
	ut_a(index2_stat_n_sample_sizes[0] == TEST_IDX2_N_DIFF1_SAMPLE_SIZE);
	ut_a(index2_stat_n_diff_key_vals[1] == TEST_IDX2_N_DIFF2);
	ut_a(index2_stat_n_sample_sizes[1] == TEST_IDX2_N_DIFF2_SAMPLE_SIZE);
	ut_a(index2_stat_n_diff_key_vals[2] == TEST_IDX2_N_DIFF3);
	ut_a(index2_stat_n_sample_sizes[2] == TEST_IDX2_N_DIFF3_SAMPLE_SIZE);
	ut_a(index2_stat_n_diff_key_vals[3] == TEST_IDX2_N_DIFF4);
	ut_a(index2_stat_n_sample_sizes[3] == TEST_IDX2_N_DIFF4_SAMPLE_SIZE);

	printf("OK: fetch successful\n");
}
4224 /* @} */
4225 
4226 /* test_dict_stats_all() @{ */
/* Runs all dict0stats unit tests in dependency order:
test_dict_stats_fetch_from_ps() asserts the values that
test_dict_stats_save() wrote, so their relative order matters. */
void
test_dict_stats_all()
{
	test_dict_table_schema_check();

	test_dict_stats_save();

	test_dict_stats_fetch_from_ps();
}
4236 /* @} */
4237 
4238 #endif /* UNIV_ENABLE_UNIT_TEST_DICT_STATS */
4239 /* @} */
4240 
4241 #endif /* UNIV_HOTBACKUP */
4242