1 /*****************************************************************************
2 
3 Copyright (c) 2009, 2019, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file dict/dict0stats.cc
29 Code used for calculating and manipulating table statistics.
30 
31 Created Jan 06, 2010 Vasil Dimov
32 *******************************************************/
33 
34 #ifndef UNIV_HOTBACKUP
35 
36 #include "univ.i"
37 
38 #include "btr0btr.h" /* btr_get_size() */
39 #include "btr0cur.h" /* btr_estimate_number_of_different_key_vals() */
40 #include "dict0dict.h" /* dict_table_get_first_index(), dict_fs2utf8() */
41 #include "dict0mem.h" /* DICT_TABLE_MAGIC_N */
42 #include "dict0stats.h"
43 #include "data0type.h" /* dtype_t */
44 #include "db0err.h" /* dberr_t */
45 #include "page0page.h" /* page_align() */
46 #include "pars0pars.h" /* pars_info_create() */
47 #include "pars0types.h" /* pars_info_t */
48 #include "que0que.h" /* que_eval_sql() */
49 #include "rem0cmp.h" /* REC_MAX_N_FIELDS,cmp_rec_rec_with_match() */
50 #include "row0sel.h" /* sel_node_t */
51 #include "row0types.h" /* sel_node_t */
52 #include "trx0trx.h" /* trx_create() */
53 #include "trx0roll.h" /* trx_rollback_to_savepoint() */
54 #include "ut0rnd.h" /* ut_rnd_interval() */
55 #include "ut0ut.h" /* ut_format_name(), ut_time() */
56 
57 #include <algorithm>
58 #include <map>
59 #include <vector>
60 
61 /* Sampling algorithm description @{
62 
63 The algorithm is controlled by one number - N_SAMPLE_PAGES(index),
64 let it be A, which is the number of leaf pages to analyze for a given index
65 for each n-prefix (if the index is on 3 columns, then 3*A leaf pages will be
66 analyzed).
67 
68 Let the total number of leaf pages in the table be T.
69 Level 0 - leaf pages, level H - root.
70 
71 Definition: N-prefix-boring record is a record on a non-leaf page that equals
72 the next (to the right, cross page boundaries, skipping the supremum and
infimum) record on the same level when looking at the first n-prefix columns.
74 The last (user) record on a level is not boring (it does not match the
75 non-existent user record to the right). We call the records boring because all
76 the records on the page below a boring record are equal to that boring record.
77 
78 We avoid diving below boring records when searching for a leaf page to
79 estimate the number of distinct records because we know that such a leaf
80 page will have number of distinct records == 1.
81 
82 For each n-prefix: start from the root level and full scan subsequent lower
83 levels until a level that contains at least A*10 distinct records is found.
84 Lets call this level LA.
85 As an optimization the search is canceled if it has reached level 1 (never
86 descend to the level 0 (leaf)) and also if the next level to be scanned
87 would contain more than A pages. The latter is because the user has asked
88 to analyze A leaf pages and it does not make sense to scan much more than
89 A non-leaf pages with the sole purpose of finding a good sample of A leaf
90 pages.
91 
92 After finding the appropriate level LA with >A*10 distinct records (or less in
93 the exceptions described above), divide it into groups of equal records and
94 pick A such groups. Then pick the last record from each group. For example,
95 let the level be:
96 
97 index:  0,1,2,3,4,5,6,7,8,9,10
98 record: 1,1,1,2,2,7,7,7,7,7,9
99 
100 There are 4 groups of distinct records and if A=2 random ones are selected,
101 e.g. 1,1,1 and 7,7,7,7,7, then records with indexes 2 and 9 will be selected.
102 
103 After selecting A records as described above, dive below them to find A leaf
104 pages and analyze them, finding the total number of distinct records. The
105 dive to the leaf level is performed by selecting a non-boring record from
106 each page and diving below it.
107 
108 This way, a total of A leaf pages are analyzed for the given n-prefix.
109 
110 Let the number of different key values found in each leaf page i be Pi (i=1..A).
111 Let N_DIFF_AVG_LEAF be (P1 + P2 + ... + PA) / A.
112 Let the number of different key values on level LA be N_DIFF_LA.
113 Let the total number of records on level LA be TOTAL_LA.
114 Let R be N_DIFF_LA / TOTAL_LA, we assume this ratio is the same on the
115 leaf level.
116 Let the number of leaf pages be N.
117 Then the total number of different key values on the leaf level is:
118 N * R * N_DIFF_AVG_LEAF.
119 See REF01 for the implementation.
120 
121 The above describes how to calculate the cardinality of an index.
122 This algorithm is executed for each n-prefix of a multi-column index
123 where n=1..n_uniq.
124 @} */
125 
126 /* names of the tables from the persistent statistics storage */
127 #define TABLE_STATS_NAME	"mysql/innodb_table_stats"
128 #define TABLE_STATS_NAME_PRINT	"mysql.innodb_table_stats"
129 #define INDEX_STATS_NAME	"mysql/innodb_index_stats"
130 #define INDEX_STATS_NAME_PRINT	"mysql.innodb_index_stats"
131 
132 #ifdef UNIV_STATS_DEBUG
133 #define DEBUG_PRINTF(fmt, ...)	printf(fmt, ## __VA_ARGS__)
134 #else /* UNIV_STATS_DEBUG */
135 #define DEBUG_PRINTF(fmt, ...)	/* noop */
136 #endif /* UNIV_STATS_DEBUG */
137 
138 /* Gets the number of leaf pages to sample in persistent stats estimation */
139 #define N_SAMPLE_PAGES(index)					\
140 	static_cast<ib_uint64_t>(				\
141 		(index)->table->stats_sample_pages != 0		\
142 		? (index)->table->stats_sample_pages		\
143 		: srv_stats_persistent_sample_pages)
144 
145 /* number of distinct records on a given level that are required to stop
146 descending to lower levels and fetch N_SAMPLE_PAGES(index) records
147 from that level */
148 #define N_DIFF_REQUIRED(index)	(N_SAMPLE_PAGES(index) * 10)
149 
150 /* A dynamic array where we store the boundaries of each distinct group
151 of keys. For example if a btree level is:
152 index: 0,1,2,3,4,5,6,7,8,9,10,11,12
153 data:  b,b,b,b,b,b,g,g,j,j,j, x, y
154 then we would store 5,7,10,11,12 in the array. */
155 typedef std::vector<ib_uint64_t>	boundaries_t;
156 
157 /* This is used to arrange the index based on the index name.
158 @return true if index_name1 is smaller than index_name2. */
struct index_cmp
{
	bool operator()(const char* index_name1, const char* index_name2) const {
		/* plain lexicographic byte comparison of the names */
		const int	order = strcmp(index_name1, index_name2);
		return(order < 0);
	}
};
165 
166 typedef std::map<const char*, dict_index_t*, index_cmp>	index_map_t;
167 
168 /*********************************************************************//**
169 Checks whether an index should be ignored in stats manipulations:
170 * stats fetch
171 * stats recalc
172 * stats save
@return true if the index should be ignored in stats manipulations */
174 UNIV_INLINE
175 bool
dict_stats_should_ignore_index(const dict_index_t * index)176 dict_stats_should_ignore_index(
177 /*===========================*/
178 	const dict_index_t*	index)	/*!< in: index */
179 {
180 	return((index->type & DICT_FTS)
181 	       || dict_index_is_corrupted(index)
182 	       || index->to_be_dropped
183 	       || *index->name == TEMP_INDEX_PREFIX);
184 }
185 
186 /*********************************************************************//**
187 Checks whether the persistent statistics storage exists and that all
188 tables have the proper structure.
189 @return true if exists and all tables are ok */
static
bool
dict_stats_persistent_storage_check(
/*================================*/
	bool	caller_has_dict_sys_mutex)	/*!< in: true if the caller
						owns dict_sys->mutex */
{
	/* definition for the table TABLE_STATS_NAME: for each column,
	the expected name, main type, precise-type flags and length;
	dict_table_schema_check() compares this against the actual
	data dictionary entry */
	dict_col_meta_t	table_stats_columns[] = {
		{"database_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"table_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"last_update", DATA_FIXBINARY,
			DATA_NOT_NULL, 4},

		{"n_rows", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8},

		{"clustered_index_size", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8},

		{"sum_of_other_index_sizes", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8}
	};
	dict_table_schema_t	table_stats_schema = {
		TABLE_STATS_NAME,
		UT_ARR_SIZE(table_stats_columns),
		table_stats_columns,
		0 /* n_foreign */,
		0 /* n_referenced */
	};

	/* definition for the table INDEX_STATS_NAME */
	dict_col_meta_t	index_stats_columns[] = {
		{"database_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"table_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"index_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 192},

		{"last_update", DATA_FIXBINARY,
			DATA_NOT_NULL, 4},

		{"stat_name", DATA_VARMYSQL,
			DATA_NOT_NULL, 64*3},

		{"stat_value", DATA_INT,
			DATA_NOT_NULL | DATA_UNSIGNED, 8},

		{"sample_size", DATA_INT,
			DATA_UNSIGNED, 8},

		{"stat_description", DATA_VARMYSQL,
			DATA_NOT_NULL, 1024*3}
	};
	dict_table_schema_t	index_stats_schema = {
		INDEX_STATS_NAME,
		UT_ARR_SIZE(index_stats_columns),
		index_stats_columns,
		0 /* n_foreign */,
		0 /* n_referenced */
	};

	char		errstr[512];
	dberr_t		ret;

	/* dict_table_schema_check() must be called while holding
	dict_sys->mutex; acquire it here unless the caller already has it */
	if (!caller_has_dict_sys_mutex) {
		mutex_enter(&(dict_sys->mutex));
	}

	ut_ad(mutex_own(&dict_sys->mutex));

	/* first check table_stats */
	ret = dict_table_schema_check(&table_stats_schema, errstr,
				      sizeof(errstr));
	if (ret == DB_SUCCESS) {
		/* if it is ok, then check index_stats */
		ret = dict_table_schema_check(&index_stats_schema, errstr,
					      sizeof(errstr));
	}

	if (!caller_has_dict_sys_mutex) {
		mutex_exit(&(dict_sys->mutex));
	}

	if (ret != DB_SUCCESS) {
		/* either a table is missing or its structure differs
		from the expected one; report the detailed reason that
		dict_table_schema_check() formatted into errstr */
		ut_print_timestamp(stderr);
		fprintf(stderr, " InnoDB: Error: %s\n", errstr);
		return(false);
	}
	/* else */

	return(true);
}
290 
291 /** Executes a given SQL statement using the InnoDB internal SQL parser.
292 This function will free the pinfo object.
293 @param[in,out]	pinfo	pinfo to pass to que_eval_sql() must already
294 have any literals bound to it
295 @param[in]	sql	SQL string to execute
296 @param[in,out]	trx	in case of NULL the function will allocate and
297 free the trx object. If it is not NULL then it will be rolled back
298 only in the case of error, but not freed.
299 @return DB_SUCCESS or error code */
static
dberr_t
dict_stats_exec_sql(
	pars_info_t*	pinfo,
	const char*	sql,
	trx_t*		trx)
{
	dberr_t	err;
	bool	trx_started = false;	/* true if we allocated trx here */
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(mutex_own(&dict_sys->mutex));

	if (!dict_stats_persistent_storage_check(true)) {
		/* the stats tables are missing or malformed; since
		que_eval_sql() will not run, free pinfo ourselves to
		honor this function's contract */
		pars_info_free(pinfo);
		return(DB_STATS_DO_NOT_EXIST);
	}

	if (trx == NULL) {
		/* no transaction supplied: run the statement inside
		our own background transaction, freed at the end */
		trx = trx_allocate_for_background();
		trx_start_if_not_started(trx);
		trx_started = true;
	}

	err = que_eval_sql(pinfo, sql, FALSE, trx); /* pinfo is freed here */

	/* debug hook: simulate a stats-table failure, but only for a
	caller-supplied trx (the !trx_started case) */
	DBUG_EXECUTE_IF("stats_index_error",
		if (!trx_started) {
			err = DB_STATS_DO_NOT_EXIST;
			trx->error_state = DB_STATS_DO_NOT_EXIST;
		});

	if (!trx_started && err == DB_SUCCESS) {
		/* caller-owned trx and success: commit (or further use)
		is the caller's responsibility, per the contract above */
		return(DB_SUCCESS);
	}

	if (err == DB_SUCCESS) {
		trx_commit_for_mysql(trx);
	} else {
		/* failure: roll back whatever the statement did; we hold
		dict_operation_lock in X mode (asserted above), record
		that in the trx for the duration of the rollback */
		trx->op_info = "rollback of internal trx on stats tables";
		trx->dict_operation_lock_mode = RW_X_LATCH;
		trx_rollback_to_savepoint(trx, NULL);
		trx->dict_operation_lock_mode = 0;
		trx->op_info = "";
		ut_a(trx->error_state == DB_SUCCESS);
	}

	if (trx_started) {
		/* release the transaction we allocated above */
		trx_free_for_background(trx);
	}

	return(err);
}
354 
355 /*********************************************************************//**
356 Duplicate a table object and its indexes.
357 This function creates a dummy dict_table_t object and initializes the
358 following table and index members:
359 dict_table_t::id (copied)
360 dict_table_t::heap (newly created)
361 dict_table_t::name (copied)
362 dict_table_t::corrupted (copied)
363 dict_table_t::indexes<> (newly created)
364 dict_table_t::magic_n
365 for each entry in dict_table_t::indexes, the following are initialized:
366 (indexes that have DICT_FTS set in index->type are skipped)
367 dict_index_t::id (copied)
368 dict_index_t::name (copied)
369 dict_index_t::table_name (points to the copied table name)
370 dict_index_t::table (points to the above semi-initialized object)
371 dict_index_t::type (copied)
372 dict_index_t::to_be_dropped (copied)
373 dict_index_t::online_status (copied)
374 dict_index_t::n_uniq (copied)
375 dict_index_t::fields[] (newly created, only first n_uniq, only fields[i].name)
376 dict_index_t::indexes<> (newly created)
377 dict_index_t::stat_n_diff_key_vals[] (only allocated, left uninitialized)
378 dict_index_t::stat_n_sample_sizes[] (only allocated, left uninitialized)
379 dict_index_t::stat_n_non_null_key_vals[] (only allocated, left uninitialized)
380 dict_index_t::magic_n
381 The returned object should be freed with dict_stats_table_clone_free()
382 when no longer needed.
383 @return incomplete table object */
static
dict_table_t*
dict_stats_table_clone_create(
/*==========================*/
	const dict_table_t*	table)	/*!< in: table whose stats to copy */
{
	size_t		heap_size;
	dict_index_t*	index;

	/* Pass 1: estimate the size needed for the table and all of its
	indexes.  Every mem_heap_alloc()/mem_heap_strdup() performed in
	pass 2 below must be accounted for here so that the heap is
	created with sufficient initial capacity. */

	heap_size = 0;
	heap_size += sizeof(dict_table_t);
	heap_size += strlen(table->name) + 1;

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_univ(index));

		/* only the first n_uniq fields and the three per-prefix
		stats arrays are cloned (see function header comment) */
		ulint	n_uniq = dict_index_get_n_unique(index);

		heap_size += sizeof(dict_index_t);
		heap_size += strlen(index->name) + 1;
		heap_size += n_uniq * sizeof(index->fields[0]);
		for (ulint i = 0; i < n_uniq; i++) {
			heap_size += strlen(index->fields[i].name) + 1;
		}
		heap_size += n_uniq * sizeof(index->stat_n_diff_key_vals[0]);
		heap_size += n_uniq * sizeof(index->stat_n_sample_sizes[0]);
		heap_size += n_uniq * sizeof(index->stat_n_non_null_key_vals[0]);
	}

	/* Pass 2: allocate the memory and copy the members */

	mem_heap_t*	heap;

	heap = mem_heap_create(heap_size);

	dict_table_t*	t;

	t = (dict_table_t*) mem_heap_alloc(heap, sizeof(*t));

	/* UNIV_MEM_ASSERT_RW_ABORT() checks that each source member we
	read below holds initialized (defined) memory */
	UNIV_MEM_ASSERT_RW_ABORT(&table->id, sizeof(table->id));
	t->id = table->id;

	t->heap = heap;

	UNIV_MEM_ASSERT_RW_ABORT(table->name, strlen(table->name) + 1);
	t->name = (char*) mem_heap_strdup(heap, table->name);

	t->corrupted = table->corrupted;

	/* This private object "t" is not shared with other threads, so
	we do not need the stats_latch (thus we pass false below). The
	dict_table_stats_lock()/unlock() routines will do nothing. */
	dict_table_stats_latch_create(t, false);

	UT_LIST_INIT(t->indexes);

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		/* must skip exactly the same indexes as pass 1 above,
		otherwise the heap_size estimate would be wrong */
		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_univ(index));

		dict_index_t*	idx;

		idx = (dict_index_t*) mem_heap_alloc(heap, sizeof(*idx));

		UNIV_MEM_ASSERT_RW_ABORT(&index->id, sizeof(index->id));
		idx->id = index->id;

		UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name) + 1);
		idx->name = (char*) mem_heap_strdup(heap, index->name);

		/* point at the copied table name, not the original's */
		idx->table_name = t->name;

		idx->table = t;

		idx->type = index->type;

		idx->to_be_dropped = 0;

		idx->online_status = ONLINE_INDEX_COMPLETE;

		idx->n_uniq = index->n_uniq;

		idx->fields = (dict_field_t*) mem_heap_alloc(
			heap, idx->n_uniq * sizeof(idx->fields[0]));

		/* only fields[i].name is initialized in the clone */
		for (ulint i = 0; i < idx->n_uniq; i++) {
			UNIV_MEM_ASSERT_RW_ABORT(index->fields[i].name, strlen(index->fields[i].name) + 1);
			idx->fields[i].name = (char*) mem_heap_strdup(
				heap, index->fields[i].name);
		}

		/* hook idx into t->indexes */
		UT_LIST_ADD_LAST(indexes, t->indexes, idx);

		/* the stats arrays are only allocated here; they are
		left uninitialized for the caller to fill in */
		idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_diff_key_vals[0]));

		idx->stat_n_sample_sizes = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_sample_sizes[0]));

		idx->stat_n_non_null_key_vals = (ib_uint64_t*) mem_heap_alloc(
			heap,
			idx->n_uniq * sizeof(idx->stat_n_non_null_key_vals[0]));
		ut_d(idx->magic_n = DICT_INDEX_MAGIC_N);
	}

	ut_d(t->magic_n = DICT_TABLE_MAGIC_N);

	return(t);
}
511 
512 /*********************************************************************//**
513 Free the resources occupied by an object returned by
514 dict_stats_table_clone_create(). */
static
void
dict_stats_table_clone_free(
/*========================*/
	dict_table_t*	t)	/*!< in: dummy table object to free */
{
	/* destroy the latch created by dict_stats_table_clone_create()
	before freeing the heap that holds the clone itself */
	dict_table_stats_latch_destroy(t);
	mem_heap_free(t->heap);
}
524 
525 /*********************************************************************//**
526 Write all zeros (or 1 where it makes sense) into an index
527 statistics members. The resulting stats correspond to an empty index.
528 The caller must own index's table stats latch in X mode
529 (dict_table_stats_lock(table, RW_X_LATCH)) */
530 static
531 void
dict_stats_empty_index(dict_index_t * index)532 dict_stats_empty_index(
533 /*===================*/
534 	dict_index_t*	index)	/*!< in/out: index */
535 {
536 	ut_ad(!(index->type & DICT_FTS));
537 	ut_ad(!dict_index_is_univ(index));
538 
539 	ulint	n_uniq = index->n_uniq;
540 
541 	for (ulint i = 0; i < n_uniq; i++) {
542 		index->stat_n_diff_key_vals[i] = 0;
543 		index->stat_n_sample_sizes[i] = 1;
544 		index->stat_n_non_null_key_vals[i] = 0;
545 	}
546 
547 	index->stat_index_size = 1;
548 	index->stat_n_leaf_pages = 1;
549 }
550 
551 /*********************************************************************//**
552 Write all zeros (or 1 where it makes sense) into a table and its indexes'
553 statistics members. The resulting stats correspond to an empty table. */
554 static
555 void
dict_stats_empty_table(dict_table_t * table)556 dict_stats_empty_table(
557 /*===================*/
558 	dict_table_t*	table)	/*!< in/out: table */
559 {
560 	/* Zero the stats members */
561 
562 	dict_table_stats_lock(table, RW_X_LATCH);
563 
564 	table->stat_n_rows = 0;
565 	table->stat_clustered_index_size = 1;
566 	/* 1 page for each index, not counting the clustered */
567 	table->stat_sum_of_other_index_sizes
568 		= UT_LIST_GET_LEN(table->indexes) - 1;
569 	table->stat_modified_counter = 0;
570 
571 	dict_index_t*	index;
572 
573 	for (index = dict_table_get_first_index(table);
574 	     index != NULL;
575 	     index = dict_table_get_next_index(index)) {
576 
577 		if (index->type & DICT_FTS) {
578 			continue;
579 		}
580 
581 		ut_ad(!dict_index_is_univ(index));
582 
583 		dict_stats_empty_index(index);
584 	}
585 
586 	table->stat_initialized = TRUE;
587 
588 	dict_table_stats_unlock(table, RW_X_LATCH);
589 }
590 
591 /*********************************************************************//**
592 Check whether index's stats are initialized (assert if they are not). */
static
void
dict_stats_assert_initialized_index(
/*================================*/
	const dict_index_t*	index)	/*!< in: index */
{
	/* each UNIV_MEM_ASSERT_RW_ABORT() below verifies that the given
	stats member contains fully initialized (defined) memory */
	UNIV_MEM_ASSERT_RW_ABORT(
		index->stat_n_diff_key_vals,
		index->n_uniq * sizeof(index->stat_n_diff_key_vals[0]));

	UNIV_MEM_ASSERT_RW_ABORT(
		index->stat_n_sample_sizes,
		index->n_uniq * sizeof(index->stat_n_sample_sizes[0]));

	UNIV_MEM_ASSERT_RW_ABORT(
		index->stat_n_non_null_key_vals,
		index->n_uniq * sizeof(index->stat_n_non_null_key_vals[0]));

	UNIV_MEM_ASSERT_RW_ABORT(
		&index->stat_index_size,
		sizeof(index->stat_index_size));

	UNIV_MEM_ASSERT_RW_ABORT(
		&index->stat_n_leaf_pages,
		sizeof(index->stat_n_leaf_pages));
}
619 
620 /*********************************************************************//**
621 Check whether table's stats are initialized (assert if they are not). */
static
void
dict_stats_assert_initialized(
/*==========================*/
	const dict_table_t*	table)	/*!< in: table */
{
	ut_a(table->stat_initialized);

	/* verify that every table-level stats member holds initialized
	(defined) memory */
	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_last_recalc,
			   sizeof(table->stats_last_recalc));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_persistent,
			   sizeof(table->stat_persistent));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_auto_recalc,
			   sizeof(table->stats_auto_recalc));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_sample_pages,
			   sizeof(table->stats_sample_pages));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_n_rows,
			   sizeof(table->stat_n_rows));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_clustered_index_size,
			   sizeof(table->stat_clustered_index_size));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_sum_of_other_index_sizes,
			   sizeof(table->stat_sum_of_other_index_sizes));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stat_modified_counter,
			   sizeof(table->stat_modified_counter));

	UNIV_MEM_ASSERT_RW_ABORT(&table->stats_bg_flag,
			   sizeof(table->stats_bg_flag));

	/* and the same for every index that participates in stats */
	for (dict_index_t* index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		if (!dict_stats_should_ignore_index(index)) {
			dict_stats_assert_initialized_index(index);
		}
	}
}
666 
667 #define INDEX_EQ(i1, i2) \
668 	((i1) != NULL \
669 	 && (i2) != NULL \
670 	 && (i1)->id == (i2)->id \
671 	 && strcmp((i1)->name, (i2)->name) == 0)
672 
673 /*********************************************************************//**
674 Copy table and index statistics from one table to another, including index
675 stats. Extra indexes in src are ignored and extra indexes in dst are
676 initialized to correspond to an empty index. */
static
void
dict_stats_copy(
/*============*/
	dict_table_t*		dst,	/*!< in/out: destination table */
	const dict_table_t*	src)	/*!< in: source table */
{
	dst->stats_last_recalc = src->stats_last_recalc;
	dst->stat_n_rows = src->stat_n_rows;
	dst->stat_clustered_index_size = src->stat_clustered_index_size;
	dst->stat_sum_of_other_index_sizes = src->stat_sum_of_other_index_sizes;
	dst->stat_modified_counter = src->stat_modified_counter;

	dict_index_t*	dst_idx;
	dict_index_t*	src_idx;

	/* Walk dst's and src's index lists in parallel.  dst_idx drives
	the loop; src_idx is advanced in the step expression only while
	it is non-NULL (the && short-circuits once src's list runs out,
	after which src_idx stays NULL). */
	for (dst_idx = dict_table_get_first_index(dst),
	     src_idx = dict_table_get_first_index(src);
	     dst_idx != NULL;
	     dst_idx = dict_table_get_next_index(dst_idx),
	     (src_idx != NULL
	      && (src_idx = dict_table_get_next_index(src_idx)))) {

		if (dict_stats_should_ignore_index(dst_idx)) {
			/* FTS indexes carry no btree stats, so only
			non-FTS ignored indexes are zeroed out */
			if (!(dst_idx->type & DICT_FTS)) {
				dict_stats_empty_index(dst_idx);
			}
			continue;
		}

		ut_ad(!dict_index_is_univ(dst_idx));

		/* if the parallel walk has drifted out of sync (e.g.
		the lists differ), search src's whole list for the index
		matching dst_idx by id and name */
		if (!INDEX_EQ(src_idx, dst_idx)) {
			for (src_idx = dict_table_get_first_index(src);
			     src_idx != NULL;
			     src_idx = dict_table_get_next_index(src_idx)) {

				if (INDEX_EQ(src_idx, dst_idx)) {
					break;
				}
			}
		}

		/* no counterpart in src: initialize dst_idx as empty */
		if (!INDEX_EQ(src_idx, dst_idx)) {
			dict_stats_empty_index(dst_idx);
			continue;
		}

		ulint	n_copy_el;

		if (dst_idx->n_uniq > src_idx->n_uniq) {
			n_copy_el = src_idx->n_uniq;
			/* Since src is smaller some elements in dst
			will remain untouched by the following memmove(),
			thus we init all of them here. */
			dict_stats_empty_index(dst_idx);
		} else {
			n_copy_el = dst_idx->n_uniq;
		}

		memmove(dst_idx->stat_n_diff_key_vals,
			src_idx->stat_n_diff_key_vals,
			n_copy_el * sizeof(dst_idx->stat_n_diff_key_vals[0]));

		memmove(dst_idx->stat_n_sample_sizes,
			src_idx->stat_n_sample_sizes,
			n_copy_el * sizeof(dst_idx->stat_n_sample_sizes[0]));

		memmove(dst_idx->stat_n_non_null_key_vals,
			src_idx->stat_n_non_null_key_vals,
			n_copy_el * sizeof(dst_idx->stat_n_non_null_key_vals[0]));

		dst_idx->stat_index_size = src_idx->stat_index_size;

		dst_idx->stat_n_leaf_pages = src_idx->stat_n_leaf_pages;
	}

	dst->stat_initialized = TRUE;
}
756 
757 /*********************************************************************//**
758 Duplicate the stats of a table and its indexes.
759 This function creates a dummy dict_table_t object and copies the input
760 table's stats into it. The returned table object is not in the dictionary
761 cache and cannot be accessed by any other threads. In addition to the
762 members copied in dict_stats_table_clone_create() this function initializes
763 the following:
764 dict_table_t::stat_initialized
765 dict_table_t::stat_persistent
766 dict_table_t::stat_n_rows
767 dict_table_t::stat_clustered_index_size
768 dict_table_t::stat_sum_of_other_index_sizes
769 dict_table_t::stat_modified_counter
770 dict_index_t::stat_n_diff_key_vals[]
771 dict_index_t::stat_n_sample_sizes[]
772 dict_index_t::stat_n_non_null_key_vals[]
773 dict_index_t::stat_index_size
774 dict_index_t::stat_n_leaf_pages
775 The returned object should be freed with dict_stats_snapshot_free()
776 when no longer needed.
777 @return incomplete table object */
static
dict_table_t*
dict_stats_snapshot_create(
/*=======================*/
	dict_table_t*	table)	/*!< in: table whose stats to copy */
{
	/* lock order: dict_sys->mutex first, then the table stats
	S latch; released in reverse order below */
	mutex_enter(&dict_sys->mutex);

	dict_table_stats_lock(table, RW_S_LATCH);

	/* asserts (debug builds) that all stats we are about to copy
	have been initialized */
	dict_stats_assert_initialized(table);

	dict_table_t*	t;

	t = dict_stats_table_clone_create(table);

	dict_stats_copy(t, table);

	/* these members are not covered by dict_stats_copy(); see the
	function header comment for the full list */
	t->stat_persistent = table->stat_persistent;
	t->stats_auto_recalc = table->stats_auto_recalc;
	t->stats_sample_pages = table->stats_sample_pages;
	t->stats_bg_flag = table->stats_bg_flag;

	dict_table_stats_unlock(table, RW_S_LATCH);

	mutex_exit(&dict_sys->mutex);

	return(t);
}
807 
808 /*********************************************************************//**
809 Free the resources occupied by an object returned by
810 dict_stats_snapshot_create(). */
static
void
dict_stats_snapshot_free(
/*=====================*/
	dict_table_t*	t)	/*!< in: dummy table object to free */
{
	/* a snapshot is just a table clone, so freeing it is the same */
	dict_stats_table_clone_free(t);
}
819 
820 /*********************************************************************//**
821 Calculates new estimates for index statistics. This function is
822 relatively quick and is used to calculate transient statistics that
823 are not saved on disk. This was the only way to calculate statistics
824 before the Persistent Statistics feature was introduced. */
static
void
dict_stats_update_transient_for_index(
/*==================================*/
	dict_index_t*	index)	/*!< in/out: index */
{
	if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
	    && (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO
		|| !dict_index_is_clust(index))) {
		/* If we have set a high innodb_force_recovery
		level, do not calculate statistics, as a badly
		corrupted index can cause a crash in it.
		Initialize some bogus index cardinality
		statistics, so that the data can be queried in
		various means, also via secondary indexes. */
		dict_stats_empty_index(index);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
	} else if (ibuf_debug && !dict_index_is_clust(index)) {
		/* debug builds: skip secondary indexes when insert
		buffer debugging is enabled */
		dict_stats_empty_index(index);
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
	} else {
		mtr_t	mtr;
		ulint	size;
		mtr_start(&mtr);
		/* S-latch the index tree for the duration of both
		btr_get_size() calls */
		mtr_s_lock(dict_index_get_lock(index), &mtr);

		size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);

		if (size != ULINT_UNDEFINED) {
			index->stat_index_size = size;

			/* "size" is reused below for the number of
			leaf pages */
			size = btr_get_size(
				index, BTR_N_LEAF_PAGES, &mtr);
		}

		mtr_commit(&mtr);

		switch (size) {
		case ULINT_UNDEFINED:
			/* either btr_get_size() call failed; fall back
			to empty-index stats */
			dict_stats_empty_index(index);
			return;
		case 0:
			/* The root node of the tree is a leaf */
			size = 1;
		}
		/* all other values of "size" fall through unchanged */

		index->stat_n_leaf_pages = size;

		btr_estimate_number_of_different_key_vals(index);
	}
}
876 
877 /*********************************************************************//**
878 Calculates new estimates for table and index statistics. This function
879 is relatively quick and is used to calculate transient statistics that
880 are not saved on disk.
881 This was the only way to calculate statistics before the
882 Persistent Statistics feature was introduced. */
UNIV_INTERN
void
dict_stats_update_transient(
/*========================*/
	dict_table_t*	table)	/*!< in/out: table */
{
	dict_index_t*	index;
	ulint		sum_of_index_sizes	= 0;

	/* Find out the sizes of the indexes and how many different values
	for the key they approximately have */

	index = dict_table_get_first_index(table);

	if (dict_table_is_discarded(table)) {
		/* Nothing to do. */
		dict_stats_empty_table(table);
		return;
	} else if (index == NULL) {
		/* Table definition is corrupt */

		char	buf[MAX_FULL_NAME_LEN];
		ut_print_timestamp(stderr);
		fprintf(stderr, " InnoDB: table %s has no indexes. "
			"Cannot calculate statistics.\n",
			ut_format_name(table->name, TRUE, buf, sizeof(buf)));
		dict_stats_empty_table(table);
		return;
	}

	for (; index != NULL; index = dict_table_get_next_index(index)) {

		ut_ad(!dict_index_is_univ(index));

		if (index->type & DICT_FTS) {
			continue;
		}

		/* start from empty stats so that every member is
		defined even if the estimation below bails out early */
		dict_stats_empty_index(index);

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		dict_stats_update_transient_for_index(index);

		sum_of_index_sizes += index->stat_index_size;
	}

	/* the first index is the clustered one; its full-prefix
	cardinality estimates the table row count */
	index = dict_table_get_first_index(table);

	table->stat_n_rows = index->stat_n_diff_key_vals[
		dict_index_get_n_unique(index) - 1];

	table->stat_clustered_index_size = index->stat_index_size;

	/* subtract the clustered index size: the sum accumulated
	above includes it */
	table->stat_sum_of_other_index_sizes = sum_of_index_sizes
		- index->stat_index_size;

	table->stats_last_recalc = ut_time();

	table->stat_modified_counter = 0;

	table->stat_initialized = TRUE;
}
948 
949 /* @{ Pseudo code about the relation between the following functions
950 
951 let N = N_SAMPLE_PAGES(index)
952 
953 dict_stats_analyze_index()
954   for each n_prefix
955     search for good enough level:
956       dict_stats_analyze_index_level() // only called if level has <= N pages
957         // full scan of the level in one mtr
958         collect statistics about the given level
959       if we are not satisfied with the level, search next lower level
960     we have found a good enough level here
961     dict_stats_analyze_index_for_n_prefix(that level, stats collected above)
962       // full scan of the level in one mtr
963       dive below some records and analyze the leaf page there:
964       dict_stats_analyze_index_below_cur()
965 @} */
966 
967 /*********************************************************************//**
968 Find the total number and the number of distinct keys on a given level in
969 an index. Each of the 1..n_uniq prefixes are looked up and the results are
970 saved in the array n_diff[0] .. n_diff[n_uniq - 1]. The total number of
971 records on the level is saved in total_recs.
Also, the index of the last record in each group of equal records is saved
in n_diff_boundaries[0..n_uniq - 1]; record indexing starts from the leftmost
record on the level and continues across page boundaries, counting from 0. */
static
void
dict_stats_analyze_index_level(
/*===========================*/
	dict_index_t*	index,		/*!< in: index */
	ulint		level,		/*!< in: level */
	ib_uint64_t*	n_diff,		/*!< out: array for number of
					distinct keys for all prefixes */
	ib_uint64_t*	total_recs,	/*!< out: total number of records */
	ib_uint64_t*	total_pages,	/*!< out: total number of pages */
	boundaries_t*	n_diff_boundaries,/*!< out: boundaries of the groups
					of distinct keys */
	mtr_t*		mtr)		/*!< in/out: mini-transaction */
{
	ulint		n_uniq;
	mem_heap_t*	heap;
	btr_pcur_t	pcur;
	const page_t*	page;
	const rec_t*	rec;
	const rec_t*	prev_rec;
	bool		prev_rec_is_copied;
	byte*		prev_rec_buf = NULL;
	ulint		prev_rec_buf_size = 0;
	ulint*		rec_offsets;
	ulint*		prev_rec_offsets;
	ulint		i;

	DEBUG_PRINTF("    %s(table=%s, index=%s, level=%lu)\n", __func__,
		     index->table->name, index->name, level);

	/* The caller must already hold an S-latch on the index lock in
	this mtr; the pcur below is opened with BTR_ALREADY_S_LATCHED. */
	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_S_LOCK));

	n_uniq = dict_index_get_n_unique(index);

	/* elements in the n_diff array are 0..n_uniq-1 (inclusive) */
	memset(n_diff, 0x0, n_uniq * sizeof(n_diff[0]));

	/* Allocate space for the offsets header (the allocation size at
	offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
	so that this will never be less than the size calculated in
	rec_get_offsets_func(). */
	i = (REC_OFFS_HEADER_SIZE + 1 + 1) + index->n_fields;

	heap = mem_heap_create((2 * sizeof *rec_offsets) * i);
	rec_offsets = static_cast<ulint*>(
		mem_heap_alloc(heap, i * sizeof *rec_offsets));
	prev_rec_offsets = static_cast<ulint*>(
		mem_heap_alloc(heap, i * sizeof *prev_rec_offsets));
	rec_offs_set_n_alloc(rec_offsets, i);
	rec_offs_set_n_alloc(prev_rec_offsets, i);

	/* reset the dynamic arrays n_diff_boundaries[0..n_uniq-1] */
	if (n_diff_boundaries != NULL) {
		for (i = 0; i < n_uniq; i++) {
			n_diff_boundaries[i].erase(
				n_diff_boundaries[i].begin(),
				n_diff_boundaries[i].end());
		}
	}

	/* Position pcur on the leftmost record on the leftmost page
	on the desired level. */

	btr_pcur_open_at_index_side(
		true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED,
		&pcur, true, level, mtr);
	btr_pcur_move_to_next_on_page(&pcur);

	page = btr_pcur_get_page(&pcur);

	/* The page must not be empty, except when
	it is the root page (and the whole index is empty). */
	ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page));
	ut_ad(btr_pcur_get_rec(&pcur)
	      == page_rec_get_next_const(page_get_infimum_rec(page)));

	/* check that we are indeed on the desired level */
	ut_a(btr_page_get_level(page, mtr) == level);

	/* there should not be any pages on the left */
	ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);

	/* check whether the first record on the leftmost page is marked
	as such, if we are on a non-leaf level */
	ut_a((level == 0)
	     == !(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
			  btr_pcur_get_rec(&pcur), page_is_comp(page))));

	prev_rec = NULL;
	prev_rec_is_copied = false;

	/* no records by default */
	*total_recs = 0;

	*total_pages = 0;

	/* iterate over all user records on this level
	and compare each two adjacent ones, even the last on page
	X and the first on page X+1 */
	for (;
	     btr_pcur_is_on_user_rec(&pcur);
	     btr_pcur_move_to_next_user_rec(&pcur, mtr)) {

		ulint	matched_fields = 0;
		ulint	matched_bytes = 0;
		bool	rec_is_last_on_page;

		rec = btr_pcur_get_rec(&pcur);

		/* If rec and prev_rec are on different pages, then prev_rec
		must have been copied, because we hold latch only on the page
		where rec resides. */
		if (prev_rec != NULL
		    && page_align(rec) != page_align(prev_rec)) {

			ut_a(prev_rec_is_copied);
		}

		rec_is_last_on_page =
			page_rec_is_supremum(page_rec_get_next_const(rec));

		/* increment the pages counter at the end of each page */
		if (rec_is_last_on_page) {

			(*total_pages)++;
		}

		/* Skip delete-marked records on the leaf level. If we
		do not skip them, then ANALYZE quickly after DELETE
		could count them or not (purge may have already wiped
		them away) which brings non-determinism. We skip only
		leaf-level delete marks because delete marks on
		non-leaf level do not make sense. When
		srv_stats_include_delete_marked is set, the skip is
		disabled and delete-marked records are counted too. */

		if (level == 0 && (srv_stats_include_delete_marked ? 0:
		    rec_get_deleted_flag(
			    rec,
			    page_is_comp(btr_pcur_get_page(&pcur))))) {

			/* If this skipped record is the last one on the
			page, prev_rec must still be copied off the page
			before the cursor moves on and the latch is
			released. */
			if (rec_is_last_on_page
			    && !prev_rec_is_copied
			    && prev_rec != NULL) {
				/* copy prev_rec */

				prev_rec_offsets = rec_get_offsets(
					prev_rec, index, prev_rec_offsets,
					n_uniq, &heap);

				prev_rec = rec_copy_prefix_to_buf(
					prev_rec, index,
					rec_offs_n_fields(prev_rec_offsets),
					&prev_rec_buf, &prev_rec_buf_size);

				prev_rec_is_copied = true;
			}

			continue;
		}

		rec_offsets = rec_get_offsets(
			rec, index, rec_offsets, n_uniq, &heap);

		(*total_recs)++;

		if (prev_rec != NULL) {
			prev_rec_offsets = rec_get_offsets(
				prev_rec, index, prev_rec_offsets,
				n_uniq, &heap);

			cmp_rec_rec_with_match(rec,
					       prev_rec,
					       rec_offsets,
					       prev_rec_offsets,
					       index,
					       FALSE,
					       &matched_fields,
					       &matched_bytes);

			for (i = matched_fields; i < n_uniq; i++) {

				if (n_diff_boundaries != NULL) {
					/* push the index of the previous
					record, that is - the last one from
					a group of equal keys */

					ib_uint64_t	idx;

					/* the index of the current record
					is total_recs - 1, the index of the
					previous record is total_recs - 2;
					we know that idx is not going to
					become negative here because if we
					are in this branch then there is a
					previous record and thus
					total_recs >= 2 */
					idx = *total_recs - 2;

					n_diff_boundaries[i].push_back(idx);
				}

				/* increment the number of different keys
				for n_prefix=i+1 (e.g. if i=0 then we increment
				for n_prefix=1 which is stored in n_diff[0]) */
				n_diff[i]++;
			}
		} else {
			/* this is the first non-delete marked record */
			for (i = 0; i < n_uniq; i++) {
				n_diff[i] = 1;
			}
		}

		if (rec_is_last_on_page) {
			/* end of a page has been reached */

			/* we need to copy the record instead of assigning
			like prev_rec = rec; because when we traverse the
			records on this level at some point we will jump from
			one page to the next and then rec and prev_rec will
			be on different pages and
			btr_pcur_move_to_next_user_rec() will release the
			latch on the page that prev_rec is on */
			prev_rec = rec_copy_prefix_to_buf(
				rec, index, rec_offs_n_fields(rec_offsets),
				&prev_rec_buf, &prev_rec_buf_size);
			prev_rec_is_copied = true;

		} else {
			/* still on the same page, the next call to
			btr_pcur_move_to_next_user_rec() will not jump
			on the next page, we can simply assign pointers
			instead of copying the records like above */

			prev_rec = rec;
			prev_rec_is_copied = false;
		}
	}

	/* if *total_pages is left untouched then the above loop was not
	entered at all and there is one page in the whole tree which is
	empty or the loop was entered but this is level 0, contains one page
	and all records are delete-marked */
	if (*total_pages == 0) {

		ut_ad(level == 0);
		ut_ad(*total_recs == 0);

		*total_pages = 1;
	}

	/* if there are records on this level and boundaries
	should be saved */
	if (*total_recs > 0 && n_diff_boundaries != NULL) {

		/* remember the index of the last record on the level as the
		last one from the last group of equal keys; this holds for
		all possible prefixes */
		for (i = 0; i < n_uniq; i++) {
			ib_uint64_t	idx;

			idx = *total_recs - 1;

			n_diff_boundaries[i].push_back(idx);
		}
	}

	/* now in n_diff_boundaries[i] there are exactly n_diff[i] integers,
	for i=0..n_uniq-1 */

#ifdef UNIV_STATS_DEBUG
	for (i = 0; i < n_uniq; i++) {

		DEBUG_PRINTF("    %s(): total recs: " UINT64PF
			     ", total pages: " UINT64PF
			     ", n_diff[%lu]: " UINT64PF "\n",
			     __func__, *total_recs,
			     *total_pages,
			     i, n_diff[i]);

#if 0
		if (n_diff_boundaries != NULL) {
			ib_uint64_t	j;

			DEBUG_PRINTF("    %s(): boundaries[%lu]: ",
				     __func__, i);

			for (j = 0; j < n_diff[i]; j++) {
				ib_uint64_t	idx;

				idx = n_diff_boundaries[i][j];

				DEBUG_PRINTF(UINT64PF "=" UINT64PF ", ",
					     j, idx);
			}
			DEBUG_PRINTF("\n");
		}
#endif
	}
#endif /* UNIV_STATS_DEBUG */

	/* Release the latch on the last page, because that is not done by
	btr_pcur_close(). This function works also for non-leaf pages. */
	btr_leaf_page_release(btr_pcur_get_block(&pcur), BTR_SEARCH_LEAF, mtr);

	btr_pcur_close(&pcur);

	/* free the prefix-copy buffer allocated by
	rec_copy_prefix_to_buf(), if any was needed */
	if (prev_rec_buf != NULL) {

		mem_free(prev_rec_buf);
	}

	mem_heap_free(heap);
}
1289 
/* aux enum for controlling the behavior of dict_stats_scan_page() @{ */
enum page_scan_method_t {
	/* scan all records on the given page and count the number of
	distinct ones; delete-marked records are skipped and thus do
	not contribute to the count */
	COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED,
	/* quit when the first record that differs from its right
	neighbor is found; the resulting count is then 0, 1 or 2, see
	dict_stats_scan_page() */
	QUIT_ON_FIRST_NON_BORING,
	/* scan all records on the given page and count the number of
	distinct ones, including delete-marked records */
	COUNT_ALL_NON_BORING_INCLUDE_DEL_MARKED
};
/* @} */
1304 
/** Scan a page, reading records from left to right and counting the number
of distinct records (looking only at the first n_prefix
columns) and the number of external pages pointed by records from this page.
If scan_method is QUIT_ON_FIRST_NON_BORING then the function
will return as soon as it finds a record that does not match its neighbor
to the right, which means that in the case of QUIT_ON_FIRST_NON_BORING the
returned n_diff can either be 0 (empty page), 1 (the whole page has all keys
equal) or 2 (the function found a non-boring record and returned).
@param[out]	out_rec			record, or NULL
@param[out]	offsets1		rec_get_offsets() working space (must
be big enough)
@param[out]	offsets2		rec_get_offsets() working space (must
be big enough)
@param[in]	index			index of the page
@param[in]	page			the page to scan
@param[in]	n_prefix		look at the first n_prefix columns
@param[in]	scan_method		scan to the end of the page or not
@param[out]	n_diff			number of distinct records encountered
@param[out]	n_external_pages	if this is non-NULL then it will be set
to the number of externally stored pages which were encountered
@return offsets1 or offsets2 (the offsets of *out_rec),
or NULL if the page is empty and does not contain user records. */
UNIV_INLINE
ulint*
dict_stats_scan_page(
	const rec_t**		out_rec,
	ulint*			offsets1,
	ulint*			offsets2,
	dict_index_t*		index,
	const page_t*		page,
	ulint			n_prefix,
	page_scan_method_t	scan_method,
	ib_uint64_t*		n_diff,
	ib_uint64_t*		n_external_pages)
{
	ulint*		offsets_rec		= offsets1;
	ulint*		offsets_next_rec	= offsets2;
	const rec_t*	rec;
	const rec_t*	next_rec;
	/* A dummy heap, to be passed to rec_get_offsets().
	Because offsets1,offsets2 should be big enough,
	this memory heap should never be used. */
	mem_heap_t*	heap			= NULL;
	const rec_t*	(*get_next)(const rec_t*);

	/* Only COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED skips
	delete-marked records; the other two scan methods iterate over
	them as well. */
	if (scan_method == COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED) {
		get_next = page_rec_get_next_non_del_marked;
	} else {
		get_next = page_rec_get_next_const;
	}

	const bool	should_count_external_pages = n_external_pages != NULL;

	if (should_count_external_pages) {
		*n_external_pages = 0;
	}

	rec = get_next(page_get_infimum_rec(page));

	if (page_rec_is_supremum(rec)) {
		/* the page is empty or contains only delete-marked records */
		*n_diff = 0;
		*out_rec = NULL;
		return(NULL);
	}

	offsets_rec = rec_get_offsets(rec, index, offsets_rec,
				      ULINT_UNDEFINED, &heap);

	if (should_count_external_pages) {
		*n_external_pages += btr_rec_get_externally_stored_len(
			rec, offsets_rec);
	}

	next_rec = get_next(rec);

	*n_diff = 1;

	/* compare each record with its right neighbor until the
	supremum is reached (or we quit early, depending on
	scan_method) */
	while (!page_rec_is_supremum(next_rec)) {

		ulint	matched_fields = 0;
		ulint	matched_bytes = 0;

		offsets_next_rec = rec_get_offsets(next_rec, index,
						   offsets_next_rec,
						   ULINT_UNDEFINED,
						   &heap);

		/* check whether rec != next_rec when looking at
		the first n_prefix fields */
		cmp_rec_rec_with_match(rec, next_rec,
				       offsets_rec, offsets_next_rec,
				       index, FALSE, &matched_fields,
				       &matched_bytes);

		if (matched_fields < n_prefix) {
			/* rec != next_rec, => rec is non-boring */

			(*n_diff)++;

			if (scan_method == QUIT_ON_FIRST_NON_BORING) {
				goto func_exit;
			}
		}

		rec = next_rec;
		{
			/* Assign offsets_rec = offsets_next_rec
			so that offsets_rec matches with rec which
			was just assigned rec = next_rec above.
			Also need to point offsets_next_rec to the
			place where offsets_rec was pointing before
			because we have just 2 placeholders where
			data is actually stored:
			offsets_onstack1 and offsets_onstack2 and we
			are using them in circular fashion
			(offsets[_next]_rec are just pointers to
			those placeholders). */
			ulint*	offsets_tmp;
			offsets_tmp = offsets_rec;
			offsets_rec = offsets_next_rec;
			offsets_next_rec = offsets_tmp;
		}

		if (should_count_external_pages) {
			*n_external_pages += btr_rec_get_externally_stored_len(
				rec, offsets_rec);
		}

		next_rec = get_next(next_rec);
	}

func_exit:
	/* offsets1,offsets2 should have been big enough */
	ut_a(heap == NULL);
	*out_rec = rec;
	return(offsets_rec);
}
1443 
/** Dive below the current position of a cursor and calculate the number of
distinct records on the leaf page, when looking at the fist n_prefix
columns. Also calculate the number of external pages pointed by records
on the leaf page.
@param[in]	cur			cursor
@param[in]	n_prefix		look at the first n_prefix columns
when comparing records
@param[out]	n_diff			number of distinct records
@param[out]	n_external_pages	number of external pages
@return number of distinct records on the leaf page */
static
void
dict_stats_analyze_index_below_cur(
	const btr_cur_t*	cur,
	ulint			n_prefix,
	ib_uint64_t*		n_diff,
	ib_uint64_t*		n_external_pages)
{
	dict_index_t*	index;
	ulint		space;
	ulint		zip_size;
	buf_block_t*	block;
	ulint		page_no;
	const page_t*	page;
	mem_heap_t*	heap;
	const rec_t*	rec;
	ulint*		offsets1;
	ulint*		offsets2;
	ulint*		offsets_rec;
	ulint		size;
	mtr_t		mtr;

	index = btr_cur_get_index(cur);

	/* Allocate offsets for the record and the node pointer, for
	node pointer records. In a secondary index, the node pointer
	record will consist of all index fields followed by a child
	page number.
	Allocate space for the offsets header (the allocation size at
	offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
	so that this will never be less than the size calculated in
	rec_get_offsets_func(). */
	size = (1 + REC_OFFS_HEADER_SIZE) + 1 + dict_index_get_n_fields(index);

	heap = mem_heap_create(size * (sizeof *offsets1 + sizeof *offsets2));

	offsets1 = static_cast<ulint*>(mem_heap_alloc(
			heap, size * sizeof *offsets1));

	offsets2 = static_cast<ulint*>(mem_heap_alloc(
			heap, size * sizeof *offsets2));

	rec_offs_set_n_alloc(offsets1, size);
	rec_offs_set_n_alloc(offsets2, size);

	space = dict_index_get_space(index);
	zip_size = dict_table_zip_size(index->table);

	/* cur is positioned on a node pointer record; its child page
	number tells us where to start the descent */
	rec = btr_cur_get_rec(cur);

	offsets_rec = rec_get_offsets(rec, index, offsets1,
				      ULINT_UNDEFINED, &heap);

	page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec);

	/* assume no external pages by default - in case we quit from this
	function without analyzing any leaf pages */
	*n_external_pages = 0;

	mtr_start(&mtr);

	/* descend to the leaf level on the B-tree; each visited page is
	S-latched within this mtr and released at mtr_commit() */
	for (;;) {

		block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH,
					 NULL /* no guessed block */,
					 BUF_GET, __FILE__, __LINE__, &mtr);

		page = buf_block_get_frame(block);

		if (btr_page_get_level(page, mtr) == 0) {
			/* leaf level */
			break;
		}
		/* else */

		/* search for the first non-boring record on the page */
		offsets_rec = dict_stats_scan_page(
			&rec, offsets1, offsets2, index, page, n_prefix,
			QUIT_ON_FIRST_NON_BORING, n_diff, NULL);

		/* pages on level > 0 are not allowed to be empty */
		ut_a(offsets_rec != NULL);
		/* if page is not empty (offsets_rec != NULL) then n_diff must
		be > 0, otherwise there is a bug in dict_stats_scan_page() */
		ut_a(*n_diff > 0);

		if (*n_diff == 1) {
			mtr_commit(&mtr);

			/* page has all keys equal and the end of the page
			was reached by dict_stats_scan_page(), no need to
			descend to the leaf level */
			mem_heap_free(heap);
			/* can't get an estimate for n_external_pages here
			because we do not dive to the leaf level, assume no
			external pages (*n_external_pages was assigned to 0
			above). */
			return;
		}
		/* else */

		/* when we instruct dict_stats_scan_page() to quit on the
		first non-boring record it finds, then the returned n_diff
		can either be 0 (empty page), 1 (page has all keys equal) or
		2 (non-boring record was found) */
		ut_a(*n_diff == 2);

		/* we have a non-boring record in rec, descend below it */

		page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec);
	}

	/* make sure we got a leaf page as a result from the above loop */
	ut_ad(btr_page_get_level(page, &mtr) == 0);

	/* scan the leaf page and find the number of distinct keys,
	when looking only at the first n_prefix columns; also estimate
	the number of externally stored pages pointed by records on this
	page */

	offsets_rec = dict_stats_scan_page(
		&rec, offsets1, offsets2, index, page, n_prefix,
		srv_stats_include_delete_marked ?
		COUNT_ALL_NON_BORING_INCLUDE_DEL_MARKED:
		COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED, n_diff,
		n_external_pages);

#if 0
	DEBUG_PRINTF("      %s(): n_diff below page_no=%lu: " UINT64PF "\n",
		     __func__, page_no, n_diff);
#endif

	mtr_commit(&mtr);
	mem_heap_free(heap);
}
1590 
/** Input data that is used to calculate dict_index_t::stat_n_diff_key_vals[]
for each n-columns prefix (n from 1 to n_uniq). One such structure is filled
per n-prefix. */
struct n_diff_data_t {
	/** Index of the level on which the descent through the btree
	stopped. level 0 is the leaf level. This is >= 1 because we
	avoid scanning the leaf level because it may contain too many
	pages and doing so is useless when combined with the random dives -
	if we are to scan the leaf level, this means a full scan and we can
	simply do that instead of fiddling with picking random records higher
	in the tree and to dive below them. At the start of the analyzing
	we may decide to do full scan of the leaf level, but then this
	structure is not used in that code path. */
	ulint		level;

	/** Number of records on the level where the descend through the btree
	stopped. When we scan the btree from the root, we stop at some mid
	level, choose some records from it and dive below them towards a leaf
	page to analyze. */
	ib_uint64_t	n_recs_on_level;

	/** Number of different key values that were found on the mid level. */
	ib_uint64_t	n_diff_on_level;

	/** Number of leaf pages that are analyzed. This is also the same as
	the number of records that we pick from the mid level and dive below
	them. */
	ib_uint64_t	n_leaf_pages_to_analyze;

	/** Cumulative sum of the number of different key values that were
	found on all analyzed pages. Filled in by
	dict_stats_analyze_index_for_n_prefix(). */
	ib_uint64_t	n_diff_all_analyzed_pages;

	/** Cumulative sum of the number of external pages (stored outside of
	the btree but in the same file segment), as measured by
	btr_rec_get_externally_stored_len() on the analyzed leaf pages. */
	ib_uint64_t	n_external_pages_sum;
};
1627 
/** Estimate the number of different key values in an index when looking at
the first n_prefix columns. For a given level in an index select
n_diff_data->n_leaf_pages_to_analyze records from that level and dive below
them to the corresponding leaf pages, then scan those leaf pages and save the
sampling results in n_diff_data->n_diff_all_analyzed_pages.
@param[in]	index			index
@param[in]	n_prefix		look at first 'n_prefix' columns when
comparing records
@param[in]	boundaries		a vector that contains
n_diff_data->n_diff_on_level integers each of which represents the index (on
level 'level', counting from left/smallest to right/biggest from 0) of the
last record from each group of distinct keys
@param[in,out]	n_diff_data		n_diff_all_analyzed_pages and
n_external_pages_sum in this structure will be set by this function. The
members level, n_diff_on_level and n_leaf_pages_to_analyze must be set by the
caller in advance - they are used by some calculations inside this function
@param[in,out]	mtr			mini-transaction */
static
void
dict_stats_analyze_index_for_n_prefix(
	dict_index_t*		index,
	ulint			n_prefix,
	const boundaries_t*	boundaries,
	n_diff_data_t*		n_diff_data,
	mtr_t*			mtr)
{
	btr_pcur_t	pcur;
	const page_t*	page;
	ib_uint64_t	rec_idx;
	ib_uint64_t	i;

#if 0
	DEBUG_PRINTF("    %s(table=%s, index=%s, level=%lu, n_prefix=%lu, "
		     "n_diff_on_level=" UINT64PF ")\n",
		     __func__, index->table->name, index->name, level,
		     n_prefix, n_diff_data->n_diff_on_level);
#endif

	/* The caller must already hold an S-latch on the index lock in
	this mtr; the pcur below is opened with BTR_ALREADY_S_LATCHED. */
	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_S_LOCK));

	/* Position pcur on the leftmost record on the leftmost page
	on the desired level. */

	btr_pcur_open_at_index_side(
		true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED,
		&pcur, true, n_diff_data->level, mtr);
	btr_pcur_move_to_next_on_page(&pcur);

	page = btr_pcur_get_page(&pcur);

	const rec_t*	first_rec = btr_pcur_get_rec(&pcur);

	/* We shouldn't be scanning the leaf level. The caller of this function
	should have stopped the descend on level 1 or higher. */
	ut_ad(n_diff_data->level > 0);
	ut_ad(!page_is_leaf(page));

	/* The page must not be empty, except when
	it is the root page (and the whole index is empty). */
	ut_ad(btr_pcur_is_on_user_rec(&pcur));
	ut_ad(first_rec == page_rec_get_next_const(page_get_infimum_rec(page)));

	/* check that we are indeed on the desired level */
	ut_a(btr_page_get_level(page, mtr) == n_diff_data->level);

	/* there should not be any pages on the left */
	ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);

	/* check whether the first record on the leftmost page is marked
	as such; we are on a non-leaf level */
	ut_a(rec_get_info_bits(first_rec, page_is_comp(page))
	     & REC_INFO_MIN_REC_FLAG);

	const ib_uint64_t	last_idx_on_level = boundaries->at(
		static_cast<unsigned>(n_diff_data->n_diff_on_level - 1));

	/* index (on this level, counting from 0) of the record that
	pcur is currently positioned on; only moves forward below */
	rec_idx = 0;

	n_diff_data->n_diff_all_analyzed_pages = 0;
	n_diff_data->n_external_pages_sum = 0;

	for (i = 0; i < n_diff_data->n_leaf_pages_to_analyze; i++) {
		/* there are n_diff_on_level elements
		in 'boundaries' and we divide those elements
		into n_leaf_pages_to_analyze segments, for example:

		let n_diff_on_level=100, n_leaf_pages_to_analyze=4, then:
		segment i=0:  [0, 24]
		segment i=1: [25, 49]
		segment i=2: [50, 74]
		segment i=3: [75, 99] or

		let n_diff_on_level=1, n_leaf_pages_to_analyze=1, then:
		segment i=0: [0, 0] or

		let n_diff_on_level=2, n_leaf_pages_to_analyze=2, then:
		segment i=0: [0, 0]
		segment i=1: [1, 1] or

		let n_diff_on_level=13, n_leaf_pages_to_analyze=7, then:
		segment i=0:  [0,  0]
		segment i=1:  [1,  2]
		segment i=2:  [3,  4]
		segment i=3:  [5,  6]
		segment i=4:  [7,  8]
		segment i=5:  [9, 10]
		segment i=6: [11, 12]

		then we select a random record from each segment and dive
		below it */
		const ib_uint64_t	n_diff = n_diff_data->n_diff_on_level;
		const ib_uint64_t	n_pick
			= n_diff_data->n_leaf_pages_to_analyze;

		const ib_uint64_t	left = n_diff * i / n_pick;
		const ib_uint64_t	right = n_diff * (i + 1) / n_pick - 1;

		ut_a(left <= right);
		ut_a(right <= last_idx_on_level);

		/* we do not pass (left, right) because we do not want to ask
		ut_rnd_interval() to work with too big numbers since
		ib_uint64_t could be bigger than ulint */
		const ulint	rnd = ut_rnd_interval(
			0, static_cast<ulint>(right - left));

		const ib_uint64_t	dive_below_idx
			= boundaries->at(static_cast<unsigned>(left + rnd));

#if 0
		DEBUG_PRINTF("    %s(): dive below record with index="
			     UINT64PF "\n", __func__, dive_below_idx);
#endif

		/* seek to the record with index dive_below_idx */
		while (rec_idx < dive_below_idx
		       && btr_pcur_is_on_user_rec(&pcur)) {

			btr_pcur_move_to_next_user_rec(&pcur, mtr);
			rec_idx++;
		}

		/* if the level has finished before the record we are
		searching for, this means that the B-tree has changed in
		the meantime, quit our sampling and use whatever stats
		we have collected so far */
		if (rec_idx < dive_below_idx) {

			ut_ad(!btr_pcur_is_on_user_rec(&pcur));
			break;
		}

		/* it could be that the tree has changed in such a way that
		the record under dive_below_idx is the supremum record, in
		this case rec_idx == dive_below_idx and pcur is positioned
		on the supremum, we do not want to dive below it */
		if (!btr_pcur_is_on_user_rec(&pcur)) {
			break;
		}

		ut_a(rec_idx == dive_below_idx);

		ib_uint64_t	n_diff_on_leaf_page;
		ib_uint64_t	n_external_pages;

		dict_stats_analyze_index_below_cur(btr_pcur_get_btr_cur(&pcur),
						   n_prefix,
						   &n_diff_on_leaf_page,
						   &n_external_pages);

		/* We adjust n_diff_on_leaf_page here to avoid counting
		one record twice - once as the last on some page and once
		as the first on another page. Consider the following example:
		Leaf level:
		page: (2,2,2,2,3,3)
		... many pages like (3,3,3,3,3,3) ...
		page: (3,3,3,3,5,5)
		... many pages like (5,5,5,5,5,5) ...
		page: (5,5,5,5,8,8)
		page: (8,8,8,8,9,9)
		our algo would (correctly) get an estimate that there are
		2 distinct records per page (average). Having 4 pages below
		non-boring records, it would (wrongly) estimate the number
		of distinct records to 8. */
		if (n_diff_on_leaf_page > 0) {
			n_diff_on_leaf_page--;
		}

		n_diff_data->n_diff_all_analyzed_pages += n_diff_on_leaf_page;

		n_diff_data->n_external_pages_sum += n_external_pages;
	}

	btr_pcur_close(&pcur);
}
1824 
1825 /** Set dict_index_t::stat_n_diff_key_vals[] and stat_n_sample_sizes[].
1826 @param[in]	n_diff_data	input data to use to derive the results
1827 @param[in,out]	index		index whose stat_n_diff_key_vals[] to set */
1828 UNIV_INLINE
1829 void
dict_stats_index_set_n_diff(const n_diff_data_t * n_diff_data,dict_index_t * index)1830 dict_stats_index_set_n_diff(
1831 	const n_diff_data_t*	n_diff_data,
1832 	dict_index_t*		index)
1833 {
1834 	for (ulint n_prefix = dict_index_get_n_unique(index);
1835 	     n_prefix >= 1;
1836 	     n_prefix--) {
1837 		/* n_diff_all_analyzed_pages can be 0 here if
1838 		all the leaf pages sampled contained only
1839 		delete-marked records. In this case we should assign
1840 		0 to index->stat_n_diff_key_vals[n_prefix - 1], which
1841 		the formula below does. */
1842 
1843 		const n_diff_data_t*	data = &n_diff_data[n_prefix - 1];
1844 
1845 		ut_ad(data->n_leaf_pages_to_analyze > 0);
1846 		ut_ad(data->n_recs_on_level > 0);
1847 
1848 		ulint	n_ordinary_leaf_pages;
1849 
1850 		if (data->level == 1) {
1851 			/* If we know the number of records on level 1, then
1852 			this number is the same as the number of pages on
1853 			level 0 (leaf). */
1854 			n_ordinary_leaf_pages = data->n_recs_on_level;
1855 		} else {
1856 			/* If we analyzed D ordinary leaf pages and found E
1857 			external pages in total linked from those D ordinary
1858 			leaf pages, then this means that the ratio
1859 			ordinary/external is D/E. Then the ratio ordinary/total
1860 			is D / (D + E). Knowing that the total number of pages
1861 			is T (including ordinary and external) then we estimate
1862 			that the total number of ordinary leaf pages is
1863 			T * D / (D + E). */
1864 			n_ordinary_leaf_pages
1865 				= index->stat_n_leaf_pages
1866 				* data->n_leaf_pages_to_analyze
1867 				/ (data->n_leaf_pages_to_analyze
1868 				   + data->n_external_pages_sum);
1869 		}
1870 
1871 		/* See REF01 for an explanation of the algorithm */
1872 		index->stat_n_diff_key_vals[n_prefix - 1]
1873 			= n_ordinary_leaf_pages
1874 
1875 			* data->n_diff_on_level
1876 			/ data->n_recs_on_level
1877 
1878 			* data->n_diff_all_analyzed_pages
1879 			/ data->n_leaf_pages_to_analyze;
1880 
1881 		index->stat_n_sample_sizes[n_prefix - 1]
1882 			= data->n_leaf_pages_to_analyze;
1883 
1884 		DEBUG_PRINTF("    %s(): n_diff=" UINT64PF " for n_prefix=%lu"
1885 			     " (%lu"
1886 			     " * " UINT64PF " / " UINT64PF
1887 			     " * " UINT64PF " / " UINT64PF ")\n",
1888 			     __func__,
1889 			     index->stat_n_diff_key_vals[n_prefix - 1],
1890 			     n_prefix,
1891 			     index->stat_n_leaf_pages,
1892 			     data->n_diff_on_level,
1893 			     data->n_recs_on_level,
1894 			     data->n_diff_all_analyzed_pages,
1895 			     data->n_leaf_pages_to_analyze);
1896 	}
1897 }
1898 
/*********************************************************************//**
Calculates new statistics for a given index and saves them to the index
members stat_n_diff_key_vals[], stat_n_sample_sizes[], stat_index_size and
stat_n_leaf_pages. This function could be slow.

Strategy: if the tree is small (or the configured sample size would cover
most of the leaf level anyway) do a full scan of level 0. Otherwise, for
each n-column prefix, descend from the root to find the highest level with
"enough" distinct records, then sample leaf pages below randomly chosen
records of that level (dict_stats_analyze_index_for_n_prefix()). */
static
void
dict_stats_analyze_index(
/*=====================*/
	dict_index_t*	index)	/*!< in/out: index to analyze */
{
	ulint		root_level;
	ulint		level;
	bool		level_is_analyzed;
	ulint		n_uniq;
	ulint		n_prefix;
	ib_uint64_t	total_recs;
	ib_uint64_t	total_pages;
	mtr_t		mtr;
	ulint		size;
	DBUG_ENTER("dict_stats_analyze_index");

	DBUG_PRINT("info", ("index: %s, online status: %d", index->name,
			    dict_index_get_online_status(index)));

	DEBUG_PRINTF("  %s(index=%s)\n", __func__, index->name);

	/* Start from neutral (empty) stats; they are filled in below,
	or left empty if the tree cannot be measured. */
	dict_stats_empty_index(index);

	mtr_start(&mtr);

	/* S-latch the index tree while measuring its size. */
	mtr_s_lock(dict_index_get_lock(index), &mtr);

	size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);

	if (size != ULINT_UNDEFINED) {
		index->stat_index_size = size;
		size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
	}

	/* Release the X locks on the root page taken by btr_get_size() */
	mtr_commit(&mtr);

	switch (size) {
	case ULINT_UNDEFINED:
		/* The tree size could not be determined; keep the empty
		stats assigned by dict_stats_empty_index() above. */
		dict_stats_assert_initialized_index(index);
		DBUG_VOID_RETURN;
	case 0:
		/* The root node of the tree is a leaf */
		size = 1;
	}

	index->stat_n_leaf_pages = size;

	mtr_start(&mtr);

	mtr_s_lock(dict_index_get_lock(index), &mtr);

	root_level = btr_height_get(index, &mtr);

	n_uniq = dict_index_get_n_unique(index);

	/* If the tree has just one level (and one page) or if the user
	has requested to sample too many pages then do full scan.

	For each n-column prefix (for n=1..n_uniq) N_SAMPLE_PAGES(index)
	will be sampled, so in total N_SAMPLE_PAGES(index) * n_uniq leaf
	pages will be sampled. If that number is bigger than the total
	number of leaf pages then do full scan of the leaf level instead
	since it will be faster and will give better results. */

	if (root_level == 0
	    || N_SAMPLE_PAGES(index) * n_uniq > index->stat_n_leaf_pages) {

		if (root_level == 0) {
			DEBUG_PRINTF("  %s(): just one page, "
				     "doing full scan\n", __func__);
		} else {
			DEBUG_PRINTF("  %s(): too many pages requested for "
				     "sampling, doing full scan\n", __func__);
		}

		/* do full scan of level 0; save results directly
		into the index */

		dict_stats_analyze_index_level(index,
					       0 /* leaf level */,
					       index->stat_n_diff_key_vals,
					       &total_recs,
					       &total_pages,
					       NULL /* boundaries not needed */,
					       &mtr);

		for (ulint i = 0; i < n_uniq; i++) {
			index->stat_n_sample_sizes[i] = total_pages;
		}

		mtr_commit(&mtr);

		dict_stats_assert_initialized_index(index);
		DBUG_VOID_RETURN;
	}

	/* For each level that is being scanned in the btree, this contains the
	number of different key values for all possible n-column prefixes. */
	ib_uint64_t*		n_diff_on_level = new ib_uint64_t[n_uniq];

	/* For each level that is being scanned in the btree, this contains the
	index of the last record from each group of equal records (when
	comparing only the first n columns, n=1..n_uniq). */
	boundaries_t*		n_diff_boundaries = new boundaries_t[n_uniq];

	/* For each n-column prefix this array contains the input data that is
	used to calculate dict_index_t::stat_n_diff_key_vals[]. */
	n_diff_data_t*		n_diff_data = new n_diff_data_t[n_uniq];

	/* total_recs is also used to estimate the number of pages on one
	level below, so at the start we have 1 page (the root) */
	total_recs = 1;

	/* Here we use the following optimization:
	If we find that level L is the first one (searching from the
	root) that contains at least D distinct keys when looking at
	the first n_prefix columns, then:
	if we look at the first n_prefix-1 columns then the first
	level that contains D distinct keys will be either L or a
	lower one.
	So if we find that the first level containing D distinct
	keys (on n_prefix columns) is L, we continue from L when
	searching for D distinct keys on n_prefix-1 columns. */
	level = root_level;
	level_is_analyzed = false;

	for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) {

		DEBUG_PRINTF("  %s(): searching level with >=%llu "
			     "distinct records, n_prefix=%lu\n",
			     __func__, N_DIFF_REQUIRED(index), n_prefix);

		/* Commit the mtr to release the tree S lock to allow
		other threads to do some work too. */
		mtr_commit(&mtr);
		mtr_start(&mtr);
		mtr_s_lock(dict_index_get_lock(index), &mtr);
		if (root_level != btr_height_get(index, &mtr)) {
			/* Just quit if the tree has changed beyond
			recognition here. The old stats from previous
			runs will remain in the values that we have
			not calculated yet. Initially when the index
			object is created the stats members are given
			some sensible values so leaving them untouched
			here even the first time will not cause us to
			read uninitialized memory later. */
			break;
		}

		/* check whether we should pick the current level;
		we pick level 1 even if it does not have enough
		distinct records because we do not want to scan the
		leaf level because it may contain too many records */
		if (level_is_analyzed
		    && (n_diff_on_level[n_prefix - 1] >= N_DIFF_REQUIRED(index)
			|| level == 1)) {

			goto found_level;
		}

		/* search for a level that contains enough distinct records */

		if (level_is_analyzed && level > 1) {

			/* if this does not hold we should be on
			"found_level" instead of here */
			ut_ad(n_diff_on_level[n_prefix - 1]
			      < N_DIFF_REQUIRED(index));

			level--;
			level_is_analyzed = false;
		}

		/* descend into the tree, searching for "good enough" level */
		for (;;) {

			/* make sure we do not scan the leaf level
			accidentally, it may contain too many pages */
			ut_ad(level > 0);

			/* scanning the same level twice is an optimization
			bug */
			ut_ad(!level_is_analyzed);

			/* Do not scan if this would read too many pages.
			Here we use the following fact:
			the number of pages on level L equals the number
			of records on level L+1, thus we deduce that the
			following call would scan total_recs pages, because
			total_recs is left from the previous iteration when
			we scanned one level upper or we have not scanned any
			levels yet in which case total_recs is 1. */
			if (total_recs > N_SAMPLE_PAGES(index)) {

				/* if the above cond is true then we are
				not at the root level since on the root
				level total_recs == 1 (set before we
				enter the n-prefix loop) and cannot
				be > N_SAMPLE_PAGES(index) */
				ut_a(level != root_level);

				/* step one level back and be satisfied with
				whatever it contains */
				level++;
				level_is_analyzed = true;

				break;
			}

			dict_stats_analyze_index_level(index,
						       level,
						       n_diff_on_level,
						       &total_recs,
						       &total_pages,
						       n_diff_boundaries,
						       &mtr);

			level_is_analyzed = true;

			if (level == 1
			    || n_diff_on_level[n_prefix - 1]
			    >= N_DIFF_REQUIRED(index)) {
				/* we have reached the last level we could scan
				or we found a good level with many distinct
				records */
				break;
			}

			level--;
			level_is_analyzed = false;
		}
found_level:

		DEBUG_PRINTF("  %s(): found level %lu that has " UINT64PF
			     " distinct records for n_prefix=%lu\n",
			     __func__, level, n_diff_on_level[n_prefix - 1],
			     n_prefix);
		/* here we are either on level 1 or the level that we are on
		contains >= N_DIFF_REQUIRED distinct keys or we did not scan
		deeper levels because they would contain too many pages */

		ut_ad(level > 0);

		ut_ad(level_is_analyzed);

		/* if any of these is 0 then there is exactly one page in the
		B-tree and it is empty and we should have done full scan and
		should not be here */
		ut_ad(total_recs > 0);
		ut_ad(n_diff_on_level[n_prefix - 1] > 0);

		ut_ad(N_SAMPLE_PAGES(index) > 0);

		n_diff_data_t*	data = &n_diff_data[n_prefix - 1];

		data->level = level;

		data->n_recs_on_level = total_recs;

		data->n_diff_on_level = n_diff_on_level[n_prefix - 1];

		data->n_leaf_pages_to_analyze = std::min(
			N_SAMPLE_PAGES(index),
			n_diff_on_level[n_prefix - 1]);

		/* pick some records from this level and dive below them for
		the given n_prefix */

		dict_stats_analyze_index_for_n_prefix(
			index, n_prefix, &n_diff_boundaries[n_prefix - 1],
			data, &mtr);
	}

	mtr_commit(&mtr);

	delete[] n_diff_boundaries;

	delete[] n_diff_on_level;

	/* n_prefix == 0 means that the above loop did not end up prematurely
	due to tree being changed and so n_diff_data[] is set up. */
	if (n_prefix == 0) {
		dict_stats_index_set_n_diff(n_diff_data, index);
	}

	delete[] n_diff_data;

	dict_stats_assert_initialized_index(index);
	DBUG_VOID_RETURN;
}
2195 
/*********************************************************************//**
Calculates new estimates for table and index statistics. This function
is relatively slow and is used to calculate persistent statistics that
will be saved on disk.
@return DB_SUCCESS or error code */
static
dberr_t
dict_stats_update_persistent(
/*=========================*/
	dict_table_t*	table)		/*!< in/out: table */
{
	dict_index_t*	index;

	DEBUG_PRINTF("%s(table=%s)\n", __func__, table->name);

	/* Hold the table stats latch in X mode for the whole recalc so
	that readers never see a half-updated set of stats. */
	dict_table_stats_lock(table, RW_X_LATCH);

	/* analyze the clustered index first */

	index = dict_table_get_first_index(table);

	/* The first index must be the clustered index: after forcing the
	DICT_UNIQUE bit on, the type must equal exactly
	DICT_CLUSTERED | DICT_UNIQUE, i.e. no other type bits may be set
	and DICT_CLUSTERED must be present. */
	if (index == NULL
	    || dict_index_is_corrupted(index)
	    || (index->type | DICT_UNIQUE) != (DICT_CLUSTERED | DICT_UNIQUE)) {

		/* Table definition is corrupt */
		dict_table_stats_unlock(table, RW_X_LATCH);
		dict_stats_empty_table(table);

		return(DB_CORRUPTION);
	}

	ut_ad(!dict_index_is_univ(index));

	dict_stats_analyze_index(index);

	ulint	n_unique = dict_index_get_n_unique(index);

	/* The number of distinct full unique keys in the clustered index
	is the estimated row count of the table. */
	table->stat_n_rows = index->stat_n_diff_key_vals[n_unique - 1];

	table->stat_clustered_index_size = index->stat_index_size;

	/* analyze other indexes from the table, if any */

	table->stat_sum_of_other_index_sizes = 0;

	for (index = dict_table_get_next_index(index);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		ut_ad(!dict_index_is_univ(index));

		/* Full-text indexes have no btree stats to compute. */
		if (index->type & DICT_FTS) {
			continue;
		}

		dict_stats_empty_index(index);

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		/* Skip the expensive analysis if a background-stats quit
		was requested; the index keeps the empty stats set above
		but its size is still accounted below. */
		if (!(table->stats_bg_flag & BG_STAT_SHOULD_QUIT)) {
			dict_stats_analyze_index(index);
		}

		table->stat_sum_of_other_index_sizes
			+= index->stat_index_size;
	}

	table->stats_last_recalc = ut_time();

	table->stat_modified_counter = 0;

	table->stat_initialized = TRUE;

	dict_stats_assert_initialized(table);

	dict_table_stats_unlock(table, RW_X_LATCH);

	return(DB_SUCCESS);
}
2278 
2279 #include "mysql_com.h"
/** Save an individual index's statistic into the persistent statistics
storage.
@param[in]	index			index to be updated
@param[in]	last_update		timestamp of the stat
@param[in]	stat_name		name of the stat
@param[in]	stat_value		value of the stat
@param[in]	sample_size		n pages sampled or NULL
@param[in]	stat_description	description of the stat
@param[in,out]	trx			in case of NULL the function will
allocate and free the trx object. If it is not NULL then it will be
rolled back only in the case of error, but not freed.
@return DB_SUCCESS or error code */
static
dberr_t
dict_stats_save_index_stat(
	dict_index_t*	index,
	lint		last_update,
	const char*	stat_name,
	ib_uint64_t	stat_value,
	ib_uint64_t*	sample_size,
	const char*	stat_description,
	trx_t*		trx)
{
	pars_info_t*	pinfo;
	dberr_t		ret;
	char		db_utf8[MAX_DB_UTF8_LEN];
	char		table_utf8[MAX_TABLE_UTF8_LEN];

	/* Caller must hold the data-dictionary latches: we are reading
	index metadata and executing internal SQL against the stats
	tables. */
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(mutex_own(&dict_sys->mutex));

	/* Split "db/table" into separate UTF-8 database and table names
	as stored in the innodb_index_stats PK. */
	dict_fs2utf8(index->table->name, db_utf8, sizeof(db_utf8),
		     table_utf8, sizeof(table_utf8));

	pinfo = pars_info_create();
	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
	UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name));
	pars_info_add_str_literal(pinfo, "index_name", index->name);
	/* only 4 bytes of last_update are stored, matching the
	int4 literal bound below */
	UNIV_MEM_ASSERT_RW_ABORT(&last_update, 4);
	pars_info_add_int4_literal(pinfo, "last_update", last_update);
	UNIV_MEM_ASSERT_RW_ABORT(stat_name, strlen(stat_name));
	pars_info_add_str_literal(pinfo, "stat_name", stat_name);
	UNIV_MEM_ASSERT_RW_ABORT(&stat_value, 8);
	pars_info_add_ull_literal(pinfo, "stat_value", stat_value);
	if (sample_size != NULL) {
		UNIV_MEM_ASSERT_RW_ABORT(sample_size, 8);
		pars_info_add_ull_literal(pinfo, "sample_size", *sample_size);
	} else {
		/* bind SQL NULL when no sample size applies to this stat */
		pars_info_add_literal(pinfo, "sample_size", NULL,
				      UNIV_SQL_NULL, DATA_FIXBINARY, 0);
	}
	UNIV_MEM_ASSERT_RW_ABORT(stat_description, strlen(stat_description));
	pars_info_add_str_literal(pinfo, "stat_description",
				  stat_description);

	/* DELETE + INSERT instead of UPDATE, so a new row is created if
	the stat was not previously saved. */
	ret = dict_stats_exec_sql(
		pinfo,
		"PROCEDURE INDEX_STATS_SAVE () IS\n"
		"BEGIN\n"

		"DELETE FROM \"" INDEX_STATS_NAME "\"\n"
		"WHERE\n"
		"database_name = :database_name AND\n"
		"table_name = :table_name AND\n"
		"index_name = :index_name AND\n"
		"stat_name = :stat_name;\n"

		"INSERT INTO \"" INDEX_STATS_NAME "\"\n"
		"VALUES\n"
		"(\n"
		":database_name,\n"
		":table_name,\n"
		":index_name,\n"
		":last_update,\n"
		":stat_name,\n"
		":stat_value,\n"
		":sample_size,\n"
		":stat_description\n"
		");\n"
		"END;", trx);

	if (ret != DB_SUCCESS) {
		char	buf_table[MAX_FULL_NAME_LEN];
		char	buf_index[MAX_FULL_NAME_LEN];
		ut_print_timestamp(stderr);
		fprintf(stderr,
			" InnoDB: Cannot save index statistics for table "
			"%s, index %s, stat name \"%s\": %s\n",
			ut_format_name(index->table->name, TRUE,
				       buf_table, sizeof(buf_table)),
			ut_format_name(index->name, FALSE,
				       buf_index, sizeof(buf_index)),
			stat_name, ut_strerr(ret));
	}

	return(ret);
}
2380 
/** Save the table's statistics into the persistent statistics storage.
@param[in] table_orig	table whose stats to save
@param[in] only_for_index if this is non-NULL, then stats for indexes
that are not equal to it will not be saved, if NULL, then all
indexes' stats are saved
@return DB_SUCCESS or error code */
static
dberr_t
dict_stats_save(
/*============*/
	dict_table_t*		table_orig,
	const index_id_t*	only_for_index)
{
	pars_info_t*	pinfo;
	lint		now;
	dberr_t		ret;
	dict_table_t*	table;
	char		db_utf8[MAX_DB_UTF8_LEN];
	char		table_utf8[MAX_TABLE_UTF8_LEN];

	/* Work on a snapshot of the stats so that concurrent recalcs on
	table_orig do not interfere while we write to disk; freed at every
	exit path below. */
	table = dict_stats_snapshot_create(table_orig);

	dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
		     table_utf8, sizeof(table_utf8));

	rw_lock_x_lock(&dict_operation_lock);
	mutex_enter(&dict_sys->mutex);

	/* MySQL's timestamp is 4 byte, so we use
	pars_info_add_int4_literal() which takes a lint arg, so "now" is
	lint */
	now = (lint) ut_time();

	pinfo = pars_info_create();

	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
	pars_info_add_int4_literal(pinfo, "last_update", now);
	pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows);
	pars_info_add_ull_literal(pinfo, "clustered_index_size",
		table->stat_clustered_index_size);
	pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes",
		table->stat_sum_of_other_index_sizes);

	/* DELETE + INSERT so that the row is created if it did not exist. */
	ret = dict_stats_exec_sql(
		pinfo,
		"PROCEDURE TABLE_STATS_SAVE () IS\n"
		"BEGIN\n"

		"DELETE FROM \"" TABLE_STATS_NAME "\"\n"
		"WHERE\n"
		"database_name = :database_name AND\n"
		"table_name = :table_name;\n"

		"INSERT INTO \"" TABLE_STATS_NAME "\"\n"
		"VALUES\n"
		"(\n"
		":database_name,\n"
		":table_name,\n"
		":last_update,\n"
		":n_rows,\n"
		":clustered_index_size,\n"
		":sum_of_other_index_sizes\n"
		");\n"
		"END;", NULL);

	if (ret != DB_SUCCESS) {
		char	buf[MAX_FULL_NAME_LEN];
		ut_print_timestamp(stderr);
		fprintf(stderr,
			" InnoDB: Cannot save table statistics for table "
			"%s: %s\n",
			ut_format_name(table->name, TRUE, buf, sizeof(buf)),
			ut_strerr(ret));

		mutex_exit(&dict_sys->mutex);
		rw_lock_x_unlock(&dict_operation_lock);

		dict_stats_snapshot_free(table);

		return(ret);
	}

	trx_t*	trx = trx_allocate_for_background();
	trx_start_if_not_started(trx);

	dict_index_t*	index;
	index_map_t	indexes;

	/* Below we do all the modifications in innodb_index_stats in a single
	transaction for performance reasons. Modifying more than one row in a
	single transaction may deadlock with other transactions if they
	lock the rows in different order. Other transaction could be for
	example when we DROP a table and do
	DELETE FROM innodb_index_stats WHERE database_name = '...'
	AND table_name = '...'; which will affect more than one row. To
	prevent deadlocks we always lock the rows in the same order - the
	order of the PK, which is (database_name, table_name, index_name,
	stat_name). This is why below we sort the indexes by name and then
	for each index, do the mods ordered by stat_name. */

	for (index = dict_table_get_first_index(table);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		indexes[index->name] = index;
	}

	index_map_t::const_iterator	it;

	for (it = indexes.begin(); it != indexes.end(); ++it) {

		index = it->second;

		if (only_for_index != NULL && index->id != *only_for_index) {
			continue;
		}

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		ut_ad(!dict_index_is_univ(index));

		/* Save one n_diff_pfxNN row per unique-prefix length;
		NN starts from 01, hence i + 1 below. */
		for (ulint i = 0; i < index->n_uniq; i++) {

			char	stat_name[16];
			char	stat_description[1024];
			ulint	j;

			ut_snprintf(stat_name, sizeof(stat_name),
				    "n_diff_pfx%02lu", i + 1);

			/* craft a string that contains the columns names */
			ut_snprintf(stat_description,
				    sizeof(stat_description),
				    "%s", index->fields[0].name);
			for (j = 1; j <= i; j++) {
				size_t	len;

				len = strlen(stat_description);

				ut_snprintf(stat_description + len,
					    sizeof(stat_description) - len,
					    ",%s", index->fields[j].name);
			}

			ret = dict_stats_save_index_stat(
				index, now, stat_name,
				index->stat_n_diff_key_vals[i],
				&index->stat_n_sample_sizes[i],
				stat_description, trx);

			if (ret != DB_SUCCESS) {
				/* trx was already rolled back by the
				callee; just release it */
				goto end;
			}
		}

		ret = dict_stats_save_index_stat(index, now, "n_leaf_pages",
						 index->stat_n_leaf_pages,
						 NULL,
						 "Number of leaf pages "
						 "in the index", trx);
		if (ret != DB_SUCCESS) {
			goto end;
		}

		ret = dict_stats_save_index_stat(index, now, "size",
						 index->stat_index_size,
						 NULL,
						 "Number of pages "
						 "in the index", trx);
		if (ret != DB_SUCCESS) {
			goto end;
		}
	}

	trx_commit_for_mysql(trx);
	/* fall through to the common cleanup */

end:
	trx_free_for_background(trx);

	mutex_exit(&dict_sys->mutex);
	rw_lock_x_unlock(&dict_operation_lock);

	dict_stats_snapshot_free(table);

	return(ret);
}
2570 
2571 /*********************************************************************//**
2572 Called for the row that is selected by
2573 SELECT ... FROM mysql.innodb_table_stats WHERE table='...'
2574 The second argument is a pointer to the table and the fetched stats are
2575 written to it.
2576 @return non-NULL dummy */
2577 static
2578 ibool
dict_stats_fetch_table_stats_step(void * node_void,void * table_void)2579 dict_stats_fetch_table_stats_step(
2580 /*==============================*/
2581 	void*	node_void,	/*!< in: select node */
2582 	void*	table_void)	/*!< out: table */
2583 {
2584 	sel_node_t*	node = (sel_node_t*) node_void;
2585 	dict_table_t*	table = (dict_table_t*) table_void;
2586 	que_common_t*	cnode;
2587 	int		i;
2588 
2589 	/* this should loop exactly 3 times - for
2590 	n_rows,clustered_index_size,sum_of_other_index_sizes */
2591 	for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
2592 	     cnode != NULL;
2593 	     cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
2594 	     i++) {
2595 
2596 		const byte*	data;
2597 		dfield_t*	dfield = que_node_get_val(cnode);
2598 		dtype_t*	type = dfield_get_type(dfield);
2599 		ulint		len = dfield_get_len(dfield);
2600 
2601 		data = static_cast<const byte*>(dfield_get_data(dfield));
2602 
2603 		switch (i) {
2604 		case 0: /* mysql.innodb_table_stats.n_rows */
2605 
2606 			ut_a(dtype_get_mtype(type) == DATA_INT);
2607 			ut_a(len == 8);
2608 
2609 			table->stat_n_rows = mach_read_from_8(data);
2610 
2611 			break;
2612 
2613 		case 1: /* mysql.innodb_table_stats.clustered_index_size */
2614 
2615 			ut_a(dtype_get_mtype(type) == DATA_INT);
2616 			ut_a(len == 8);
2617 
2618 			table->stat_clustered_index_size
2619 				= (ulint) mach_read_from_8(data);
2620 
2621 			break;
2622 
2623 		case 2: /* mysql.innodb_table_stats.sum_of_other_index_sizes */
2624 
2625 			ut_a(dtype_get_mtype(type) == DATA_INT);
2626 			ut_a(len == 8);
2627 
2628 			table->stat_sum_of_other_index_sizes
2629 				= (ulint) mach_read_from_8(data);
2630 
2631 			break;
2632 
2633 		default:
2634 
2635 			/* someone changed SELECT
2636 			n_rows,clustered_index_size,sum_of_other_index_sizes
2637 			to select more columns from innodb_table_stats without
2638 			adjusting here */
2639 			ut_error;
2640 		}
2641 	}
2642 
2643 	/* if i < 3 this means someone changed the
2644 	SELECT n_rows,clustered_index_size,sum_of_other_index_sizes
2645 	to select less columns from innodb_table_stats without adjusting here;
2646 	if i > 3 we would have ut_error'ed earlier */
2647 	ut_a(i == 3 /*n_rows,clustered_index_size,sum_of_other_index_sizes*/);
2648 
2649 	/* XXX this is not used but returning non-NULL is necessary */
2650 	return(TRUE);
2651 }
2652 
/** Aux struct used to pass a table and a boolean to
dict_stats_fetch_index_stats_step(). */
struct index_fetch_t {
	dict_table_t*	table;	/*!< table whose indexes are to be modified */
	bool		stats_were_modified; /*!< will be set to true if at
				least one index's stats were modified */
};
2660 
2661 /*********************************************************************//**
2662 Called for the rows that are selected by
2663 SELECT ... FROM mysql.innodb_index_stats WHERE table='...'
2664 The second argument is a pointer to the table and the fetched stats are
2665 written to its indexes.
Suppose a table has N indexes and each index has Ui unique columns for i=1..N;
then mysql.innodb_index_stats will have SUM(Ui) i=1..N rows for that table.
2668 So this function will be called SUM(Ui) times where SUM(Ui) is of magnitude
2669 N*AVG(Ui). In each call it searches for the currently fetched index into
2670 table->indexes linearly, assuming this list is not sorted. Thus, overall,
2671 fetching all indexes' stats from mysql.innodb_index_stats is O(N^2) where N
2672 is the number of indexes.
2673 This can be improved if we sort table->indexes in a temporary area just once
2674 and then search in that sorted list. Then the complexity will be O(N*log(N)).
2675 We assume a table will not have more than 100 indexes, so we go with the
2676 simpler N^2 algorithm.
2677 @return non-NULL dummy */
static
ibool
dict_stats_fetch_index_stats_step(
/*==============================*/
	void*	node_void,	/*!< in: select node */
	void*	arg_void)	/*!< out: table + a flag that tells if we
				modified anything */
{
	sel_node_t*	node = (sel_node_t*) node_void;
	index_fetch_t*	arg = (index_fetch_t*) arg_void;
	dict_table_t*	table = arg->table;
	dict_index_t*	index = NULL;
	que_common_t*	cnode;
	const char*	stat_name = NULL;
	ulint		stat_name_len = ULINT_UNDEFINED;
	ib_uint64_t	stat_value = UINT64_UNDEFINED;
	ib_uint64_t	sample_size = UINT64_UNDEFINED;
	int		i;

	/* this should loop exactly 4 times - for the columns that
	were selected: index_name,stat_name,stat_value,sample_size */
	for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
	     cnode != NULL;
	     cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
	     i++) {

		const byte*	data;
		dfield_t*	dfield = que_node_get_val(cnode);
		dtype_t*	type = dfield_get_type(dfield);
		ulint		len = dfield_get_len(dfield);

		data = static_cast<const byte*>(dfield_get_data(dfield));

		/* i is the position of the column in the SELECT list;
		each case below validates the column's type before use */
		switch (i) {
		case 0: /* mysql.innodb_index_stats.index_name */

			ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);

			/* search for index in table's indexes whose name
			matches data; the fetched index name is in data,
			has no terminating '\0' and has length len */
			for (index = dict_table_get_first_index(table);
			     index != NULL;
			     index = dict_table_get_next_index(index)) {

				if (strlen(index->name) == len
				    && memcmp(index->name, data, len) == 0) {
					/* the corresponding index was found */
					break;
				}
			}

			/* if index is NULL here this means that
			mysql.innodb_index_stats contains more rows than the
			number of indexes in the table; this is ok, we just
			return ignoring those extra rows; in other words
			dict_stats_fetch_index_stats_step() has been called
			for a row from index_stats with unknown index_name
			column */
			if (index == NULL) {

				return(TRUE);
			}

			break;

		case 1: /* mysql.innodb_index_stats.stat_name */

			ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);

			ut_a(index != NULL);

			/* like index_name, stat_name is not
			NUL-terminated; remember its length too */
			stat_name = (const char*) data;
			stat_name_len = len;

			break;

		case 2: /* mysql.innodb_index_stats.stat_value */

			ut_a(dtype_get_mtype(type) == DATA_INT);
			ut_a(len == 8);

			ut_a(index != NULL);
			ut_a(stat_name != NULL);
			ut_a(stat_name_len != ULINT_UNDEFINED);

			stat_value = mach_read_from_8(data);

			break;

		case 3: /* mysql.innodb_index_stats.sample_size */

			ut_a(dtype_get_mtype(type) == DATA_INT);
			ut_a(len == 8 || len == UNIV_SQL_NULL);

			ut_a(index != NULL);
			ut_a(stat_name != NULL);
			ut_a(stat_name_len != ULINT_UNDEFINED);
			ut_a(stat_value != UINT64_UNDEFINED);

			/* sample_size is nullable; leave it as
			UINT64_UNDEFINED when it is NULL */
			if (len == UNIV_SQL_NULL) {
				break;
			}
			/* else */

			sample_size = mach_read_from_8(data);

			break;

		default:

			/* someone changed
			SELECT index_name,stat_name,stat_value,sample_size
			to select more columns from innodb_index_stats without
			adjusting here */
			ut_error;
		}
	}

	/* if i < 4 this means someone changed the
	SELECT index_name,stat_name,stat_value,sample_size
	to select less columns from innodb_index_stats without adjusting here;
	if i > 4 we would have ut_error'ed earlier */
	ut_a(i == 4 /* index_name,stat_name,stat_value,sample_size */);

	ut_a(index != NULL);
	ut_a(stat_name != NULL);
	ut_a(stat_name_len != ULINT_UNDEFINED);
	ut_a(stat_value != UINT64_UNDEFINED);
	/* sample_size could be UINT64_UNDEFINED here, if it is NULL */

/* Common prefix of the per-prefix cardinality stat names, e.g.
"n_diff_pfx01"; PFX_LEN == strlen(PFX). */
#define PFX	"n_diff_pfx"
#define PFX_LEN	10

	if (stat_name_len == 4 /* strlen("size") */
	    && strncasecmp("size", stat_name, stat_name_len) == 0) {
		index->stat_index_size = (ulint) stat_value;
		arg->stats_were_modified = true;
	} else if (stat_name_len == 12 /* strlen("n_leaf_pages") */
		   && strncasecmp("n_leaf_pages", stat_name, stat_name_len)
		   == 0) {
		index->stat_n_leaf_pages = (ulint) stat_value;
		arg->stats_were_modified = true;
	} else if (stat_name_len > PFX_LEN /* e.g. stat_name=="n_diff_pfx01" */
		   && strncasecmp(PFX, stat_name, PFX_LEN) == 0) {

		const char*	num_ptr;
		unsigned long	n_pfx;

		/* point num_ptr into "1" from "n_diff_pfx12..." */
		num_ptr = stat_name + PFX_LEN;

		/* stat_name should have exactly 2 chars appended to PFX
		and they should be digits */
		if (stat_name_len != PFX_LEN + 2
		    || num_ptr[0] < '0' || num_ptr[0] > '9'
		    || num_ptr[1] < '0' || num_ptr[1] > '9') {

			char	db_utf8[MAX_DB_UTF8_LEN];
			char	table_utf8[MAX_TABLE_UTF8_LEN];

			dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
				     table_utf8, sizeof(table_utf8));

			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Ignoring strange row from "
				"%s WHERE "
				"database_name = '%s' AND "
				"table_name = '%s' AND "
				"index_name = '%s' AND "
				"stat_name = '%.*s'; because stat_name "
				"is malformed\n",
				INDEX_STATS_NAME_PRINT,
				db_utf8,
				table_utf8,
				index->name,
				(int) stat_name_len,
				stat_name);
			return(TRUE);
		}
		/* else */

		/* extract 12 from "n_diff_pfx12..." into n_pfx
		note that stat_name does not have a terminating '\0' */
		n_pfx = (num_ptr[0] - '0') * 10 + (num_ptr[1] - '0');

		ulint	n_uniq = index->n_uniq;

		/* n_pfx is 1-based and must not exceed the number of
		unique columns in the index */
		if (n_pfx == 0 || n_pfx > n_uniq) {

			char	db_utf8[MAX_DB_UTF8_LEN];
			char	table_utf8[MAX_TABLE_UTF8_LEN];

			dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
				     table_utf8, sizeof(table_utf8));

			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Ignoring strange row from "
				"%s WHERE "
				"database_name = '%s' AND "
				"table_name = '%s' AND "
				"index_name = '%s' AND "
				"stat_name = '%.*s'; because stat_name is "
				"out of range, the index has %lu unique "
				"columns\n",
				INDEX_STATS_NAME_PRINT,
				db_utf8,
				table_utf8,
				index->name,
				(int) stat_name_len,
				stat_name,
				n_uniq);
			return(TRUE);
		}
		/* else */

		index->stat_n_diff_key_vals[n_pfx - 1] = stat_value;

		if (sample_size != UINT64_UNDEFINED) {
			index->stat_n_sample_sizes[n_pfx - 1] = sample_size;
		} else {
			/* hmm, strange... the user must have UPDATEd the
			table manually and SET sample_size = NULL */
			index->stat_n_sample_sizes[n_pfx - 1] = 0;
		}

		/* the number of non-NULL key values is not fetched from
		persistent storage here, so just reset it */
		index->stat_n_non_null_key_vals[n_pfx - 1] = 0;

		arg->stats_were_modified = true;
	} else {
		/* silently ignore rows with unknown stat_name, the
		user may have developed her own stats */
	}

	/* XXX this is not used but returning non-NULL is necessary */
	return(TRUE);
}
2917 
2918 /*********************************************************************//**
2919 Read table's statistics from the persistent statistics storage.
2920 @return DB_SUCCESS or error code */
2921 static
2922 dberr_t
dict_stats_fetch_from_ps(dict_table_t * table)2923 dict_stats_fetch_from_ps(
2924 /*=====================*/
2925 	dict_table_t*	table)	/*!< in/out: table */
2926 {
2927 	index_fetch_t	index_fetch_arg;
2928 	trx_t*		trx;
2929 	pars_info_t*	pinfo;
2930 	dberr_t		ret;
2931 	char		db_utf8[MAX_DB_UTF8_LEN];
2932 	char		table_utf8[MAX_TABLE_UTF8_LEN];
2933 
2934 	ut_ad(!mutex_own(&dict_sys->mutex));
2935 
2936 	/* Initialize all stats to dummy values before fetching because if
2937 	the persistent storage contains incomplete stats (e.g. missing stats
2938 	for some index) then we would end up with (partially) uninitialized
2939 	stats. */
2940 	dict_stats_empty_table(table);
2941 
2942 	trx = trx_allocate_for_background();
2943 
2944 	/* Use 'read-uncommitted' so that the SELECTs we execute
2945 	do not get blocked in case some user has locked the rows we
2946 	are SELECTing */
2947 
2948 	trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
2949 
2950 	trx_start_if_not_started(trx);
2951 
2952 	dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
2953 		     table_utf8, sizeof(table_utf8));
2954 
2955 	pinfo = pars_info_create();
2956 
2957 	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
2958 
2959 	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
2960 
2961 	pars_info_bind_function(pinfo,
2962 			       "fetch_table_stats_step",
2963 			       dict_stats_fetch_table_stats_step,
2964 			       table);
2965 
2966 	index_fetch_arg.table = table;
2967 	index_fetch_arg.stats_were_modified = false;
2968 	pars_info_bind_function(pinfo,
2969 			        "fetch_index_stats_step",
2970 			        dict_stats_fetch_index_stats_step,
2971 			        &index_fetch_arg);
2972 
2973 	ret = que_eval_sql(pinfo,
2974 			   "PROCEDURE FETCH_STATS () IS\n"
2975 			   "found INT;\n"
2976 			   "DECLARE FUNCTION fetch_table_stats_step;\n"
2977 			   "DECLARE FUNCTION fetch_index_stats_step;\n"
2978 			   "DECLARE CURSOR table_stats_cur IS\n"
2979 			   "  SELECT\n"
2980 			   /* if you change the selected fields, be
2981 			   sure to adjust
2982 			   dict_stats_fetch_table_stats_step() */
2983 			   "  n_rows,\n"
2984 			   "  clustered_index_size,\n"
2985 			   "  sum_of_other_index_sizes\n"
2986 			   "  FROM \"" TABLE_STATS_NAME "\"\n"
2987 			   "  WHERE\n"
2988 			   "  database_name = :database_name AND\n"
2989 			   "  table_name = :table_name;\n"
2990 			   "DECLARE CURSOR index_stats_cur IS\n"
2991 			   "  SELECT\n"
2992 			   /* if you change the selected fields, be
2993 			   sure to adjust
2994 			   dict_stats_fetch_index_stats_step() */
2995 			   "  index_name,\n"
2996 			   "  stat_name,\n"
2997 			   "  stat_value,\n"
2998 			   "  sample_size\n"
2999 			   "  FROM \"" INDEX_STATS_NAME "\"\n"
3000 			   "  WHERE\n"
3001 			   "  database_name = :database_name AND\n"
3002 			   "  table_name = :table_name;\n"
3003 
3004 			   "BEGIN\n"
3005 
3006 			   "OPEN table_stats_cur;\n"
3007 			   "FETCH table_stats_cur INTO\n"
3008 			   "  fetch_table_stats_step();\n"
3009 			   "IF (SQL % NOTFOUND) THEN\n"
3010 			   "  CLOSE table_stats_cur;\n"
3011 			   "  RETURN;\n"
3012 			   "END IF;\n"
3013 			   "CLOSE table_stats_cur;\n"
3014 
3015 			   "OPEN index_stats_cur;\n"
3016 			   "found := 1;\n"
3017 			   "WHILE found = 1 LOOP\n"
3018 			   "  FETCH index_stats_cur INTO\n"
3019 			   "    fetch_index_stats_step();\n"
3020 			   "  IF (SQL % NOTFOUND) THEN\n"
3021 			   "    found := 0;\n"
3022 			   "  END IF;\n"
3023 			   "END LOOP;\n"
3024 			   "CLOSE index_stats_cur;\n"
3025 
3026 			   "END;",
3027 			   TRUE, trx);
3028 	/* pinfo is freed by que_eval_sql() */
3029 
3030 	trx_commit_for_mysql(trx);
3031 
3032 	trx_free_for_background(trx);
3033 
3034 	if (!index_fetch_arg.stats_were_modified) {
3035 		return(DB_STATS_DO_NOT_EXIST);
3036 	}
3037 
3038 	return(ret);
3039 }
3040 
3041 /*********************************************************************//**
3042 Fetches or calculates new estimates for index statistics. */
3043 UNIV_INTERN
3044 void
dict_stats_update_for_index(dict_index_t * index)3045 dict_stats_update_for_index(
3046 /*========================*/
3047 	dict_index_t*	index)	/*!< in/out: index */
3048 {
3049 	DBUG_ENTER("dict_stats_update_for_index");
3050 
3051 	ut_ad(!mutex_own(&dict_sys->mutex));
3052 
3053 	if (dict_stats_is_persistent_enabled(index->table)) {
3054 
3055 		if (dict_stats_persistent_storage_check(false)) {
3056 			dict_table_stats_lock(index->table, RW_X_LATCH);
3057 			dict_stats_analyze_index(index);
3058 			index->table->stat_sum_of_other_index_sizes += index->stat_index_size;
3059 			dict_table_stats_unlock(index->table, RW_X_LATCH);
3060 			dict_stats_save(index->table, &index->id);
3061 			DBUG_VOID_RETURN;
3062 		}
3063 		/* else */
3064 
3065 		/* Fall back to transient stats since the persistent
3066 		storage is not present or is corrupted */
3067 		char	buf_table[MAX_FULL_NAME_LEN];
3068 		char	buf_index[MAX_FULL_NAME_LEN];
3069 		ut_print_timestamp(stderr);
3070 		fprintf(stderr,
3071 			" InnoDB: Recalculation of persistent statistics "
3072 			"requested for table %s index %s but the required "
3073 			"persistent statistics storage is not present or is "
3074 			"corrupted. Using transient stats instead.\n",
3075 			ut_format_name(index->table->name, TRUE,
3076 				       buf_table, sizeof(buf_table)),
3077 			ut_format_name(index->name, FALSE,
3078 				       buf_index, sizeof(buf_index)));
3079 	}
3080 
3081 	dict_table_stats_lock(index->table, RW_X_LATCH);
3082 	dict_stats_update_transient_for_index(index);
3083 	dict_table_stats_unlock(index->table, RW_X_LATCH);
3084 
3085 	DBUG_VOID_RETURN;
3086 }
3087 
3088 /*********************************************************************//**
3089 Calculates new estimates for table and index statistics. The statistics
3090 are used in query optimization.
3091 @return DB_SUCCESS or error code */
3092 UNIV_INTERN
3093 dberr_t
dict_stats_update(dict_table_t * table,dict_stats_upd_option_t stats_upd_option)3094 dict_stats_update(
3095 /*==============*/
3096 	dict_table_t*		table,	/*!< in/out: table */
3097 	dict_stats_upd_option_t	stats_upd_option)
3098 					/*!< in: whether to (re) calc
3099 					the stats or to fetch them from
3100 					the persistent statistics
3101 					storage */
3102 {
3103 	char			buf[MAX_FULL_NAME_LEN];
3104 
3105 	ut_ad(!mutex_own(&dict_sys->mutex));
3106 
3107 	if (table->ibd_file_missing) {
3108 		ut_print_timestamp(stderr);
3109 		fprintf(stderr,
3110 			" InnoDB: cannot calculate statistics for table %s "
3111 			"because the .ibd file is missing. For help, please "
3112 			"refer to " REFMAN "innodb-troubleshooting.html\n",
3113 			ut_format_name(table->name, TRUE, buf, sizeof(buf)));
3114 		dict_stats_empty_table(table);
3115 		return(DB_TABLESPACE_DELETED);
3116 	} else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
3117 		/* If we have set a high innodb_force_recovery level, do
3118 		not calculate statistics, as a badly corrupted index can
3119 		cause a crash in it. */
3120 		dict_stats_empty_table(table);
3121 		return(DB_SUCCESS);
3122 	}
3123 
3124 	switch (stats_upd_option) {
3125 	case DICT_STATS_RECALC_PERSISTENT:
3126 
3127 		if (srv_read_only_mode) {
3128 			goto transient;
3129 		}
3130 
3131 		/* Persistent recalculation requested, called from
3132 		1) ANALYZE TABLE, or
3133 		2) the auto recalculation background thread, or
3134 		3) open table if stats do not exist on disk and auto recalc
3135 		   is enabled */
3136 
3137 		/* InnoDB internal tables (e.g. SYS_TABLES) cannot have
3138 		persistent stats enabled */
3139 		ut_a(strchr(table->name, '/') != NULL);
3140 
3141 		/* check if the persistent statistics storage exists
3142 		before calling the potentially slow function
3143 		dict_stats_update_persistent(); that is a
3144 		prerequisite for dict_stats_save() succeeding */
3145 		if (dict_stats_persistent_storage_check(false)) {
3146 
3147 			dberr_t	err;
3148 
3149 			err = dict_stats_update_persistent(table);
3150 
3151 			if (err != DB_SUCCESS) {
3152 				return(err);
3153 			}
3154 
3155 			err = dict_stats_save(table, NULL);
3156 
3157 			return(err);
3158 		}
3159 
3160 		/* Fall back to transient stats since the persistent
3161 		storage is not present or is corrupted */
3162 
3163 		ut_print_timestamp(stderr);
3164 		fprintf(stderr,
3165 			" InnoDB: Recalculation of persistent statistics "
3166 			"requested for table %s but the required persistent "
3167 			"statistics storage is not present or is corrupted. "
3168 			"Using transient stats instead.\n",
3169 			ut_format_name(table->name, TRUE, buf, sizeof(buf)));
3170 
3171 		goto transient;
3172 
3173 	case DICT_STATS_RECALC_TRANSIENT:
3174 
3175 		goto transient;
3176 
3177 	case DICT_STATS_EMPTY_TABLE:
3178 
3179 		dict_stats_empty_table(table);
3180 
3181 		/* If table is using persistent stats,
3182 		then save the stats on disk */
3183 
3184 		if (dict_stats_is_persistent_enabled(table)) {
3185 
3186 			if (dict_stats_persistent_storage_check(false)) {
3187 
3188 				return(dict_stats_save(table, NULL));
3189 			}
3190 
3191 			return(DB_STATS_DO_NOT_EXIST);
3192 		}
3193 
3194 		return(DB_SUCCESS);
3195 
3196 	case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY:
3197 
3198 		/* fetch requested, either fetch from persistent statistics
3199 		storage or use the old method */
3200 
3201 		if (table->stat_initialized) {
3202 			return(DB_SUCCESS);
3203 		}
3204 
3205 		/* InnoDB internal tables (e.g. SYS_TABLES) cannot have
3206 		persistent stats enabled */
3207 		ut_a(strchr(table->name, '/') != NULL);
3208 
3209 		if (!dict_stats_persistent_storage_check(false)) {
3210 			/* persistent statistics storage does not exist
3211 			or is corrupted, calculate the transient stats */
3212 
3213 			ut_print_timestamp(stderr);
3214 			fprintf(stderr,
3215 				" InnoDB: Error: Fetch of persistent "
3216 				"statistics requested for table %s but the "
3217 				"required system tables %s and %s are not "
3218 				"present or have unexpected structure. "
3219 				"Using transient stats instead.\n",
3220 				ut_format_name(table->name, TRUE,
3221 					       buf, sizeof(buf)),
3222 				TABLE_STATS_NAME_PRINT,
3223 				INDEX_STATS_NAME_PRINT);
3224 
3225 			goto transient;
3226 		}
3227 
3228 		dict_table_t*	t;
3229 
3230 		/* Create a dummy table object with the same name and
3231 		indexes, suitable for fetching the stats into it. */
3232 		t = dict_stats_table_clone_create(table);
3233 
3234 		dberr_t	err = dict_stats_fetch_from_ps(t);
3235 
3236 		t->stats_last_recalc = table->stats_last_recalc;
3237 		t->stat_modified_counter = 0;
3238 
3239 		switch (err) {
3240 		case DB_SUCCESS:
3241 
3242 			dict_table_stats_lock(table, RW_X_LATCH);
3243 
3244 			dict_stats_copy(table, t);
3245 
3246 			dict_stats_assert_initialized(table);
3247 
3248 			dict_table_stats_unlock(table, RW_X_LATCH);
3249 
3250 			dict_stats_table_clone_free(t);
3251 
3252 			return(DB_SUCCESS);
3253 		case DB_STATS_DO_NOT_EXIST:
3254 
3255 			dict_stats_table_clone_free(t);
3256 
3257 			if (srv_read_only_mode) {
3258 				goto transient;
3259 			}
3260 
3261 			if (dict_stats_auto_recalc_is_enabled(table)) {
3262 				return(dict_stats_update(
3263 						table,
3264 						DICT_STATS_RECALC_PERSISTENT));
3265 			}
3266 
3267 			ut_format_name(table->name, TRUE, buf, sizeof(buf));
3268 			ut_print_timestamp(stderr);
3269 			fprintf(stderr,
3270 				" InnoDB: Trying to use table %s which has "
3271 				"persistent statistics enabled, but auto "
3272 				"recalculation turned off and the statistics "
3273 				"do not exist in %s and %s. Please either run "
3274 				"\"ANALYZE TABLE %s;\" manually or enable the "
3275 				"auto recalculation with "
3276 				"\"ALTER TABLE %s STATS_AUTO_RECALC=1;\". "
3277 				"InnoDB will now use transient statistics for "
3278 				"%s.\n",
3279 				buf, TABLE_STATS_NAME, INDEX_STATS_NAME, buf,
3280 				buf, buf);
3281 
3282 			goto transient;
3283 		default:
3284 
3285 			dict_stats_table_clone_free(t);
3286 
3287 			ut_print_timestamp(stderr);
3288 			fprintf(stderr,
3289 				" InnoDB: Error fetching persistent statistics "
3290 				"for table %s from %s and %s: %s. "
3291 				"Using transient stats method instead.\n",
3292 				ut_format_name(table->name, TRUE, buf,
3293 					       sizeof(buf)),
3294 				TABLE_STATS_NAME,
3295 				INDEX_STATS_NAME,
3296 				ut_strerr(err));
3297 
3298 			goto transient;
3299 		}
3300 	/* no "default:" in order to produce a compilation warning
3301 	about unhandled enumeration value */
3302 	}
3303 
3304 transient:
3305 
3306 	dict_table_stats_lock(table, RW_X_LATCH);
3307 
3308 	dict_stats_update_transient(table);
3309 
3310 	dict_table_stats_unlock(table, RW_X_LATCH);
3311 
3312 	return(DB_SUCCESS);
3313 }
3314 
3315 /*********************************************************************//**
3316 Removes the information for a particular index's stats from the persistent
3317 storage if it exists and if there is data stored for this index.
3318 This function creates its own trx and commits it.
3319 A note from Marko why we cannot edit user and sys_* tables in one trx:
3320 marko: The problem is that ibuf merges should be disabled while we are
3321 rolling back dict transactions.
3322 marko: If ibuf merges are not disabled, we need to scan the *.ibd files.
3323 But we shouldn't open *.ibd files before we have rolled back dict
3324 transactions and opened the SYS_* records for the *.ibd files.
3325 @return DB_SUCCESS or error code */
3326 UNIV_INTERN
3327 dberr_t
dict_stats_drop_index(const char * db_and_table,const char * iname,char * errstr,ulint errstr_sz)3328 dict_stats_drop_index(
3329 /*==================*/
3330 	const char*	db_and_table,/*!< in: db and table, e.g. 'db/table' */
3331 	const char*	iname,	/*!< in: index name */
3332 	char*		errstr, /*!< out: error message if != DB_SUCCESS
3333 				is returned */
3334 	ulint		errstr_sz)/*!< in: size of the errstr buffer */
3335 {
3336 	char		db_utf8[MAX_DB_UTF8_LEN];
3337 	char		table_utf8[MAX_TABLE_UTF8_LEN];
3338 	pars_info_t*	pinfo;
3339 	dberr_t		ret;
3340 
3341 	ut_ad(!mutex_own(&dict_sys->mutex));
3342 
3343 	/* skip indexes whose table names do not contain a database name
3344 	e.g. if we are dropping an index from SYS_TABLES */
3345 	if (strchr(db_and_table, '/') == NULL) {
3346 
3347 		return(DB_SUCCESS);
3348 	}
3349 
3350 	dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
3351 		     table_utf8, sizeof(table_utf8));
3352 
3353 	pinfo = pars_info_create();
3354 
3355 	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
3356 
3357 	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
3358 
3359 	pars_info_add_str_literal(pinfo, "index_name", iname);
3360 
3361 	rw_lock_x_lock(&dict_operation_lock);
3362 	mutex_enter(&dict_sys->mutex);
3363 
3364 	ret = dict_stats_exec_sql(
3365 		pinfo,
3366 		"PROCEDURE DROP_INDEX_STATS () IS\n"
3367 		"BEGIN\n"
3368 		"DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
3369 		"database_name = :database_name AND\n"
3370 		"table_name = :table_name AND\n"
3371 		"index_name = :index_name;\n"
3372 		"END;\n", NULL);
3373 
3374 	mutex_exit(&dict_sys->mutex);
3375 	rw_lock_x_unlock(&dict_operation_lock);
3376 
3377 	if (ret == DB_STATS_DO_NOT_EXIST) {
3378 		ret = DB_SUCCESS;
3379 	}
3380 
3381 	if (ret != DB_SUCCESS) {
3382 		ut_snprintf(errstr, errstr_sz,
3383 			    "Unable to delete statistics for index %s "
3384 			    "from %s%s: %s. They can be deleted later using "
3385 			    "DELETE FROM %s WHERE "
3386 			    "database_name = '%s' AND "
3387 			    "table_name = '%s' AND "
3388 			    "index_name = '%s';",
3389 			    iname,
3390 			    INDEX_STATS_NAME_PRINT,
3391 			    (ret == DB_LOCK_WAIT_TIMEOUT
3392 			     ? " because the rows are locked"
3393 			     : ""),
3394 			    ut_strerr(ret),
3395 			    INDEX_STATS_NAME_PRINT,
3396 			    db_utf8,
3397 			    table_utf8,
3398 			    iname);
3399 
3400 		ut_print_timestamp(stderr);
3401 		fprintf(stderr, " InnoDB: %s\n", errstr);
3402 	}
3403 
3404 	return(ret);
3405 }
3406 
3407 /*********************************************************************//**
3408 Executes
3409 DELETE FROM mysql.innodb_table_stats
3410 WHERE database_name = '...' AND table_name = '...';
3411 Creates its own transaction and commits it.
3412 @return DB_SUCCESS or error code */
3413 UNIV_INLINE
3414 dberr_t
dict_stats_delete_from_table_stats(const char * database_name,const char * table_name)3415 dict_stats_delete_from_table_stats(
3416 /*===============================*/
3417 	const char*	database_name,	/*!< in: database name, e.g. 'db' */
3418 	const char*	table_name)	/*!< in: table name, e.g. 'table' */
3419 {
3420 	pars_info_t*	pinfo;
3421 	dberr_t		ret;
3422 
3423 #ifdef UNIV_SYNC_DEBUG
3424 	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
3425 #endif /* UNIV_SYNC_DEBUG */
3426 	ut_ad(mutex_own(&dict_sys->mutex));
3427 
3428 	pinfo = pars_info_create();
3429 
3430 	pars_info_add_str_literal(pinfo, "database_name", database_name);
3431 	pars_info_add_str_literal(pinfo, "table_name", table_name);
3432 
3433 	ret = dict_stats_exec_sql(
3434 		pinfo,
3435 		"PROCEDURE DELETE_FROM_TABLE_STATS () IS\n"
3436 		"BEGIN\n"
3437 		"DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n"
3438 		"database_name = :database_name AND\n"
3439 		"table_name = :table_name;\n"
3440 		"END;\n", NULL);
3441 
3442 	return(ret);
3443 }
3444 
3445 /*********************************************************************//**
3446 Executes
3447 DELETE FROM mysql.innodb_index_stats
3448 WHERE database_name = '...' AND table_name = '...';
3449 Creates its own transaction and commits it.
3450 @return DB_SUCCESS or error code */
3451 UNIV_INLINE
3452 dberr_t
dict_stats_delete_from_index_stats(const char * database_name,const char * table_name)3453 dict_stats_delete_from_index_stats(
3454 /*===============================*/
3455 	const char*	database_name,	/*!< in: database name, e.g. 'db' */
3456 	const char*	table_name)	/*!< in: table name, e.g. 'table' */
3457 {
3458 	pars_info_t*	pinfo;
3459 	dberr_t		ret;
3460 
3461 #ifdef UNIV_SYNC_DEBUG
3462 	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
3463 #endif /* UNIV_SYNC_DEBUG */
3464 	ut_ad(mutex_own(&dict_sys->mutex));
3465 
3466 	pinfo = pars_info_create();
3467 
3468 	pars_info_add_str_literal(pinfo, "database_name", database_name);
3469 	pars_info_add_str_literal(pinfo, "table_name", table_name);
3470 
3471 	ret = dict_stats_exec_sql(
3472 		pinfo,
3473 		"PROCEDURE DELETE_FROM_INDEX_STATS () IS\n"
3474 		"BEGIN\n"
3475 		"DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
3476 		"database_name = :database_name AND\n"
3477 		"table_name = :table_name;\n"
3478 		"END;\n", NULL);
3479 
3480 	return(ret);
3481 }
3482 
3483 /*********************************************************************//**
3484 Removes the statistics for a table and all of its indexes from the
3485 persistent statistics storage if it exists and if there is data stored for
3486 the table. This function creates its own transaction and commits it.
3487 @return DB_SUCCESS or error code */
3488 UNIV_INTERN
3489 dberr_t
dict_stats_drop_table(const char * db_and_table,char * errstr,ulint errstr_sz)3490 dict_stats_drop_table(
3491 /*==================*/
3492 	const char*	db_and_table,	/*!< in: db and table, e.g. 'db/table' */
3493 	char*		errstr,		/*!< out: error message
3494 					if != DB_SUCCESS is returned */
3495 	ulint		errstr_sz)	/*!< in: size of errstr buffer */
3496 {
3497 	char		db_utf8[MAX_DB_UTF8_LEN];
3498 	char		table_utf8[MAX_TABLE_UTF8_LEN];
3499 	dberr_t		ret;
3500 
3501 #ifdef UNIV_SYNC_DEBUG
3502 	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
3503 #endif /* UNIV_SYNC_DEBUG */
3504 	ut_ad(mutex_own(&dict_sys->mutex));
3505 
3506 	/* skip tables that do not contain a database name
3507 	e.g. if we are dropping SYS_TABLES */
3508 	if (strchr(db_and_table, '/') == NULL) {
3509 
3510 		return(DB_SUCCESS);
3511 	}
3512 
3513 	/* skip innodb_table_stats and innodb_index_stats themselves */
3514 	if (strcmp(db_and_table, TABLE_STATS_NAME) == 0
3515 	    || strcmp(db_and_table, INDEX_STATS_NAME) == 0) {
3516 
3517 		return(DB_SUCCESS);
3518 	}
3519 
3520 	dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
3521 		     table_utf8, sizeof(table_utf8));
3522 
3523 	ret = dict_stats_delete_from_table_stats(db_utf8, table_utf8);
3524 
3525 	if (ret == DB_SUCCESS) {
3526 		ret = dict_stats_delete_from_index_stats(db_utf8, table_utf8);
3527 	}
3528 
3529 	if (ret == DB_STATS_DO_NOT_EXIST) {
3530 		ret = DB_SUCCESS;
3531 	}
3532 
3533 	if (ret != DB_SUCCESS) {
3534 
3535 		ut_snprintf(errstr, errstr_sz,
3536 			    "Unable to delete statistics for table %s.%s: %s. "
3537 			    "They can be deleted later using "
3538 
3539 			    "DELETE FROM %s WHERE "
3540 			    "database_name = '%s' AND "
3541 			    "table_name = '%s'; "
3542 
3543 			    "DELETE FROM %s WHERE "
3544 			    "database_name = '%s' AND "
3545 			    "table_name = '%s';",
3546 
3547 			    db_utf8, table_utf8,
3548 			    ut_strerr(ret),
3549 
3550 			    INDEX_STATS_NAME_PRINT,
3551 			    db_utf8, table_utf8,
3552 
3553 			    TABLE_STATS_NAME_PRINT,
3554 			    db_utf8, table_utf8);
3555 	}
3556 
3557 	return(ret);
3558 }
3559 
3560 /*********************************************************************//**
3561 Executes
3562 UPDATE mysql.innodb_table_stats SET
3563 database_name = '...', table_name = '...'
3564 WHERE database_name = '...' AND table_name = '...';
3565 Creates its own transaction and commits it.
3566 @return DB_SUCCESS or error code */
3567 UNIV_INLINE
3568 dberr_t
dict_stats_rename_in_table_stats(const char * old_dbname_utf8,const char * old_tablename_utf8,const char * new_dbname_utf8,const char * new_tablename_utf8)3569 dict_stats_rename_in_table_stats(
3570 /*=============================*/
3571 	const char*	old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
3572 	const char*	old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
3573 	const char*	new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
3574 	const char*	new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
3575 {
3576 	pars_info_t*	pinfo;
3577 	dberr_t		ret;
3578 
3579 #ifdef UNIV_SYNC_DEBUG
3580 	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
3581 #endif /* UNIV_SYNC_DEBUG */
3582 	ut_ad(mutex_own(&dict_sys->mutex));
3583 
3584 	pinfo = pars_info_create();
3585 
3586 	pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
3587 	pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
3588 	pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
3589 	pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
3590 
3591 	ret = dict_stats_exec_sql(
3592 		pinfo,
3593 		"PROCEDURE RENAME_IN_TABLE_STATS () IS\n"
3594 		"BEGIN\n"
3595 		"UPDATE \"" TABLE_STATS_NAME "\" SET\n"
3596 		"database_name = :new_dbname_utf8,\n"
3597 		"table_name = :new_tablename_utf8\n"
3598 		"WHERE\n"
3599 		"database_name = :old_dbname_utf8 AND\n"
3600 		"table_name = :old_tablename_utf8;\n"
3601 		"END;\n", NULL);
3602 
3603 	return(ret);
3604 }
3605 
3606 /*********************************************************************//**
3607 Executes
3608 UPDATE mysql.innodb_index_stats SET
3609 database_name = '...', table_name = '...'
3610 WHERE database_name = '...' AND table_name = '...';
3611 Creates its own transaction and commits it.
3612 @return DB_SUCCESS or error code */
3613 UNIV_INLINE
3614 dberr_t
dict_stats_rename_in_index_stats(const char * old_dbname_utf8,const char * old_tablename_utf8,const char * new_dbname_utf8,const char * new_tablename_utf8)3615 dict_stats_rename_in_index_stats(
3616 /*=============================*/
3617 	const char*	old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
3618 	const char*	old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
3619 	const char*	new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
3620 	const char*	new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
3621 {
3622 	pars_info_t*	pinfo;
3623 	dberr_t		ret;
3624 
3625 #ifdef UNIV_SYNC_DEBUG
3626 	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
3627 #endif /* UNIV_SYNC_DEBUG */
3628 	ut_ad(mutex_own(&dict_sys->mutex));
3629 
3630 	pinfo = pars_info_create();
3631 
3632 	pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
3633 	pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
3634 	pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
3635 	pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
3636 
3637 	ret = dict_stats_exec_sql(
3638 		pinfo,
3639 		"PROCEDURE RENAME_IN_INDEX_STATS () IS\n"
3640 		"BEGIN\n"
3641 		"UPDATE \"" INDEX_STATS_NAME "\" SET\n"
3642 		"database_name = :new_dbname_utf8,\n"
3643 		"table_name = :new_tablename_utf8\n"
3644 		"WHERE\n"
3645 		"database_name = :old_dbname_utf8 AND\n"
3646 		"table_name = :old_tablename_utf8;\n"
3647 		"END;\n", NULL);
3648 
3649 	return(ret);
3650 }
3651 
3652 /*********************************************************************//**
3653 Renames a table in InnoDB persistent stats storage.
3654 This function creates its own transaction and commits it.
3655 @return DB_SUCCESS or error code */
3656 UNIV_INTERN
3657 dberr_t
dict_stats_rename_table(const char * old_name,const char * new_name,char * errstr,size_t errstr_sz)3658 dict_stats_rename_table(
3659 /*====================*/
3660 	const char*	old_name,	/*!< in: old name, e.g. 'db/table' */
3661 	const char*	new_name,	/*!< in: new name, e.g. 'db/table' */
3662 	char*		errstr,		/*!< out: error string if != DB_SUCCESS
3663 					is returned */
3664 	size_t		errstr_sz)	/*!< in: errstr size */
3665 {
3666 	char		old_db_utf8[MAX_DB_UTF8_LEN];
3667 	char		new_db_utf8[MAX_DB_UTF8_LEN];
3668 	char		old_table_utf8[MAX_TABLE_UTF8_LEN];
3669 	char		new_table_utf8[MAX_TABLE_UTF8_LEN];
3670 	dberr_t		ret;
3671 
3672 #ifdef UNIV_SYNC_DEBUG
3673 	ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
3674 #endif /* UNIV_SYNC_DEBUG */
3675 	ut_ad(!mutex_own(&dict_sys->mutex));
3676 
3677 	/* skip innodb_table_stats and innodb_index_stats themselves */
3678 	if (strcmp(old_name, TABLE_STATS_NAME) == 0
3679 	    || strcmp(old_name, INDEX_STATS_NAME) == 0
3680 	    || strcmp(new_name, TABLE_STATS_NAME) == 0
3681 	    || strcmp(new_name, INDEX_STATS_NAME) == 0) {
3682 
3683 		return(DB_SUCCESS);
3684 	}
3685 
3686 	dict_fs2utf8(old_name, old_db_utf8, sizeof(old_db_utf8),
3687 		     old_table_utf8, sizeof(old_table_utf8));
3688 
3689 	dict_fs2utf8(new_name, new_db_utf8, sizeof(new_db_utf8),
3690 		     new_table_utf8, sizeof(new_table_utf8));
3691 
3692 	rw_lock_x_lock(&dict_operation_lock);
3693 	mutex_enter(&dict_sys->mutex);
3694 
3695 	ulint	n_attempts = 0;
3696 	do {
3697 		n_attempts++;
3698 
3699 		ret = dict_stats_rename_in_table_stats(
3700 			old_db_utf8, old_table_utf8,
3701 			new_db_utf8, new_table_utf8);
3702 
3703 		if (ret == DB_DUPLICATE_KEY) {
3704 			dict_stats_delete_from_table_stats(
3705 				new_db_utf8, new_table_utf8);
3706 		}
3707 
3708 		if (ret == DB_STATS_DO_NOT_EXIST) {
3709 			ret = DB_SUCCESS;
3710 		}
3711 
3712 		if (ret != DB_SUCCESS) {
3713 			mutex_exit(&dict_sys->mutex);
3714 			rw_lock_x_unlock(&dict_operation_lock);
3715 			os_thread_sleep(200000 /* 0.2 sec */);
3716 			rw_lock_x_lock(&dict_operation_lock);
3717 			mutex_enter(&dict_sys->mutex);
3718 		}
3719 	} while ((ret == DB_DEADLOCK
3720 		  || ret == DB_DUPLICATE_KEY
3721 		  || ret == DB_LOCK_WAIT_TIMEOUT)
3722 		 && n_attempts < 5);
3723 
3724 	if (ret != DB_SUCCESS) {
3725 		ut_snprintf(errstr, errstr_sz,
3726 			    "Unable to rename statistics from "
3727 			    "%s.%s to %s.%s in %s: %s. "
3728 			    "They can be renamed later using "
3729 
3730 			    "UPDATE %s SET "
3731 			    "database_name = '%s', "
3732 			    "table_name = '%s' "
3733 			    "WHERE "
3734 			    "database_name = '%s' AND "
3735 			    "table_name = '%s';",
3736 
3737 			    old_db_utf8, old_table_utf8,
3738 			    new_db_utf8, new_table_utf8,
3739 			    TABLE_STATS_NAME_PRINT,
3740 			    ut_strerr(ret),
3741 
3742 			    TABLE_STATS_NAME_PRINT,
3743 			    new_db_utf8, new_table_utf8,
3744 			    old_db_utf8, old_table_utf8);
3745 		mutex_exit(&dict_sys->mutex);
3746 		rw_lock_x_unlock(&dict_operation_lock);
3747 		return(ret);
3748 	}
3749 	/* else */
3750 
3751 	n_attempts = 0;
3752 	do {
3753 		n_attempts++;
3754 
3755 		ret = dict_stats_rename_in_index_stats(
3756 			old_db_utf8, old_table_utf8,
3757 			new_db_utf8, new_table_utf8);
3758 
3759 		if (ret == DB_DUPLICATE_KEY) {
3760 			dict_stats_delete_from_index_stats(
3761 				new_db_utf8, new_table_utf8);
3762 		}
3763 
3764 		if (ret == DB_STATS_DO_NOT_EXIST) {
3765 			ret = DB_SUCCESS;
3766 		}
3767 
3768 		if (ret != DB_SUCCESS) {
3769 			mutex_exit(&dict_sys->mutex);
3770 			rw_lock_x_unlock(&dict_operation_lock);
3771 			os_thread_sleep(200000 /* 0.2 sec */);
3772 			rw_lock_x_lock(&dict_operation_lock);
3773 			mutex_enter(&dict_sys->mutex);
3774 		}
3775 	} while ((ret == DB_DEADLOCK
3776 		  || ret == DB_DUPLICATE_KEY
3777 		  || ret == DB_LOCK_WAIT_TIMEOUT)
3778 		 && n_attempts < 5);
3779 
3780 	mutex_exit(&dict_sys->mutex);
3781 	rw_lock_x_unlock(&dict_operation_lock);
3782 
3783 	if (ret != DB_SUCCESS) {
3784 		ut_snprintf(errstr, errstr_sz,
3785 			    "Unable to rename statistics from "
3786 			    "%s.%s to %s.%s in %s: %s. "
3787 			    "They can be renamed later using "
3788 
3789 			    "UPDATE %s SET "
3790 			    "database_name = '%s', "
3791 			    "table_name = '%s' "
3792 			    "WHERE "
3793 			    "database_name = '%s' AND "
3794 			    "table_name = '%s';",
3795 
3796 			    old_db_utf8, old_table_utf8,
3797 			    new_db_utf8, new_table_utf8,
3798 			    INDEX_STATS_NAME_PRINT,
3799 			    ut_strerr(ret),
3800 
3801 			    INDEX_STATS_NAME_PRINT,
3802 			    new_db_utf8, new_table_utf8,
3803 			    old_db_utf8, old_table_utf8);
3804 	}
3805 
3806 	return(ret);
3807 }
3808 
/* tests @{ */
#ifdef UNIV_COMPILE_TEST_FUNCS

/* The following unit tests test some of the functions in this file
individually, such testing cannot be performed by the mysql-test framework
via SQL. */

3816 /* test_dict_table_schema_check() @{ */
3817 void
test_dict_table_schema_check()3818 test_dict_table_schema_check()
3819 {
3820 	/*
3821 	CREATE TABLE tcheck (
3822 		c01 VARCHAR(123),
3823 		c02 INT,
3824 		c03 INT NOT NULL,
3825 		c04 INT UNSIGNED,
3826 		c05 BIGINT,
3827 		c06 BIGINT UNSIGNED NOT NULL,
3828 		c07 TIMESTAMP
3829 	) ENGINE=INNODB;
3830 	*/
3831 	/* definition for the table 'test/tcheck' */
3832 	dict_col_meta_t	columns[] = {
3833 		{"c01", DATA_VARCHAR, 0, 123},
3834 		{"c02", DATA_INT, 0, 4},
3835 		{"c03", DATA_INT, DATA_NOT_NULL, 4},
3836 		{"c04", DATA_INT, DATA_UNSIGNED, 4},
3837 		{"c05", DATA_INT, 0, 8},
3838 		{"c06", DATA_INT, DATA_NOT_NULL | DATA_UNSIGNED, 8},
3839 		{"c07", DATA_INT, 0, 4},
3840 		{"c_extra", DATA_INT, 0, 4}
3841 	};
3842 	dict_table_schema_t	schema = {
3843 		"test/tcheck",
3844 		0 /* will be set individually for each test below */,
3845 		columns
3846 	};
3847 	char	errstr[512];
3848 
3849 	ut_snprintf(errstr, sizeof(errstr), "Table not found");
3850 
3851 	/* prevent any data dictionary modifications while we are checking
3852 	the tables' structure */
3853 
3854 	mutex_enter(&(dict_sys->mutex));
3855 
3856 	/* check that a valid table is reported as valid */
3857 	schema.n_cols = 7;
3858 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3859 	    == DB_SUCCESS) {
3860 		printf("OK: test.tcheck ok\n");
3861 	} else {
3862 		printf("ERROR: %s\n", errstr);
3863 		printf("ERROR: test.tcheck not present or corrupted\n");
3864 		goto test_dict_table_schema_check_end;
3865 	}
3866 
3867 	/* check columns with wrong length */
3868 	schema.columns[1].len = 8;
3869 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3870 	    != DB_SUCCESS) {
3871 		printf("OK: test.tcheck.c02 has different length and is "
3872 		       "reported as corrupted\n");
3873 	} else {
3874 		printf("OK: test.tcheck.c02 has different length but is "
3875 		       "reported as ok\n");
3876 		goto test_dict_table_schema_check_end;
3877 	}
3878 	schema.columns[1].len = 4;
3879 
3880 	/* request that c02 is NOT NULL while actually it does not have
3881 	this flag set */
3882 	schema.columns[1].prtype_mask |= DATA_NOT_NULL;
3883 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3884 	    != DB_SUCCESS) {
3885 		printf("OK: test.tcheck.c02 does not have NOT NULL while "
3886 		       "it should and is reported as corrupted\n");
3887 	} else {
3888 		printf("ERROR: test.tcheck.c02 does not have NOT NULL while "
3889 		       "it should and is not reported as corrupted\n");
3890 		goto test_dict_table_schema_check_end;
3891 	}
3892 	schema.columns[1].prtype_mask &= ~DATA_NOT_NULL;
3893 
3894 	/* check a table that contains some extra columns */
3895 	schema.n_cols = 6;
3896 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3897 	    == DB_SUCCESS) {
3898 		printf("ERROR: test.tcheck has more columns but is not "
3899 		       "reported as corrupted\n");
3900 		goto test_dict_table_schema_check_end;
3901 	} else {
3902 		printf("OK: test.tcheck has more columns and is "
3903 		       "reported as corrupted\n");
3904 	}
3905 
3906 	/* check a table that has some columns missing */
3907 	schema.n_cols = 8;
3908 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3909 	    != DB_SUCCESS) {
3910 		printf("OK: test.tcheck has missing columns and is "
3911 		       "reported as corrupted\n");
3912 	} else {
3913 		printf("ERROR: test.tcheck has missing columns but is "
3914 		       "reported as ok\n");
3915 		goto test_dict_table_schema_check_end;
3916 	}
3917 
3918 	/* check non-existent table */
3919 	schema.table_name = "test/tcheck_nonexistent";
3920 	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
3921 	    != DB_SUCCESS) {
3922 		printf("OK: test.tcheck_nonexistent is not present\n");
3923 	} else {
3924 		printf("ERROR: test.tcheck_nonexistent is present!?\n");
3925 		goto test_dict_table_schema_check_end;
3926 	}
3927 
3928 test_dict_table_schema_check_end:
3929 
3930 	mutex_exit(&(dict_sys->mutex));
3931 }
3932 /* @} */
3933 
/* save/fetch aux macros @{ */

/* Identity of the dummy table used by test_dict_stats_save() and
test_dict_stats_fetch_from_ps(). */
#define TEST_DATABASE_NAME		"foobardb"
#define TEST_TABLE_NAME			"test_dict_stats"

/* Table-level statistics values. */
#define TEST_N_ROWS			111
#define TEST_CLUSTERED_INDEX_SIZE	222
#define TEST_SUM_OF_OTHER_INDEX_SIZES	333

/* Single-column index tidx1 and its statistics. */
#define TEST_IDX1_NAME			"tidx1"
#define TEST_IDX1_COL1_NAME		"tidx1_col1"
#define TEST_IDX1_INDEX_SIZE		123
#define TEST_IDX1_N_LEAF_PAGES		234
#define TEST_IDX1_N_DIFF1		50
#define TEST_IDX1_N_DIFF1_SAMPLE_SIZE	500

/* Four-column index tidx2 and its statistics: one n_diff value and one
sample size per key prefix length. */
#define TEST_IDX2_NAME			"tidx2"
#define TEST_IDX2_COL1_NAME		"tidx2_col1"
#define TEST_IDX2_COL2_NAME		"tidx2_col2"
#define TEST_IDX2_COL3_NAME		"tidx2_col3"
#define TEST_IDX2_COL4_NAME		"tidx2_col4"
#define TEST_IDX2_INDEX_SIZE		321
#define TEST_IDX2_N_LEAF_PAGES		432
#define TEST_IDX2_N_DIFF1		60
#define TEST_IDX2_N_DIFF1_SAMPLE_SIZE	600
#define TEST_IDX2_N_DIFF2		61
#define TEST_IDX2_N_DIFF2_SAMPLE_SIZE	610
#define TEST_IDX2_N_DIFF3		62
#define TEST_IDX2_N_DIFF3_SAMPLE_SIZE	620
#define TEST_IDX2_N_DIFF4		63
#define TEST_IDX2_N_DIFF4_SAMPLE_SIZE	630
/* @} */

3966 /* test_dict_stats_save() @{ */
3967 void
test_dict_stats_save()3968 test_dict_stats_save()
3969 {
3970 	dict_table_t	table;
3971 	dict_index_t	index1;
3972 	dict_field_t	index1_fields[1];
3973 	ib_uint64_t	index1_stat_n_diff_key_vals[1];
3974 	ib_uint64_t	index1_stat_n_sample_sizes[1];
3975 	dict_index_t	index2;
3976 	dict_field_t	index2_fields[4];
3977 	ib_uint64_t	index2_stat_n_diff_key_vals[4];
3978 	ib_uint64_t	index2_stat_n_sample_sizes[4];
3979 	dberr_t		ret;
3980 
3981 	/* craft a dummy dict_table_t */
3982 	table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
3983 	table.stat_n_rows = TEST_N_ROWS;
3984 	table.stat_clustered_index_size = TEST_CLUSTERED_INDEX_SIZE;
3985 	table.stat_sum_of_other_index_sizes = TEST_SUM_OF_OTHER_INDEX_SIZES;
3986 	UT_LIST_INIT(table.indexes);
3987 	UT_LIST_ADD_LAST(indexes, table.indexes, &index1);
3988 	UT_LIST_ADD_LAST(indexes, table.indexes, &index2);
3989 	ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
3990 	ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
3991 
3992 	index1.name = TEST_IDX1_NAME;
3993 	index1.table = &table;
3994 	index1.cached = 1;
3995 	index1.n_uniq = 1;
3996 	index1.fields = index1_fields;
3997 	index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
3998 	index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
3999 	index1.stat_index_size = TEST_IDX1_INDEX_SIZE;
4000 	index1.stat_n_leaf_pages = TEST_IDX1_N_LEAF_PAGES;
4001 	index1_fields[0].name = TEST_IDX1_COL1_NAME;
4002 	index1_stat_n_diff_key_vals[0] = TEST_IDX1_N_DIFF1;
4003 	index1_stat_n_sample_sizes[0] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE;
4004 
4005 	ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
4006 	index2.name = TEST_IDX2_NAME;
4007 	index2.table = &table;
4008 	index2.cached = 1;
4009 	index2.n_uniq = 4;
4010 	index2.fields = index2_fields;
4011 	index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
4012 	index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
4013 	index2.stat_index_size = TEST_IDX2_INDEX_SIZE;
4014 	index2.stat_n_leaf_pages = TEST_IDX2_N_LEAF_PAGES;
4015 	index2_fields[0].name = TEST_IDX2_COL1_NAME;
4016 	index2_fields[1].name = TEST_IDX2_COL2_NAME;
4017 	index2_fields[2].name = TEST_IDX2_COL3_NAME;
4018 	index2_fields[3].name = TEST_IDX2_COL4_NAME;
4019 	index2_stat_n_diff_key_vals[0] = TEST_IDX2_N_DIFF1;
4020 	index2_stat_n_diff_key_vals[1] = TEST_IDX2_N_DIFF2;
4021 	index2_stat_n_diff_key_vals[2] = TEST_IDX2_N_DIFF3;
4022 	index2_stat_n_diff_key_vals[3] = TEST_IDX2_N_DIFF4;
4023 	index2_stat_n_sample_sizes[0] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE;
4024 	index2_stat_n_sample_sizes[1] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE;
4025 	index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE;
4026 	index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE;
4027 
4028 	ret = dict_stats_save(&table, NULL);
4029 
4030 	ut_a(ret == DB_SUCCESS);
4031 
4032 	printf("\nOK: stats saved successfully, now go ahead and read "
4033 	       "what's inside %s and %s:\n\n",
4034 	       TABLE_STATS_NAME_PRINT,
4035 	       INDEX_STATS_NAME_PRINT);
4036 
4037 	printf("SELECT COUNT(*) = 1 AS table_stats_saved_successfully\n"
4038 	       "FROM %s\n"
4039 	       "WHERE\n"
4040 	       "database_name = '%s' AND\n"
4041 	       "table_name = '%s' AND\n"
4042 	       "n_rows = %d AND\n"
4043 	       "clustered_index_size = %d AND\n"
4044 	       "sum_of_other_index_sizes = %d;\n"
4045 	       "\n",
4046 	       TABLE_STATS_NAME_PRINT,
4047 	       TEST_DATABASE_NAME,
4048 	       TEST_TABLE_NAME,
4049 	       TEST_N_ROWS,
4050 	       TEST_CLUSTERED_INDEX_SIZE,
4051 	       TEST_SUM_OF_OTHER_INDEX_SIZES);
4052 
4053 	printf("SELECT COUNT(*) = 3 AS tidx1_stats_saved_successfully\n"
4054 	       "FROM %s\n"
4055 	       "WHERE\n"
4056 	       "database_name = '%s' AND\n"
4057 	       "table_name = '%s' AND\n"
4058 	       "index_name = '%s' AND\n"
4059 	       "(\n"
4060 	       " (stat_name = 'size' AND stat_value = %d AND"
4061 	       "  sample_size IS NULL) OR\n"
4062 	       " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
4063 	       "  sample_size IS NULL) OR\n"
4064 	       " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
4065 	       "  sample_size = '%d' AND stat_description = '%s')\n"
4066 	       ");\n"
4067 	       "\n",
4068 	       INDEX_STATS_NAME_PRINT,
4069 	       TEST_DATABASE_NAME,
4070 	       TEST_TABLE_NAME,
4071 	       TEST_IDX1_NAME,
4072 	       TEST_IDX1_INDEX_SIZE,
4073 	       TEST_IDX1_N_LEAF_PAGES,
4074 	       TEST_IDX1_N_DIFF1,
4075 	       TEST_IDX1_N_DIFF1_SAMPLE_SIZE,
4076 	       TEST_IDX1_COL1_NAME);
4077 
4078 	printf("SELECT COUNT(*) = 6 AS tidx2_stats_saved_successfully\n"
4079 	       "FROM %s\n"
4080 	       "WHERE\n"
4081 	       "database_name = '%s' AND\n"
4082 	       "table_name = '%s' AND\n"
4083 	       "index_name = '%s' AND\n"
4084 	       "(\n"
4085 	       " (stat_name = 'size' AND stat_value = %d AND"
4086 	       "  sample_size IS NULL) OR\n"
4087 	       " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
4088 	       "  sample_size IS NULL) OR\n"
4089 	       " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
4090 	       "  sample_size = '%d' AND stat_description = '%s') OR\n"
4091 	       " (stat_name = 'n_diff_pfx02' AND stat_value = %d AND"
4092 	       "  sample_size = '%d' AND stat_description = '%s,%s') OR\n"
4093 	       " (stat_name = 'n_diff_pfx03' AND stat_value = %d AND"
4094 	       "  sample_size = '%d' AND stat_description = '%s,%s,%s') OR\n"
4095 	       " (stat_name = 'n_diff_pfx04' AND stat_value = %d AND"
4096 	       "  sample_size = '%d' AND stat_description = '%s,%s,%s,%s')\n"
4097 	       ");\n"
4098 	       "\n",
4099 	       INDEX_STATS_NAME_PRINT,
4100 	       TEST_DATABASE_NAME,
4101 	       TEST_TABLE_NAME,
4102 	       TEST_IDX2_NAME,
4103 	       TEST_IDX2_INDEX_SIZE,
4104 	       TEST_IDX2_N_LEAF_PAGES,
4105 	       TEST_IDX2_N_DIFF1,
4106 	       TEST_IDX2_N_DIFF1_SAMPLE_SIZE, TEST_IDX2_COL1_NAME,
4107 	       TEST_IDX2_N_DIFF2,
4108 	       TEST_IDX2_N_DIFF2_SAMPLE_SIZE,
4109 	       TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME,
4110 	       TEST_IDX2_N_DIFF3,
4111 	       TEST_IDX2_N_DIFF3_SAMPLE_SIZE,
4112 	       TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
4113 	       TEST_IDX2_N_DIFF4,
4114 	       TEST_IDX2_N_DIFF4_SAMPLE_SIZE,
4115 	       TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
4116 	       TEST_IDX2_COL4_NAME);
4117 }
4118 /* @} */
4119 
4120 /* test_dict_stats_fetch_from_ps() @{ */
4121 void
test_dict_stats_fetch_from_ps()4122 test_dict_stats_fetch_from_ps()
4123 {
4124 	dict_table_t	table;
4125 	dict_index_t	index1;
4126 	ib_uint64_t	index1_stat_n_diff_key_vals[1];
4127 	ib_uint64_t	index1_stat_n_sample_sizes[1];
4128 	dict_index_t	index2;
4129 	ib_uint64_t	index2_stat_n_diff_key_vals[4];
4130 	ib_uint64_t	index2_stat_n_sample_sizes[4];
4131 	dberr_t		ret;
4132 
4133 	/* craft a dummy dict_table_t */
4134 	table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
4135 	UT_LIST_INIT(table.indexes);
4136 	UT_LIST_ADD_LAST(indexes, table.indexes, &index1);
4137 	UT_LIST_ADD_LAST(indexes, table.indexes, &index2);
4138 	ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
4139 
4140 	index1.name = TEST_IDX1_NAME;
4141 	ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
4142 	index1.cached = 1;
4143 	index1.n_uniq = 1;
4144 	index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
4145 	index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
4146 
4147 	index2.name = TEST_IDX2_NAME;
4148 	ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
4149 	index2.cached = 1;
4150 	index2.n_uniq = 4;
4151 	index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
4152 	index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
4153 
4154 	ret = dict_stats_fetch_from_ps(&table);
4155 
4156 	ut_a(ret == DB_SUCCESS);
4157 
4158 	ut_a(table.stat_n_rows == TEST_N_ROWS);
4159 	ut_a(table.stat_clustered_index_size == TEST_CLUSTERED_INDEX_SIZE);
4160 	ut_a(table.stat_sum_of_other_index_sizes
4161 	     == TEST_SUM_OF_OTHER_INDEX_SIZES);
4162 
4163 	ut_a(index1.stat_index_size == TEST_IDX1_INDEX_SIZE);
4164 	ut_a(index1.stat_n_leaf_pages == TEST_IDX1_N_LEAF_PAGES);
4165 	ut_a(index1_stat_n_diff_key_vals[0] == TEST_IDX1_N_DIFF1);
4166 	ut_a(index1_stat_n_sample_sizes[0] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE);
4167 
4168 	ut_a(index2.stat_index_size == TEST_IDX2_INDEX_SIZE);
4169 	ut_a(index2.stat_n_leaf_pages == TEST_IDX2_N_LEAF_PAGES);
4170 	ut_a(index2_stat_n_diff_key_vals[0] == TEST_IDX2_N_DIFF1);
4171 	ut_a(index2_stat_n_sample_sizes[0] == TEST_IDX2_N_DIFF1_SAMPLE_SIZE);
4172 	ut_a(index2_stat_n_diff_key_vals[1] == TEST_IDX2_N_DIFF2);
4173 	ut_a(index2_stat_n_sample_sizes[1] == TEST_IDX2_N_DIFF2_SAMPLE_SIZE);
4174 	ut_a(index2_stat_n_diff_key_vals[2] == TEST_IDX2_N_DIFF3);
4175 	ut_a(index2_stat_n_sample_sizes[2] == TEST_IDX2_N_DIFF3_SAMPLE_SIZE);
4176 	ut_a(index2_stat_n_diff_key_vals[3] == TEST_IDX2_N_DIFF4);
4177 	ut_a(index2_stat_n_sample_sizes[3] == TEST_IDX2_N_DIFF4_SAMPLE_SIZE);
4178 
4179 	printf("OK: fetch successful\n");
4180 }
4181 /* @} */
4182 
4183 /* test_dict_stats_all() @{ */
4184 void
test_dict_stats_all()4185 test_dict_stats_all()
4186 {
4187 	test_dict_table_schema_check();
4188 
4189 	test_dict_stats_save();
4190 
4191 	test_dict_stats_fetch_from_ps();
4192 }
4193 /* @} */

#endif /* UNIV_COMPILE_TEST_FUNCS */
/* @} */

#endif /* UNIV_HOTBACKUP */
