1 /*****************************************************************************
2 
3 Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2015, 2020, MariaDB Corporation.
5 
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17 
18 *****************************************************************************/
19 
20 /**************************************************//**
21 @file include/row0merge.h
22 Index build routines using a merge sort
23 
24 Created 13/06/2005 Jan Lindstrom
25 *******************************************************/
26 
27 #ifndef row0merge_h
28 #define row0merge_h
29 
30 #include "que0types.h"
31 #include "trx0types.h"
32 #include "mtr0mtr.h"
33 #include "rem0types.h"
34 #include "rem0rec.h"
35 #include "btr0types.h"
36 #include "row0mysql.h"
37 #include "lock0types.h"
38 #include "srv0srv.h"
39 #include "ut0stage.h"
40 
41 /* Reserve free space from every block for key_version */
42 #define ROW_MERGE_RESERVE_SIZE 4
43 
44 /* Cluster index read task is mandatory */
45 #define COST_READ_CLUSTERED_INDEX            1.0
46 
47 /* Basic fixed cost to build all type of index */
48 #define COST_BUILD_INDEX_STATIC              0.5
49 /* Dynamic cost to build all type of index, dynamic cost will be re-distributed based on page count ratio of each index */
50 #define COST_BUILD_INDEX_DYNAMIC             0.5
51 
52 /* Sum of below two must be 1.0 */
53 #define PCT_COST_MERGESORT_INDEX                 0.4
54 #define PCT_COST_INSERT_INDEX                    0.6
55 
56 // Forward declaration
57 struct ib_sequence_t;
58 
59 /** @brief Block size for I/O operations in merge sort.
60 
61 The minimum is srv_page_size, or page_get_free_space_of_empty()
62 rounded to a power of 2.
63 
64 When not creating a PRIMARY KEY that contains column prefixes, this
65 can be set as small as srv_page_size / 2. */
66 typedef byte	row_merge_block_t;
67 
68 /** @brief Secondary buffer for I/O operations of merge records.
69 
70 This buffer is used for writing or reading a record that spans two
71 row_merge_block_t.  Thus, it must be able to hold one merge record,
72 whose maximum size is the same as the minimum size of
73 row_merge_block_t. */
74 typedef byte	mrec_buf_t[UNIV_PAGE_SIZE_MAX];
75 
76 /** @brief Merge record in row_merge_block_t.
77 
78 The format is the same as a record in ROW_FORMAT=COMPACT with the
79 exception that the REC_N_NEW_EXTRA_BYTES are omitted. */
80 typedef byte	mrec_t;
81 
82 /** Merge record in row_merge_buf_t */
83 struct mtuple_t {
84 	dfield_t*	fields;		/*!< data fields */
85 };
86 
87 /** Buffer for sorting in main memory. */
88 struct row_merge_buf_t {
89 	mem_heap_t*	heap;		/*!< memory heap where allocated */
90 	dict_index_t*	index;		/*!< the index the tuples belong to */
91 	ulint		total_size;	/*!< total amount of data bytes */
92 	ulint		n_tuples;	/*!< number of data tuples */
93 	ulint		max_tuples;	/*!< maximum number of data tuples */
94 	mtuple_t*	tuples;		/*!< array of data tuples */
95 	mtuple_t*	tmp_tuples;	/*!< temporary copy of tuples,
96 					for sorting */
97 };
98 
99 /** Information about temporary files used in merge sort */
100 struct merge_file_t {
101 	pfs_os_file_t	fd;		/*!< file descriptor */
102 	ulint		offset;		/*!< file offset (end of file) */
103 	ib_uint64_t	n_rec;		/*!< number of records in the file */
104 };
105 
106 /** Index field definition */
107 struct index_field_t {
108 	ulint		col_no;		/*!< column offset */
109 	ulint		prefix_len;	/*!< column prefix length, or 0
110 					if indexing the whole column */
111 	bool		is_v_col;	/*!< whether this is a virtual column */
112 };
113 
114 /** Definition of an index being created */
115 struct index_def_t {
116 	const char*	name;		/*!< index name */
117 	bool		rebuild;	/*!< whether the table is rebuilt */
118 	ulint		ind_type;	/*!< 0, DICT_UNIQUE,
119 					or DICT_CLUSTERED */
120 	ulint		key_number;	/*!< MySQL key number,
121 					or ULINT_UNDEFINED if none */
122 	ulint		n_fields;	/*!< number of fields in index */
123 	index_field_t*	fields;		/*!< field definitions */
124 	st_mysql_ftparser*
125 			parser;		/*!< fulltext parser plugin */
126 };
127 
128 /** Structure for reporting duplicate records. */
129 struct row_merge_dup_t {
130 	dict_index_t*		index;	/*!< index being sorted */
131 	struct TABLE*		table;	/*!< MySQL table object */
132 	const ulint*		col_map;/*!< mapping of column numbers
133 					in table to the rebuilt table
134 					(index->table), or NULL if not
135 					rebuilding table */
136 	ulint			n_dup;	/*!< number of duplicates */
137 };
138 
139 /*************************************************************//**
140 Report a duplicate key. */
141 void
142 row_merge_dup_report(
143 /*=================*/
144 	row_merge_dup_t*	dup,	/*!< in/out: for reporting duplicates */
145 	const dfield_t*		entry)	/*!< in: duplicate index entry */
146 	MY_ATTRIBUTE((nonnull));
147 
148 /*********************************************************************//**
149 Sets an exclusive lock on a table, for the duration of creating indexes.
150 @return error code or DB_SUCCESS */
151 dberr_t
152 row_merge_lock_table(
153 /*=================*/
154 	trx_t*		trx,		/*!< in/out: transaction */
155 	dict_table_t*	table,		/*!< in: table to lock */
156 	enum lock_mode	mode)		/*!< in: LOCK_X or LOCK_S */
157 	MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
158 
159 /*********************************************************************//**
160 Drop indexes that were created before an error occurred.
161 The data dictionary must have been locked exclusively by the caller,
162 because the transaction will not be committed. */
163 void
164 row_merge_drop_indexes_dict(
165 /*========================*/
166 	trx_t*		trx,	/*!< in/out: dictionary transaction */
167 	table_id_t	table_id)/*!< in: table identifier */
168 	MY_ATTRIBUTE((nonnull));
169 
170 /** Drop indexes that were created before an error occurred.
171 The data dictionary must have been locked exclusively by the caller,
172 because the transaction will not be committed.
173 @param trx              dictionary transaction
174 @param table            table containing the indexes
175 @param locked           True if table is locked,
176                         false - may need to do lazy drop
177 @param alter_trx        Alter table transaction */
178 void
179 row_merge_drop_indexes(
180         trx_t*          trx,
181         dict_table_t*   table,
182         bool            locked,
183         const trx_t*    alter_trx=NULL);
184 
185 /*********************************************************************//**
186 Drop all partially created indexes during crash recovery. */
187 void
188 row_merge_drop_temp_indexes(void);
189 /*=============================*/
190 
191 /** Create temporary merge files in the given paramater path, and if
192 UNIV_PFS_IO defined, register the file descriptor with Performance Schema.
193 @param[in]	path	location for creating temporary merge files, or NULL
194 @return File descriptor */
195 pfs_os_file_t
196 row_merge_file_create_low(
197 	const char*	path)
198 	MY_ATTRIBUTE((warn_unused_result));
199 /*********************************************************************//**
200 Destroy a merge file. And de-register the file from Performance Schema
201 if UNIV_PFS_IO is defined. */
202 void
203 row_merge_file_destroy_low(
204 /*=======================*/
205 	const pfs_os_file_t&	fd);	/*!< in: merge file descriptor */
206 
207 /*********************************************************************//**
208 Rename the tables in the data dictionary.  The data dictionary must
209 have been locked exclusively by the caller, because the transaction
210 will not be committed.
211 @return error code or DB_SUCCESS */
212 dberr_t
213 row_merge_rename_tables_dict(
214 /*=========================*/
215 	dict_table_t*	old_table,	/*!< in/out: old table, renamed to
216 					tmp_name */
217 	dict_table_t*	new_table,	/*!< in/out: new table, renamed to
218 					old_table->name */
219 	const char*	tmp_name,	/*!< in: new name for old_table */
220 	trx_t*		trx)		/*!< in/out: dictionary transaction */
221 	MY_ATTRIBUTE((nonnull, warn_unused_result));
222 
223 /*********************************************************************//**
224 Rename an index in the dictionary that was created. The data
225 dictionary must have been locked exclusively by the caller, because
226 the transaction will not be committed.
227 @return DB_SUCCESS if all OK */
228 dberr_t
229 row_merge_rename_index_to_add(
230 /*==========================*/
231 	trx_t*		trx,		/*!< in/out: transaction */
232 	table_id_t	table_id,	/*!< in: table identifier */
233 	index_id_t	index_id)	/*!< in: index identifier */
234 	MY_ATTRIBUTE((nonnull(1), warn_unused_result));
235 
236 /*********************************************************************//**
237 Rename an index in the dictionary that is to be dropped. The data
238 dictionary must have been locked exclusively by the caller, because
239 the transaction will not be committed.
240 @return DB_SUCCESS if all OK */
241 dberr_t
242 row_merge_rename_index_to_drop(
243 /*===========================*/
244 	trx_t*		trx,		/*!< in/out: transaction */
245 	table_id_t	table_id,	/*!< in: table identifier */
246 	index_id_t	index_id)	/*!< in: index identifier */
247 	MY_ATTRIBUTE((nonnull(1), warn_unused_result));
248 
249 /** Create the index and load in to the dictionary.
250 @param[in,out]	table		the index is on this table
251 @param[in]	index_def	the index definition
252 @param[in]	add_v		new virtual columns added along with add
253 				index call
254 @return index, or NULL on error */
255 dict_index_t*
256 row_merge_create_index(
257 	dict_table_t*		table,
258 	const index_def_t*	index_def,
259 	const dict_add_v_col_t*	add_v)
260 	MY_ATTRIBUTE((warn_unused_result));
261 
262 /*********************************************************************//**
263 Check if a transaction can use an index.
264 @return whether the index can be used by the transaction */
265 bool
266 row_merge_is_index_usable(
267 /*======================*/
268 	const trx_t*		trx,	/*!< in: transaction */
269 	const dict_index_t*	index)	/*!< in: index to check */
270 	MY_ATTRIBUTE((nonnull, warn_unused_result));
271 
272 /*********************************************************************//**
273 Drop a table. The caller must have ensured that the background stats
274 thread is not processing the table. This can be done by calling
275 dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and
276 before calling this function.
277 @return DB_SUCCESS or error code */
278 dberr_t
279 row_merge_drop_table(
280 /*=================*/
281 	trx_t*		trx,		/*!< in: transaction */
282 	dict_table_t*	table)		/*!< in: table instance to drop */
283 	MY_ATTRIBUTE((nonnull, warn_unused_result));
284 
285 /** Write an MLOG_INDEX_LOAD record to indicate in the redo-log
286 that redo-logging of individual index pages was disabled, and
287 the flushing of such pages to the data files was completed.
288 @param[in]	index	an index tree on which redo logging was disabled */
289 void row_merge_write_redo(const dict_index_t* index);
290 
291 /** Build indexes on a table by reading a clustered index, creating a temporary
292 file containing index entries, merge sorting these index entries and inserting
293 sorted index entries to indexes.
294 @param[in]	trx		transaction
295 @param[in]	old_table	table where rows are read from
296 @param[in]	new_table	table where indexes are created; identical to
297 old_table unless creating a PRIMARY KEY
298 @param[in]	online		true if creating indexes online
299 @param[in]	indexes		indexes to be created
300 @param[in]	key_numbers	MySQL key numbers
301 @param[in]	n_indexes	size of indexes[]
302 @param[in,out]	table		MySQL table, for reporting erroneous key value
303 if applicable
304 @param[in]	defaults	default values of added, changed columns, or NULL
305 @param[in]	col_map		mapping of old column numbers to new ones, or
306 NULL if old_table == new_table
307 @param[in]	add_autoinc	number of added AUTO_INCREMENT columns, or
308 ULINT_UNDEFINED if none is added
309 @param[in,out]	sequence	autoinc sequence
310 @param[in]	skip_pk_sort	whether the new PRIMARY KEY will follow
311 existing order
312 @param[in,out]	stage		performance schema accounting object, used by
313 ALTER TABLE. stage->begin_phase_read_pk() will be called at the beginning of
314 this function and it will be passed to other functions for further accounting.
315 @param[in]	add_v		new virtual columns added along with indexes
316 @param[in]	eval_table	mysql table used to evaluate virtual column
317 				value, see innobase_get_computed_value().
318 @param[in]	allow_non_null	allow the conversion from null to not-null
319 @return DB_SUCCESS or error code */
320 dberr_t
321 row_merge_build_indexes(
322 	trx_t*			trx,
323 	dict_table_t*		old_table,
324 	dict_table_t*		new_table,
325 	bool			online,
326 	dict_index_t**		indexes,
327 	const ulint*		key_numbers,
328 	ulint			n_indexes,
329 	struct TABLE*		table,
330 	const dtuple_t*		defaults,
331 	const ulint*		col_map,
332 	ulint			add_autoinc,
333 	ib_sequence_t&		sequence,
334 	bool			skip_pk_sort,
335 	ut_stage_alter_t*	stage,
336 	const dict_add_v_col_t*	add_v,
337 	struct TABLE*		eval_table,
338 	bool			allow_non_null)
339 	MY_ATTRIBUTE((warn_unused_result));
340 
341 /********************************************************************//**
342 Write a buffer to a block. */
343 void
344 row_merge_buf_write(
345 /*================*/
346 	const row_merge_buf_t*	buf,	/*!< in: sorted buffer */
347 	const merge_file_t*	of,	/*!< in: output file */
348 	row_merge_block_t*	block)	/*!< out: buffer for writing to file */
349 	MY_ATTRIBUTE((nonnull));
350 
351 /********************************************************************//**
352 Sort a buffer. */
353 void
354 row_merge_buf_sort(
355 /*===============*/
356 	row_merge_buf_t*	buf,	/*!< in/out: sort buffer */
357 	row_merge_dup_t*	dup)	/*!< in/out: reporter of duplicates
358 					(NULL if non-unique index) */
359 	MY_ATTRIBUTE((nonnull(1)));
360 
361 /********************************************************************//**
362 Write a merge block to the file system.
363 @return whether the request was completed successfully
364 @retval	false	on error
365 @retval	true	on success */
366 UNIV_INTERN
367 bool
368 row_merge_write(
369 /*============*/
370 	const pfs_os_file_t&	fd,	/*!< in: file descriptor */
371 	ulint		offset,	/*!< in: offset where to write,
372 				in number of row_merge_block_t elements */
373 	const void*	buf,	/*!< in: data */
374 	void*		crypt_buf,		/*!< in: crypt buf or NULL */
375 	ulint		space)			/*!< in: space id */
376 	MY_ATTRIBUTE((warn_unused_result));
377 
378 /********************************************************************//**
379 Empty a sort buffer.
380 @return sort buffer */
381 row_merge_buf_t*
382 row_merge_buf_empty(
383 /*================*/
384 	row_merge_buf_t*	buf)	/*!< in,own: sort buffer */
385 	MY_ATTRIBUTE((warn_unused_result, nonnull));
386 
387 /** Create a merge file in the given location.
388 @param[out]	merge_file	merge file structure
389 @param[in]	path		location for creating temporary file, or NULL
390 @return file descriptor, or -1 on failure */
391 pfs_os_file_t
392 row_merge_file_create(
393 	merge_file_t*	merge_file,
394 	const char*	path)
395 	MY_ATTRIBUTE((warn_unused_result, nonnull(1)));
396 
397 /** Merge disk files.
398 @param[in]	trx	transaction
399 @param[in]	dup	descriptor of index being created
400 @param[in,out]	file	file containing index entries
401 @param[in,out]	block	3 buffers
402 @param[in,out]	tmpfd	temporary file handle
403 @param[in]      update_progress true, if we should update progress status
404 @param[in]      pct_progress total progress percent until now
405 @param[in]      pct_ocst current progress percent
406 @param[in]      crypt_block crypt buf or NULL
407 @param[in]      space    space_id
408 @param[in,out]	stage	performance schema accounting object, used by
409 ALTER TABLE. If not NULL, stage->begin_phase_sort() will be called initially
410 and then stage->inc() will be called for each record processed.
411 @return DB_SUCCESS or error code */
412 dberr_t
413 row_merge_sort(
414 /*===========*/
415 	trx_t*			trx,
416 	const row_merge_dup_t*	dup,
417 	merge_file_t*		file,
418 	row_merge_block_t*	block,
419 	pfs_os_file_t*		tmpfd,
420 	const bool		update_progress,
421 	const double	pct_progress,
422 	const double	pct_cost,
423 	row_merge_block_t*	crypt_block,
424 	ulint			space,
425 	ut_stage_alter_t*	stage = NULL)
426 	MY_ATTRIBUTE((warn_unused_result));
427 
428 /*********************************************************************//**
429 Allocate a sort buffer.
430 @return own: sort buffer */
431 row_merge_buf_t*
432 row_merge_buf_create(
433 /*=================*/
434 	dict_index_t*	index)	/*!< in: secondary index */
435 	MY_ATTRIBUTE((warn_unused_result, nonnull, malloc));
436 
437 /*********************************************************************//**
438 Deallocate a sort buffer. */
439 void
440 row_merge_buf_free(
441 /*===============*/
442 	row_merge_buf_t*	buf)	/*!< in,own: sort buffer to be freed */
443 	MY_ATTRIBUTE((nonnull));
444 
445 /*********************************************************************//**
446 Destroy a merge file. */
447 void
448 row_merge_file_destroy(
449 /*===================*/
450 	merge_file_t*	merge_file)	/*!< in/out: merge file structure */
451 	MY_ATTRIBUTE((nonnull));
452 
453 /** Read a merge block from the file system.
454 @return whether the request was completed successfully */
455 bool
456 row_merge_read(
457 /*===========*/
458 	const pfs_os_file_t&	fd,	/*!< in: file descriptor */
459 	ulint			offset,	/*!< in: offset where to read
460 					in number of row_merge_block_t
461 					elements */
462 	row_merge_block_t*	buf,	/*!< out: data */
463 	row_merge_block_t*	crypt_buf, /*!< in: crypt buf or NULL */
464 	ulint			space)	   /*!< in: space id */
465 	MY_ATTRIBUTE((warn_unused_result));
466 
467 /********************************************************************//**
468 Read a merge record.
469 @return pointer to next record, or NULL on I/O error or end of list */
470 const byte*
471 row_merge_read_rec(
472 /*===============*/
473 	row_merge_block_t*	block,	/*!< in/out: file buffer */
474 	mrec_buf_t*		buf,	/*!< in/out: secondary buffer */
475 	const byte*		b,	/*!< in: pointer to record */
476 	const dict_index_t*	index,	/*!< in: index of the record */
477 	const pfs_os_file_t&	fd,	/*!< in: file descriptor */
478 	ulint*			foffs,	/*!< in/out: file offset */
479 	const mrec_t**		mrec,	/*!< out: pointer to merge record,
480 					or NULL on end of list
481 					(non-NULL on I/O error) */
482 	rec_offs*		offsets,/*!< out: offsets of mrec */
483 	row_merge_block_t*	crypt_block, /*!< in: crypt buf or NULL */
484 	ulint			space)	   /*!< in: space id */
485 	MY_ATTRIBUTE((warn_unused_result));
486 #endif /* row0merge.h */
487