1 /*****************************************************************************
2 
3 Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2015, 2020, MariaDB Corporation.
5 
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17 
18 *****************************************************************************/
19 
20 /**************************************************//**
21 @file include/row0merge.h
22 Index build routines using a merge sort
23 
24 Created 13/06/2005 Jan Lindstrom
25 *******************************************************/
26 
27 #ifndef row0merge_h
28 #define row0merge_h
29 
30 #include "que0types.h"
31 #include "trx0types.h"
32 #include "mtr0mtr.h"
33 #include "rem0types.h"
34 #include "rem0rec.h"
35 #include "btr0types.h"
36 #include "row0mysql.h"
37 #include "lock0types.h"
38 #include "srv0srv.h"
39 #include "ut0stage.h"
40 
41 /* Reserve free space from every block for key_version */
42 #define ROW_MERGE_RESERVE_SIZE 4
43 
44 /* Cluster index read task is mandatory */
45 #define COST_READ_CLUSTERED_INDEX            1.0
46 
47 /* Basic fixed cost to build all type of index */
48 #define COST_BUILD_INDEX_STATIC              0.5
49 /* Dynamic cost to build all type of index, dynamic cost will be re-distributed based on page count ratio of each index */
50 #define COST_BUILD_INDEX_DYNAMIC             0.5
51 
52 /* Sum of below two must be 1.0 */
53 #define PCT_COST_MERGESORT_INDEX                 0.4
54 #define PCT_COST_INSERT_INDEX                    0.6
55 
56 // Forward declaration
57 struct ib_sequence_t;
58 
59 /** @brief Block size for I/O operations in merge sort.
60 
61 The minimum is srv_page_size, or page_get_free_space_of_empty()
62 rounded to a power of 2.
63 
64 When not creating a PRIMARY KEY that contains column prefixes, this
65 can be set as small as srv_page_size / 2. */
66 typedef byte	row_merge_block_t;
67 
68 /** @brief Secondary buffer for I/O operations of merge records.
69 
70 This buffer is used for writing or reading a record that spans two
71 row_merge_block_t.  Thus, it must be able to hold one merge record,
72 whose maximum size is the same as the minimum size of
73 row_merge_block_t. */
74 typedef byte	mrec_buf_t[UNIV_PAGE_SIZE_MAX];
75 
76 /** @brief Merge record in row_merge_block_t.
77 
78 The format is the same as a record in ROW_FORMAT=COMPACT with the
79 exception that the REC_N_NEW_EXTRA_BYTES are omitted. */
80 typedef byte	mrec_t;
81 
82 /** Merge record in row_merge_buf_t */
83 struct mtuple_t {
84 	dfield_t*	fields;		/*!< data fields */
85 };
86 
87 /** Buffer for sorting in main memory. */
88 struct row_merge_buf_t {
89 	mem_heap_t*	heap;		/*!< memory heap where allocated */
90 	dict_index_t*	index;		/*!< the index the tuples belong to */
91 	ulint		total_size;	/*!< total amount of data bytes */
92 	ulint		n_tuples;	/*!< number of data tuples */
93 	ulint		max_tuples;	/*!< maximum number of data tuples */
94 	mtuple_t*	tuples;		/*!< array of data tuples */
95 	mtuple_t*	tmp_tuples;	/*!< temporary copy of tuples,
96 					for sorting */
97 };
98 
99 /** Information about temporary files used in merge sort */
100 struct merge_file_t {
101 	pfs_os_file_t	fd;		/*!< file descriptor */
102 	ulint		offset;		/*!< file offset (end of file) */
103 	ib_uint64_t	n_rec;		/*!< number of records in the file */
104 };
105 
106 /** Index field definition */
107 struct index_field_t {
108 	ulint		col_no;		/*!< column offset */
109 	ulint		prefix_len;	/*!< column prefix length, or 0
110 					if indexing the whole column */
111 	bool		is_v_col;	/*!< whether this is a virtual column */
112 };
113 
114 /** Definition of an index being created */
115 struct index_def_t {
116 	const char*	name;		/*!< index name */
117 	bool		rebuild;	/*!< whether the table is rebuilt */
118 	ulint		ind_type;	/*!< 0, DICT_UNIQUE,
119 					or DICT_CLUSTERED */
120 	ulint		key_number;	/*!< MySQL key number,
121 					or ULINT_UNDEFINED if none */
122 	ulint		n_fields;	/*!< number of fields in index */
123 	index_field_t*	fields;		/*!< field definitions */
124 	st_mysql_ftparser*
125 			parser;		/*!< fulltext parser plugin */
126 };
127 
128 /** Structure for reporting duplicate records. */
129 struct row_merge_dup_t {
130 	dict_index_t*		index;	/*!< index being sorted */
131 	struct TABLE*		table;	/*!< MySQL table object */
132 	const ulint*		col_map;/*!< mapping of column numbers
133 					in table to the rebuilt table
134 					(index->table), or NULL if not
135 					rebuilding table */
136 	ulint			n_dup;	/*!< number of duplicates */
137 };
138 
139 /*************************************************************//**
140 Report a duplicate key. */
141 void
142 row_merge_dup_report(
143 /*=================*/
144 	row_merge_dup_t*	dup,	/*!< in/out: for reporting duplicates */
145 	const dfield_t*		entry)	/*!< in: duplicate index entry */
146 	MY_ATTRIBUTE((nonnull));
147 
148 /*********************************************************************//**
149 Sets an exclusive lock on a table, for the duration of creating indexes.
150 @return error code or DB_SUCCESS */
151 dberr_t
152 row_merge_lock_table(
153 /*=================*/
154 	trx_t*		trx,		/*!< in/out: transaction */
155 	dict_table_t*	table,		/*!< in: table to lock */
156 	enum lock_mode	mode)		/*!< in: LOCK_X or LOCK_S */
157 	MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
158 
159 /*********************************************************************//**
160 Drop indexes that were created before an error occurred.
161 The data dictionary must have been locked exclusively by the caller,
162 because the transaction will not be committed. */
163 void
164 row_merge_drop_indexes_dict(
165 /*========================*/
166 	trx_t*		trx,	/*!< in/out: dictionary transaction */
167 	table_id_t	table_id)/*!< in: table identifier */
168 	MY_ATTRIBUTE((nonnull));
169 
170 /** Drop indexes that were created before an error occurred.
171 The data dictionary must have been locked exclusively by the caller,
172 because the transaction will not be committed.
173 @param trx              dictionary transaction
174 @param table            table containing the indexes
175 @param locked           True if table is locked,
176                         false - may need to do lazy drop
177 @param alter_trx        Alter table transaction */
178 void
179 row_merge_drop_indexes(
180         trx_t*          trx,
181         dict_table_t*   table,
182         bool            locked,
183         const trx_t*    alter_trx=NULL);
184 
185 /*********************************************************************//**
186 Drop all partially created indexes during crash recovery. */
187 void
188 row_merge_drop_temp_indexes(void);
189 /*=============================*/
190 
191 /** Create temporary merge files in the given paramater path, and if
192 UNIV_PFS_IO defined, register the file descriptor with Performance Schema.
193 @param[in]	path	location for creating temporary merge files, or NULL
194 @return File descriptor */
195 pfs_os_file_t
196 row_merge_file_create_low(
197 	const char*	path)
198 	MY_ATTRIBUTE((warn_unused_result));
199 /*********************************************************************//**
200 Destroy a merge file. And de-register the file from Performance Schema
201 if UNIV_PFS_IO is defined. */
202 void
203 row_merge_file_destroy_low(
204 /*=======================*/
205 	const pfs_os_file_t&	fd);	/*!< in: merge file descriptor */
206 
207 /*********************************************************************//**
208 Rename an index in the dictionary that was created. The data
209 dictionary must have been locked exclusively by the caller, because
210 the transaction will not be committed.
211 @return DB_SUCCESS if all OK */
212 dberr_t
213 row_merge_rename_index_to_add(
214 /*==========================*/
215 	trx_t*		trx,		/*!< in/out: transaction */
216 	table_id_t	table_id,	/*!< in: table identifier */
217 	index_id_t	index_id)	/*!< in: index identifier */
218 	MY_ATTRIBUTE((nonnull(1), warn_unused_result));
219 
220 /*********************************************************************//**
221 Rename an index in the dictionary that is to be dropped. The data
222 dictionary must have been locked exclusively by the caller, because
223 the transaction will not be committed.
224 @return DB_SUCCESS if all OK */
225 dberr_t
226 row_merge_rename_index_to_drop(
227 /*===========================*/
228 	trx_t*		trx,		/*!< in/out: transaction */
229 	table_id_t	table_id,	/*!< in: table identifier */
230 	index_id_t	index_id)	/*!< in: index identifier */
231 	MY_ATTRIBUTE((nonnull(1), warn_unused_result));
232 
233 /** Create the index and load in to the dictionary.
234 @param[in,out]	table		the index is on this table
235 @param[in]	index_def	the index definition
236 @param[in]	add_v		new virtual columns added along with add
237 				index call
238 @return index, or NULL on error */
239 dict_index_t*
240 row_merge_create_index(
241 	dict_table_t*		table,
242 	const index_def_t*	index_def,
243 	const dict_add_v_col_t*	add_v)
244 	MY_ATTRIBUTE((warn_unused_result));
245 
246 /*********************************************************************//**
247 Check if a transaction can use an index.
248 @return whether the index can be used by the transaction */
249 bool
250 row_merge_is_index_usable(
251 /*======================*/
252 	const trx_t*		trx,	/*!< in: transaction */
253 	const dict_index_t*	index)	/*!< in: index to check */
254 	MY_ATTRIBUTE((nonnull, warn_unused_result));
255 
256 /*********************************************************************//**
257 Drop a table. The caller must have ensured that the background stats
258 thread is not processing the table. This can be done by calling
259 dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and
260 before calling this function.
261 @return DB_SUCCESS or error code */
262 dberr_t
263 row_merge_drop_table(
264 /*=================*/
265 	trx_t*		trx,		/*!< in: transaction */
266 	dict_table_t*	table)		/*!< in: table instance to drop */
267 	MY_ATTRIBUTE((nonnull, warn_unused_result));
268 
269 /** Build indexes on a table by reading a clustered index, creating a temporary
270 file containing index entries, merge sorting these index entries and inserting
271 sorted index entries to indexes.
272 @param[in]	trx		transaction
273 @param[in]	old_table	table where rows are read from
274 @param[in]	new_table	table where indexes are created; identical to
275 old_table unless creating a PRIMARY KEY
276 @param[in]	online		true if creating indexes online
277 @param[in]	indexes		indexes to be created
278 @param[in]	key_numbers	MySQL key numbers
279 @param[in]	n_indexes	size of indexes[]
280 @param[in,out]	table		MySQL table, for reporting erroneous key value
281 if applicable
282 @param[in]	defaults	default values of added, changed columns, or NULL
283 @param[in]	col_map		mapping of old column numbers to new ones, or
284 NULL if old_table == new_table
285 @param[in]	add_autoinc	number of added AUTO_INCREMENT columns, or
286 ULINT_UNDEFINED if none is added
287 @param[in,out]	sequence	autoinc sequence
288 @param[in]	skip_pk_sort	whether the new PRIMARY KEY will follow
289 existing order
290 @param[in,out]	stage		performance schema accounting object, used by
291 ALTER TABLE. stage->begin_phase_read_pk() will be called at the beginning of
292 this function and it will be passed to other functions for further accounting.
293 @param[in]	add_v		new virtual columns added along with indexes
294 @param[in]	eval_table	mysql table used to evaluate virtual column
295 				value, see innobase_get_computed_value().
296 @param[in]	allow_non_null	allow the conversion from null to not-null
297 @return DB_SUCCESS or error code */
298 dberr_t
299 row_merge_build_indexes(
300 	trx_t*			trx,
301 	dict_table_t*		old_table,
302 	dict_table_t*		new_table,
303 	bool			online,
304 	dict_index_t**		indexes,
305 	const ulint*		key_numbers,
306 	ulint			n_indexes,
307 	struct TABLE*		table,
308 	const dtuple_t*		defaults,
309 	const ulint*		col_map,
310 	ulint			add_autoinc,
311 	ib_sequence_t&		sequence,
312 	bool			skip_pk_sort,
313 	ut_stage_alter_t*	stage,
314 	const dict_add_v_col_t*	add_v,
315 	struct TABLE*		eval_table,
316 	bool			allow_non_null)
317 	MY_ATTRIBUTE((warn_unused_result));
318 
319 /********************************************************************//**
320 Write a buffer to a block. */
321 void
322 row_merge_buf_write(
323 /*================*/
324 	const row_merge_buf_t*	buf,	/*!< in: sorted buffer */
325 	const merge_file_t*	of,	/*!< in: output file */
326 	row_merge_block_t*	block)	/*!< out: buffer for writing to file */
327 	MY_ATTRIBUTE((nonnull));
328 
329 /********************************************************************//**
330 Sort a buffer. */
331 void
332 row_merge_buf_sort(
333 /*===============*/
334 	row_merge_buf_t*	buf,	/*!< in/out: sort buffer */
335 	row_merge_dup_t*	dup)	/*!< in/out: reporter of duplicates
336 					(NULL if non-unique index) */
337 	MY_ATTRIBUTE((nonnull(1)));
338 
339 /********************************************************************//**
340 Write a merge block to the file system.
341 @return whether the request was completed successfully
342 @retval	false	on error
343 @retval	true	on success */
344 UNIV_INTERN
345 bool
346 row_merge_write(
347 /*============*/
348 	const pfs_os_file_t&	fd,	/*!< in: file descriptor */
349 	ulint		offset,	/*!< in: offset where to write,
350 				in number of row_merge_block_t elements */
351 	const void*	buf,	/*!< in: data */
352 	void*		crypt_buf,		/*!< in: crypt buf or NULL */
353 	ulint		space)			/*!< in: space id */
354 	MY_ATTRIBUTE((warn_unused_result));
355 
356 /********************************************************************//**
357 Empty a sort buffer.
358 @return sort buffer */
359 row_merge_buf_t*
360 row_merge_buf_empty(
361 /*================*/
362 	row_merge_buf_t*	buf)	/*!< in,own: sort buffer */
363 	MY_ATTRIBUTE((warn_unused_result, nonnull));
364 
365 /** Create a merge file in the given location.
366 @param[out]	merge_file	merge file structure
367 @param[in]	path		location for creating temporary file, or NULL
368 @return file descriptor, or -1 on failure */
369 pfs_os_file_t
370 row_merge_file_create(
371 	merge_file_t*	merge_file,
372 	const char*	path)
373 	MY_ATTRIBUTE((warn_unused_result, nonnull(1)));
374 
375 /** Merge disk files.
376 @param[in]	trx	transaction
377 @param[in]	dup	descriptor of index being created
378 @param[in,out]	file	file containing index entries
379 @param[in,out]	block	3 buffers
380 @param[in,out]	tmpfd	temporary file handle
381 @param[in]      update_progress true, if we should update progress status
382 @param[in]      pct_progress total progress percent until now
383 @param[in]      pct_ocst current progress percent
384 @param[in]      crypt_block crypt buf or NULL
385 @param[in]      space    space_id
386 @param[in,out]	stage	performance schema accounting object, used by
387 ALTER TABLE. If not NULL, stage->begin_phase_sort() will be called initially
388 and then stage->inc() will be called for each record processed.
389 @return DB_SUCCESS or error code */
390 dberr_t
391 row_merge_sort(
392 /*===========*/
393 	trx_t*			trx,
394 	const row_merge_dup_t*	dup,
395 	merge_file_t*		file,
396 	row_merge_block_t*	block,
397 	pfs_os_file_t*		tmpfd,
398 	const bool		update_progress,
399 	const double	pct_progress,
400 	const double	pct_cost,
401 	row_merge_block_t*	crypt_block,
402 	ulint			space,
403 	ut_stage_alter_t*	stage = NULL)
404 	MY_ATTRIBUTE((warn_unused_result));
405 
406 /*********************************************************************//**
407 Allocate a sort buffer.
408 @return own: sort buffer */
409 row_merge_buf_t*
410 row_merge_buf_create(
411 /*=================*/
412 	dict_index_t*	index)	/*!< in: secondary index */
413 	MY_ATTRIBUTE((warn_unused_result, nonnull, malloc));
414 
415 /*********************************************************************//**
416 Deallocate a sort buffer. */
417 void
418 row_merge_buf_free(
419 /*===============*/
420 	row_merge_buf_t*	buf)	/*!< in,own: sort buffer to be freed */
421 	MY_ATTRIBUTE((nonnull));
422 
423 /*********************************************************************//**
424 Destroy a merge file. */
425 void
426 row_merge_file_destroy(
427 /*===================*/
428 	merge_file_t*	merge_file)	/*!< in/out: merge file structure */
429 	MY_ATTRIBUTE((nonnull));
430 
431 /** Read a merge block from the file system.
432 @return whether the request was completed successfully */
433 bool
434 row_merge_read(
435 /*===========*/
436 	const pfs_os_file_t&	fd,	/*!< in: file descriptor */
437 	ulint			offset,	/*!< in: offset where to read
438 					in number of row_merge_block_t
439 					elements */
440 	row_merge_block_t*	buf,	/*!< out: data */
441 	row_merge_block_t*	crypt_buf, /*!< in: crypt buf or NULL */
442 	ulint			space)	   /*!< in: space id */
443 	MY_ATTRIBUTE((warn_unused_result));
444 
445 /********************************************************************//**
446 Read a merge record.
447 @return pointer to next record, or NULL on I/O error or end of list */
448 const byte*
449 row_merge_read_rec(
450 /*===============*/
451 	row_merge_block_t*	block,	/*!< in/out: file buffer */
452 	mrec_buf_t*		buf,	/*!< in/out: secondary buffer */
453 	const byte*		b,	/*!< in: pointer to record */
454 	const dict_index_t*	index,	/*!< in: index of the record */
455 	const pfs_os_file_t&	fd,	/*!< in: file descriptor */
456 	ulint*			foffs,	/*!< in/out: file offset */
457 	const mrec_t**		mrec,	/*!< out: pointer to merge record,
458 					or NULL on end of list
459 					(non-NULL on I/O error) */
460 	rec_offs*		offsets,/*!< out: offsets of mrec */
461 	row_merge_block_t*	crypt_block, /*!< in: crypt buf or NULL */
462 	ulint			space)	   /*!< in: space id */
463 	MY_ATTRIBUTE((warn_unused_result));
464 #endif /* row0merge.h */
465