1 /*****************************************************************************
2 
3 Copyright (c) 2013, 2021, Oracle and/or its affiliates.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file row/row0trunc.cc
29 TRUNCATE implementation
30 
31 Created 2013-04-12 Sunny Bains
32 *******************************************************/
33 
34 #include "row0mysql.h"
35 #include "pars0pars.h"
36 #include "dict0crea.h"
37 #include "dict0boot.h"
38 #include "dict0stats.h"
39 #include "dict0stats_bg.h"
40 #include "lock0lock.h"
41 #include "fts0fts.h"
42 #include "fsp0sysspace.h"
43 #include "srv0start.h"
44 #include "row0trunc.h"
45 #include "os0file.h"
46 #include <vector>
47 
48 bool	truncate_t::s_fix_up_active = false;
49 truncate_t::tables_t		truncate_t::s_tables;
50 truncate_t::truncated_tables_t	truncate_t::s_truncated_tables;
51 
52 /**
53 Iterator over the the raw records in an index, doesn't support MVCC. */
54 class IndexIterator {
55 
56 public:
57 	/**
58 	Iterate over an indexes records
59 	@param index		index to iterate over */
IndexIterator(dict_index_t * index)60 	explicit IndexIterator(dict_index_t* index)
61 		:
62 		m_index(index)
63 	{
64 		/* Do nothing */
65 	}
66 
67 	/**
68 	Search for key. Position the cursor on a record GE key.
69 	@return DB_SUCCESS or error code. */
search(dtuple_t & key,bool noredo)70 	dberr_t search(dtuple_t& key, bool noredo)
71 	{
72 		mtr_start(&m_mtr);
73 
74 		if (noredo) {
75 			mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
76 		}
77 
78 		btr_pcur_open_on_user_rec(
79 			m_index,
80 			&key,
81 			PAGE_CUR_GE,
82 			BTR_MODIFY_LEAF,
83 			&m_pcur, &m_mtr);
84 
85 		return(DB_SUCCESS);
86 	}
87 
88 	/**
89 	Iterate over all the records
90 	@return DB_SUCCESS or error code */
91 	template <typename Callback>
for_each(Callback & callback)92 	dberr_t for_each(Callback& callback)
93 	{
94 		dberr_t	err = DB_SUCCESS;
95 
96 		for (;;) {
97 
98 			if (!btr_pcur_is_on_user_rec(&m_pcur)
99 			    || !callback.match(&m_mtr, &m_pcur)) {
100 
101 				/* The end of of the index has been reached. */
102 				err = DB_END_OF_INDEX;
103 				break;
104 			}
105 
106 			rec_t*	rec = btr_pcur_get_rec(&m_pcur);
107 
108 			if (!rec_get_deleted_flag(rec, FALSE)) {
109 
110 				err = callback(&m_mtr, &m_pcur);
111 
112 				if (err != DB_SUCCESS) {
113 					break;
114 				}
115 			}
116 
117 			btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr);
118 		}
119 
120 		btr_pcur_close(&m_pcur);
121 		mtr_commit(&m_mtr);
122 
123 		return(err == DB_END_OF_INDEX ? DB_SUCCESS : err);
124 	}
125 
126 private:
127 	// Disable copying
128 	IndexIterator(const IndexIterator&);
129 	IndexIterator& operator=(const IndexIterator&);
130 
131 private:
132 	mtr_t		m_mtr;
133 	btr_pcur_t	m_pcur;
134 	dict_index_t*	m_index;
135 };
136 
137 /** SysIndex table iterator, iterate over records for a table. */
138 class SysIndexIterator {
139 
140 public:
141 	/**
142 	Iterate over all the records that match the table id.
143 	@return DB_SUCCESS or error code */
144 	template <typename Callback>
for_each(Callback & callback) const145 	dberr_t for_each(Callback& callback) const
146 	{
147 		dict_index_t*	sys_index;
148 		byte		buf[DTUPLE_EST_ALLOC(1)];
149 		dtuple_t*	tuple =
150 			dtuple_create_from_mem(buf, sizeof(buf), 1, 0);
151 		dfield_t*	dfield = dtuple_get_nth_field(tuple, 0);
152 
153 		dfield_set_data(
154 			dfield,
155 			callback.table_id(),
156 			sizeof(*callback.table_id()));
157 
158 		sys_index = dict_table_get_first_index(dict_sys->sys_indexes);
159 
160 		dict_index_copy_types(tuple, sys_index, 1);
161 
162 		IndexIterator	iterator(sys_index);
163 
164 		/* Search on the table id and position the cursor
165 		on GE table_id. */
166 		iterator.search(*tuple, callback.get_logging_status());
167 
168 		return(iterator.for_each(callback));
169 	}
170 };
171 
172 /** Generic callback abstract class. */
173 class Callback
174 {
175 
176 public:
177 	/**
178 	Constructor
179 	@param	table_id		id of the table being operated.
180 	@param	noredo			if true turn off logging. */
Callback(table_id_t table_id,bool noredo)181 	Callback(table_id_t table_id, bool noredo)
182 		:
183 		m_id(),
184 		m_noredo(noredo)
185 	{
186 		/* Convert to storage byte order. */
187 		mach_write_to_8(&m_id, table_id);
188 	}
189 
190 	/**
191 	Destructor */
~Callback()192 	virtual ~Callback()
193 	{
194 		/* Do nothing */
195 	}
196 
197 	/**
198 	@param mtr		mini-transaction covering the iteration
199 	@param pcur		persistent cursor used for iteration
200 	@return true if the table id column matches. */
match(mtr_t * mtr,btr_pcur_t * pcur) const201 	bool match(mtr_t* mtr, btr_pcur_t* pcur) const
202 	{
203 		ulint		len;
204 		const byte*	field;
205 		rec_t*		rec = btr_pcur_get_rec(pcur);
206 
207 		field = rec_get_nth_field_old(
208 			rec, DICT_FLD__SYS_INDEXES__TABLE_ID, &len);
209 
210 		ut_ad(len == 8);
211 
212 		return(memcmp(&m_id, field, len) == 0);
213 	}
214 
215 	/**
216 	@return pointer to table id storage format buffer */
table_id() const217 	const table_id_t* table_id() const
218 	{
219 		return(&m_id);
220 	}
221 
222 	/**
223 	@return	return if logging needs to be turned off. */
get_logging_status() const224 	bool get_logging_status() const
225 	{
226 		return(m_noredo);
227 	}
228 
229 protected:
230 	// Disably copying
231 	Callback(const Callback&);
232 	Callback& operator=(const Callback&);
233 
234 protected:
235 	/** Table id in storage format */
236 	table_id_t		m_id;
237 
238 	/** Turn off logging. */
239 	const bool		m_noredo;
240 };
241 
242 /**
243 Creates a TRUNCATE log record with space id, table name, data directory path,
244 tablespace flags, table format, index ids, index types, number of index fields
245 and index field information of the table. */
246 class TruncateLogger : public Callback {
247 
248 public:
249 	/**
250 	Constructor
251 
252 	@param table	Table to truncate
253 	@param flags	tablespace falgs */
TruncateLogger(dict_table_t * table,ulint flags,table_id_t new_table_id)254 	TruncateLogger(
255 		dict_table_t*	table,
256 		ulint		flags,
257 		table_id_t	new_table_id)
258 		:
259 		Callback(table->id, false),
260 		m_table(table),
261 		m_flags(flags),
262 		m_truncate(table->id, new_table_id, table->data_dir_path),
263 		m_log_file_name()
264 	{
265 		/* Do nothing */
266 	}
267 
268 	/**
269 	Initialize Truncate Logger by constructing Truncate Log File Name.
270 
271 	@return DB_SUCCESS or error code. */
init()272 	dberr_t init()
273 	{
274 		/* Construct log file name. */
275 		ulint	log_file_name_buf_sz =
276 			strlen(srv_log_group_home_dir) + 22 + 22 + 1 /* NUL */
277 			+ strlen(TruncateLogger::s_log_prefix)
278 			+ strlen(TruncateLogger::s_log_ext);
279 
280 		m_log_file_name = UT_NEW_ARRAY_NOKEY(char, log_file_name_buf_sz);
281 		if (m_log_file_name == NULL) {
282 			return(DB_OUT_OF_MEMORY);
283 		}
284 		memset(m_log_file_name, 0, log_file_name_buf_sz);
285 
286 		strcpy(m_log_file_name, srv_log_group_home_dir);
287 		ulint	log_file_name_len = strlen(m_log_file_name);
288 		if (m_log_file_name[log_file_name_len - 1]
289 			!= OS_PATH_SEPARATOR) {
290 
291 			m_log_file_name[log_file_name_len]
292 				= OS_PATH_SEPARATOR;
293 			log_file_name_len = strlen(m_log_file_name);
294 		}
295 
296 		ut_snprintf(m_log_file_name + log_file_name_len,
297 			    log_file_name_buf_sz - log_file_name_len,
298 			    "%s%lu_%lu_%s",
299 			    TruncateLogger::s_log_prefix,
300 			    (ulong) m_table->space,
301 			    (ulong) m_table->id,
302 			    TruncateLogger::s_log_ext);
303 
304 		return(DB_SUCCESS);
305 
306 	}
307 
308 	/**
309 	Destructor */
~TruncateLogger()310 	~TruncateLogger()
311 	{
312 		if (m_log_file_name != NULL) {
313 			bool exist;
314 			os_file_delete_if_exists(
315 				innodb_log_file_key, m_log_file_name, &exist);
316 			UT_DELETE_ARRAY(m_log_file_name);
317 			m_log_file_name = NULL;
318 		}
319 	}
320 
321 	/**
322 	@param mtr	mini-transaction covering the read
323 	@param pcur	persistent cursor used for reading
324 	@return DB_SUCCESS or error code */
325 	dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur);
326 
327 	/** Called after iteratoring over the records.
328 	@return true if invariant satisfied. */
debug() const329 	bool debug() const
330 	{
331 		/* We must find all the index entries on disk. */
332 		return(UT_LIST_GET_LEN(m_table->indexes)
333 		       == m_truncate.indexes());
334 	}
335 
336 	/**
337 	Write the TRUNCATE log
338 	@return DB_SUCCESS or error code */
log() const339 	dberr_t log() const
340 	{
341 		dberr_t	err = DB_SUCCESS;
342 
343 		if (m_log_file_name == 0) {
344 			return(DB_ERROR);
345 		}
346 
347 		bool		ret;
348 		pfs_os_file_t	handle = os_file_create(
349 			innodb_log_file_key, m_log_file_name,
350 			OS_FILE_CREATE, OS_FILE_NORMAL,
351 			OS_LOG_FILE, srv_read_only_mode, &ret);
352 		if (!ret) {
353 			return(DB_IO_ERROR);
354 		}
355 
356 
357 		ulint	sz = UNIV_PAGE_SIZE;
358 		void*	buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
359 		if (buf == 0) {
360 			os_file_close(handle);
361 			return(DB_OUT_OF_MEMORY);
362 		}
363 
364 		/* Align the memory for file i/o if we might have O_DIRECT set*/
365 		byte*	log_buf = static_cast<byte*>(
366 			ut_align(buf, UNIV_PAGE_SIZE));
367 
368 		lsn_t	lsn = log_get_lsn();
369 
370 		/* Generally loop should exit in single go but
371 		just for those 1% of rare cases we need to assume
372 		corner case. */
373 		do {
374 			/* First 4 bytes are reserved for magic number
375 			which is currently 0. */
376 			err = m_truncate.write(
377 				log_buf + 4, log_buf + sz - 4,
378 				m_table->space, m_table->name.m_name,
379 				m_flags, m_table->flags, lsn);
380 
381 			DBUG_EXECUTE_IF("ib_err_trunc_oom_logging",
382 					err = DB_FAIL;);
383 
384 			if (err != DB_SUCCESS) {
385 				ut_ad(err == DB_FAIL);
386 				ut_free(buf);
387 				sz *= 2;
388 				buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
389 				DBUG_EXECUTE_IF("ib_err_trunc_oom_logging",
390 						ut_free(buf);
391 						buf = 0;);
392 				if (buf == 0) {
393 					os_file_close(handle);
394 					return(DB_OUT_OF_MEMORY);
395 				}
396 				log_buf = static_cast<byte*>(
397 					ut_align(buf, UNIV_PAGE_SIZE));
398 			}
399 
400 		} while (err != DB_SUCCESS);
401 
402 		dberr_t	io_err;
403 
404 		IORequest	request(IORequest::WRITE);
405 
406 		request.disable_compression();
407 
408 		io_err = os_file_write(
409 			request, m_log_file_name, handle, log_buf, 0, sz);
410 
411 		if (io_err != DB_SUCCESS) {
412 
413 			ib::error()
414 				<< "IO: Failed to write the file size to '"
415 				<< m_log_file_name << "'";
416 
417 			/* Preserve the original error code */
418 			if (err == DB_SUCCESS) {
419 				err = io_err;
420 			}
421 		}
422 
423 		os_file_flush(handle);
424 		os_file_close(handle);
425 
426 		ut_free(buf);
427 
428 		/* Why we need MLOG_TRUNCATE when we have truncate_log for
429 		recovery?
430 		- truncate log can protect us if crash happens while truncate
431 		  is active. Once truncate is done truncate log is removed.
432 		- If crash happens post truncate and system is yet to
433 		  checkpoint, on recovery we would see REDO records from action
434 		  before truncate (unless we explicitly checkpoint before
435 		  returning from truncate API. Costly alternative so rejected).
436 		- These REDO records may reference a page that doesn't exist
437 		  post truncate so we need a mechanism to skip all such REDO
438 		  records. MLOG_TRUNCATE records space_id and lsn that exactly
439 		  serve the purpose.
440 		- If checkpoint happens post truncate and crash happens post
441 		  this point then neither MLOG_TRUNCATE nor REDO record
442 		  from action before truncate are accessible. */
443 		if (!is_system_tablespace(m_table->space)) {
444 			mtr_t	mtr;
445 			byte*	log_ptr;
446 
447 			mtr_start(&mtr);
448 
449 			log_ptr = mlog_open(&mtr, 11 + 8);
450 			log_ptr = mlog_write_initial_log_record_low(
451 				MLOG_TRUNCATE, m_table->space, 0,
452 				log_ptr, &mtr);
453 
454 			mach_write_to_8(log_ptr, lsn);
455 			log_ptr += 8;
456 
457 			mlog_close(&mtr, log_ptr);
458 			mtr_commit(&mtr);
459 		}
460 
461 		return(err);
462 	}
463 
464 	/**
465 	Indicate completion of truncate log by writing magic-number.
466 	File will be removed from the system but to protect against
467 	unlink (File-System) anomalies we ensure we write magic-number. */
done()468 	void done()
469 	{
470 		if (m_log_file_name == 0) {
471 			return;
472 		}
473 
474 		bool	ret;
475 		pfs_os_file_t handle = os_file_create_simple_no_error_handling(
476 			innodb_log_file_key, m_log_file_name,
477 			OS_FILE_OPEN, OS_FILE_READ_WRITE,
478 			srv_read_only_mode, &ret);
479 		DBUG_EXECUTE_IF("ib_err_trunc_writing_magic_number",
480 				os_file_close(handle);
481 				ret = false;);
482 		if (!ret) {
483 			ib::error() << "Failed to open truncate log file "
484 				<< m_log_file_name << "."
485 				" If server crashes before truncate log is"
486 				" removed make sure it is manually removed"
487 				" before restarting server";
488 			os_file_delete(innodb_log_file_key, m_log_file_name);
489 			return;
490 		}
491 
492 		byte	buffer[sizeof(TruncateLogger::s_magic)];
493 		mach_write_to_4(buffer, TruncateLogger::s_magic);
494 
495 		dberr_t	err;
496 
497 		IORequest	request(IORequest::WRITE);
498 
499 		request.disable_compression();
500 
501 		err = os_file_write(
502 			request,
503 			m_log_file_name, handle, buffer, 0, sizeof(buffer));
504 
505 		if (err != DB_SUCCESS) {
506 
507 			ib::error()
508 				<< "IO: Failed to write the magic number to '"
509 				<< m_log_file_name << "'";
510 		}
511 
512 		DBUG_EXECUTE_IF("ib_trunc_crash_after_updating_magic_no",
513 				DBUG_SUICIDE(););
514 		os_file_flush(handle);
515 		os_file_close(handle);
516 		DBUG_EXECUTE_IF("ib_trunc_crash_after_logging_complete",
517 				log_buffer_flush_to_disk();
518 				os_thread_sleep(1000000);
519 				DBUG_SUICIDE(););
520 		os_file_delete(innodb_log_file_key, m_log_file_name);
521 	}
522 
523 private:
524 	// Disably copying
525 	TruncateLogger(const TruncateLogger&);
526 	TruncateLogger& operator=(const TruncateLogger&);
527 
528 private:
529 	/** Lookup the index using the index id.
530 	@return index instance if found else NULL */
find(index_id_t id) const531 	const dict_index_t* find(index_id_t id) const
532 	{
533 		for (const dict_index_t* index = UT_LIST_GET_FIRST(
534 				m_table->indexes);
535 		     index != NULL;
536 		     index = UT_LIST_GET_NEXT(indexes, index)) {
537 
538 			if (index->id == id) {
539 				return(index);
540 			}
541 		}
542 
543 		return(NULL);
544 	}
545 
546 private:
547 	/** Table to be truncated */
548 	dict_table_t*		m_table;
549 
550 	/** Tablespace flags */
551 	ulint			m_flags;
552 
553 	/** Collect table to truncate information */
554 	truncate_t		m_truncate;
555 
556 	/** Truncate log file name. */
557 	char*			m_log_file_name;
558 
559 
560 public:
561 	/** Magic Number to indicate truncate action is complete. */
562 	const static ib_uint32_t	s_magic;
563 
564 	/** Truncate Log file Prefix. */
565 	const static char*		s_log_prefix;
566 
567 	/** Truncate Log file Extension. */
568 	const static char*		s_log_ext;
569 };
570 
571 const ib_uint32_t	TruncateLogger::s_magic = 32743712;
572 const char*		TruncateLogger::s_log_prefix = "ib_";
573 const char*		TruncateLogger::s_log_ext = "trunc.log";
574 
575 /**
576 Scan to find out truncate log file from the given directory path.
577 
578 @param dir_path		look for log directory in following path.
579 @param log_files	cache to hold truncate log file name found.
580 @return DB_SUCCESS or error code. */
581 dberr_t
scan(const char * dir_path,trunc_log_files_t & log_files)582 TruncateLogParser::scan(
583 	const char*		dir_path,
584 	trunc_log_files_t&	log_files)
585 {
586 	os_file_dir_t	dir;
587 	os_file_stat_t	fileinfo;
588 	dberr_t		err = DB_SUCCESS;
589 	ulint		ext_len = strlen(TruncateLogger::s_log_ext);
590 	ulint		prefix_len = strlen(TruncateLogger::s_log_prefix);
591 	ulint		dir_len = strlen(dir_path);
592 
593 	/* Scan and look out for the truncate log files. */
594 	dir = os_file_opendir(dir_path, true);
595 	if (dir == NULL) {
596 		return(DB_IO_ERROR);
597 	}
598 
599 	while (fil_file_readdir_next_file(
600 			&err, dir_path, dir, &fileinfo) == 0) {
601 
602 		ulint nm_len = strlen(fileinfo.name);
603 
604 		if (fileinfo.type == OS_FILE_TYPE_FILE
605 		    && nm_len > ext_len + prefix_len
606 		    && (0 == strncmp(fileinfo.name + nm_len - ext_len,
607 				     TruncateLogger::s_log_ext, ext_len))
608 		    && (0 == strncmp(fileinfo.name,
609 				     TruncateLogger::s_log_prefix,
610 				     prefix_len))) {
611 
612 			if (fileinfo.size == 0) {
613 				/* Truncate log not written. Remove the file. */
614 				os_file_delete(
615 					innodb_log_file_key, fileinfo.name);
616 				continue;
617 			}
618 
619 			/* Construct file name by appending directory path */
620 			ulint	sz = dir_len + 22 + 22 + 1 + ext_len + prefix_len;
621 			char*	log_file_name = UT_NEW_ARRAY_NOKEY(char, sz);
622 			if (log_file_name == NULL) {
623 				err = DB_OUT_OF_MEMORY;
624 				break;
625 			}
626 			memset(log_file_name, 0, sz);
627 
628 			memcpy(log_file_name, dir_path, dir_len);
629 			ulint	log_file_name_len = strlen(log_file_name);
630 			if (log_file_name[log_file_name_len - 1]
631 				!= OS_PATH_SEPARATOR) {
632 
633 				log_file_name[log_file_name_len]
634 					= OS_PATH_SEPARATOR;
635 				log_file_name_len = strlen(log_file_name);
636 			}
637 			strcat(log_file_name, fileinfo.name);
638 			log_files.push_back(log_file_name);
639 		}
640 	}
641 
642 	os_file_closedir(dir);
643 
644 	return(err);
645 }
646 
647 /**
648 Parse the log file and populate table to truncate information.
649 (Add this table to truncate information to central vector that is then
650  used by truncate fix-up routine to fix-up truncate action of the table.)
651 
652 @param	log_file_name	log file to parse
653 @return DB_SUCCESS or error code. */
654 dberr_t
parse(const char * log_file_name)655 TruncateLogParser::parse(
656 	const char*	log_file_name)
657 {
658 	dberr_t		err = DB_SUCCESS;
659 	truncate_t*	truncate = NULL;
660 
661 	/* Open the file and read magic-number to findout if truncate action
662 	was completed. */
663 	bool		ret;
664 	pfs_os_file_t	handle = os_file_create_simple(
665 		innodb_log_file_key, log_file_name,
666 		OS_FILE_OPEN, OS_FILE_READ_ONLY, srv_read_only_mode, &ret);
667 	if (!ret) {
668 		ib::error() << "Error opening truncate log file: "
669 			<< log_file_name;
670 		return(DB_IO_ERROR);
671 	}
672 
673 	ulint	sz = UNIV_PAGE_SIZE;
674 	void*	buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
675 	if (buf == 0) {
676 		os_file_close(handle);
677 		return(DB_OUT_OF_MEMORY);
678 	}
679 
680 	IORequest	request(IORequest::READ);
681 
682 	request.disable_compression();
683 
684 	/* Align the memory for file i/o if we might have O_DIRECT set*/
685 	byte*	log_buf = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
686 
687 	do {
688 		err = os_file_read(request, handle, log_buf, 0, sz);
689 
690 		if (err != DB_SUCCESS) {
691 			os_file_close(handle);
692 			break;
693 		}
694 
695 		ulint	magic_n = mach_read_from_4(log_buf);
696 		if (magic_n == TruncateLogger::s_magic) {
697 
698 			/* Truncate action completed. Avoid parsing the file. */
699 			os_file_close(handle);
700 
701 			os_file_delete(innodb_log_file_key, log_file_name);
702 			break;
703 		}
704 
705 		if (truncate == NULL) {
706 			truncate = UT_NEW_NOKEY(truncate_t(log_file_name));
707 			if (truncate == NULL) {
708 				os_file_close(handle);
709 				err = DB_OUT_OF_MEMORY;
710 				break;
711 			}
712 		}
713 
714 		err = truncate->parse(log_buf + 4, log_buf + sz - 4);
715 
716 		if (err != DB_SUCCESS) {
717 
718 			ut_ad(err == DB_FAIL);
719 
720 			ut_free(buf);
721 			buf = 0;
722 
723 			sz *= 2;
724 
725 			buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
726 
727 			if (buf == 0) {
728 				os_file_close(handle);
729 				err = DB_OUT_OF_MEMORY;
730 				UT_DELETE(truncate);
731 				truncate = NULL;
732 				break;
733 			}
734 
735 			log_buf = static_cast<byte*>(
736 				ut_align(buf, UNIV_PAGE_SIZE));
737 		}
738 	} while (err != DB_SUCCESS);
739 
740 	ut_free(buf);
741 
742 	if (err == DB_SUCCESS && truncate != NULL) {
743 		truncate_t::add(truncate);
744 		os_file_close(handle);
745 	}
746 
747 	return(err);
748 }
749 
750 /**
751 Scan and Parse truncate log files.
752 
753 @param dir_path		look for log directory in following path
754 @return DB_SUCCESS or error code. */
755 dberr_t
scan_and_parse(const char * dir_path)756 TruncateLogParser::scan_and_parse(
757 	const char*	dir_path)
758 {
759 	dberr_t			err;
760 	trunc_log_files_t	log_files;
761 
762 	/* Scan and trace all the truncate log files. */
763 	err = TruncateLogParser::scan(dir_path, log_files);
764 
765 	/* Parse truncate lof files if scan was successful. */
766 	if (err == DB_SUCCESS) {
767 
768 		for (ulint i = 0;
769 		     i < log_files.size() && err == DB_SUCCESS;
770 		     i++) {
771 			err = TruncateLogParser::parse(log_files[i]);
772 		}
773 	}
774 
775 	trunc_log_files_t::const_iterator end = log_files.end();
776 	for (trunc_log_files_t::const_iterator it = log_files.begin();
777 	     it != end;
778 	     ++it) {
779 		if (*it != NULL) {
780 			UT_DELETE_ARRAY(*it);
781 		}
782 	}
783 	log_files.clear();
784 
785 	return(err);
786 }
787 
788 /** Callback to drop indexes during TRUNCATE */
789 class DropIndex : public Callback {
790 
791 public:
792 	/**
793 	Constructor
794 
795 	@param[in,out]	table	Table to truncate
796 	@param[in]	noredo	whether to disable redo logging */
DropIndex(dict_table_t * table,bool noredo)797 	DropIndex(dict_table_t* table, bool noredo)
798 		:
799 		Callback(table->id, noredo),
800 		m_table(table)
801 	{
802 		/* No op */
803 	}
804 
805 	/**
806 	@param mtr	mini-transaction covering the read
807 	@param pcur	persistent cursor used for reading
808 	@return DB_SUCCESS or error code */
809 	dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur) const;
810 
811 private:
812 	/** Table to be truncated */
813 	dict_table_t*		m_table;
814 };
815 
816 /** Callback to create the indexes during TRUNCATE */
817 class CreateIndex : public Callback {
818 
819 public:
820 	/**
821 	Constructor
822 
823 	@param[in,out]	table	Table to truncate
824 	@param[in]	noredo	whether to disable redo logging */
CreateIndex(dict_table_t * table,bool noredo)825 	CreateIndex(dict_table_t* table, bool noredo)
826 		:
827 		Callback(table->id, noredo),
828 		m_table(table)
829 	{
830 		/* No op */
831 	}
832 
833 	/**
834 	Create the new index and update the root page number in the
835 	SysIndex table.
836 
837 	@param mtr	mini-transaction covering the read
838 	@param pcur	persistent cursor used for reading
839 	@return DB_SUCCESS or error code */
840 	dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur) const;
841 
842 private:
843 	// Disably copying
844 	CreateIndex(const CreateIndex&);
845 	CreateIndex& operator=(const CreateIndex&);
846 
847 private:
848 	/** Table to be truncated */
849 	dict_table_t*		m_table;
850 };
851 
852 /** Check for presence of table-id in SYS_XXXX tables. */
853 class TableLocator : public Callback {
854 
855 public:
856 	/**
857 	Constructor
858 	@param table_id	table_id to look for */
TableLocator(table_id_t table_id)859 	explicit TableLocator(table_id_t table_id)
860 		:
861 		Callback(table_id, false),
862 		m_table_found()
863 	{
864 		/* No op */
865 	}
866 
867 	/**
868 	@return true if table is found */
is_table_found() const869 	bool is_table_found() const
870 	{
871 		return(m_table_found);
872 	}
873 
874 	/**
875 	Look for table-id in SYS_XXXX tables without loading the table.
876 
877 	@param mtr	mini-transaction covering the read
878 	@param pcur	persistent cursor used for reading
879 	@return DB_SUCCESS or error code */
880 	dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur);
881 
882 private:
883 	// Disably copying
884 	TableLocator(const TableLocator&);
885 	TableLocator& operator=(const TableLocator&);
886 
887 private:
888 	/** Set to true if table is present */
889 	bool			m_table_found;
890 };
891 
892 /**
893 @param mtr	mini-transaction covering the read
894 @param pcur	persistent cursor used for reading
895 @return DB_SUCCESS or error code */
896 dberr_t
operator ()(mtr_t * mtr,btr_pcur_t * pcur)897 TruncateLogger::operator()(mtr_t* mtr, btr_pcur_t* pcur)
898 {
899 	ulint			len;
900 	const byte*		field;
901 	rec_t*			rec = btr_pcur_get_rec(pcur);
902 	truncate_t::index_t	index;
903 
904 	field = rec_get_nth_field_old(
905 		rec, DICT_FLD__SYS_INDEXES__TYPE, &len);
906 	ut_ad(len == 4);
907 	index.m_type = mach_read_from_4(field);
908 
909 	field = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__ID, &len);
910 	ut_ad(len == 8);
911 	index.m_id = mach_read_from_8(field);
912 
913 	field = rec_get_nth_field_old(
914 			rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
915 	ut_ad(len == 4);
916 	index.m_root_page_no = mach_read_from_4(field);
917 
918 	/* For compressed tables we need to store extra meta-data
919 	required during btr_create(). */
920 	if (fsp_flags_is_compressed(m_flags)) {
921 
922 		const dict_index_t* dict_index = find(index.m_id);
923 
924 		if (dict_index != NULL) {
925 
926 			dberr_t err = index.set(dict_index);
927 
928 			if (err != DB_SUCCESS) {
929 				m_truncate.clear();
930 				return(err);
931 			}
932 
933 		} else {
934 			ib::warn() << "Index id " << index.m_id
935 				<< " not found";
936 		}
937 	}
938 
939 	m_truncate.add(index);
940 
941 	return(DB_SUCCESS);
942 }
943 
944 /**
945 Drop an index in the table.
946 
947 @param mtr	mini-transaction covering the read
948 @param pcur	persistent cursor used for reading
949 @return DB_SUCCESS or error code */
950 dberr_t
operator ()(mtr_t * mtr,btr_pcur_t * pcur) const951 DropIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const
952 {
953 	rec_t*	rec = btr_pcur_get_rec(pcur);
954 
955 	bool	freed = dict_drop_index_tree(rec, pcur, mtr);
956 
957 #ifdef UNIV_DEBUG
958 	{
959 		ulint		len;
960 		const byte*	field;
961 		ulint		index_type;
962 
963 		field = rec_get_nth_field_old(
964 			btr_pcur_get_rec(pcur), DICT_FLD__SYS_INDEXES__TYPE,
965 			&len);
966 		ut_ad(len == 4);
967 
968 		index_type = mach_read_from_4(field);
969 
970 		if (index_type & DICT_CLUSTERED) {
971 			/* Clustered index */
972 			DBUG_EXECUTE_IF("ib_trunc_crash_on_drop_of_clust_index",
973 					log_buffer_flush_to_disk();
974 					os_thread_sleep(2000000);
975 					DBUG_SUICIDE(););
976 		} else if (index_type & DICT_UNIQUE) {
977 			/* Unique index */
978 			DBUG_EXECUTE_IF("ib_trunc_crash_on_drop_of_uniq_index",
979 					log_buffer_flush_to_disk();
980 					os_thread_sleep(2000000);
981 					DBUG_SUICIDE(););
982 		} else if (index_type == 0) {
983 			/* Secondary index */
984 			DBUG_EXECUTE_IF("ib_trunc_crash_on_drop_of_sec_index",
985 					log_buffer_flush_to_disk();
986 					os_thread_sleep(2000000);
987 					DBUG_SUICIDE(););
988 		}
989 	}
990 #endif /* UNIV_DEBUG */
991 
992 	DBUG_EXECUTE_IF("ib_err_trunc_drop_index",
993 			freed = false;);
994 
995 	if (freed) {
996 
997 		/* We will need to commit and restart the
998 		mini-transaction in order to avoid deadlocks.
999 		The dict_drop_index_tree() call has freed
1000 		a page in this mini-transaction, and the rest
1001 		of this loop could latch another index page.*/
1002 		const mtr_log_t log_mode = mtr->get_log_mode();
1003 		mtr_commit(mtr);
1004 
1005 		mtr_start(mtr);
1006 		mtr->set_log_mode(log_mode);
1007 
1008 		btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
1009 	} else {
1010 		/* Check if the .ibd file is missing. */
1011 		bool	found;
1012 
1013 		fil_space_get_page_size(m_table->space, &found);
1014 
1015 		DBUG_EXECUTE_IF("ib_err_trunc_drop_index",
1016 				found = false;);
1017 
1018 		if (!found) {
1019 			return(DB_ERROR);
1020 		}
1021 	}
1022 
1023 	return(DB_SUCCESS);
1024 }
1025 
1026 /**
1027 Create the new index and update the root page number in the
1028 SysIndex table.
1029 
1030 @param mtr	mini-transaction covering the read
1031 @param pcur	persistent cursor used for reading
1032 @return DB_SUCCESS or error code */
1033 dberr_t
operator ()(mtr_t * mtr,btr_pcur_t * pcur) const1034 CreateIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const
1035 {
1036 	ulint	root_page_no;
1037 
1038 	root_page_no = dict_recreate_index_tree(m_table, pcur, mtr);
1039 
1040 #ifdef UNIV_DEBUG
1041 	{
1042 		ulint		len;
1043 		const byte*	field;
1044 		ulint		index_type;
1045 
1046 		field = rec_get_nth_field_old(
1047 			btr_pcur_get_rec(pcur), DICT_FLD__SYS_INDEXES__TYPE,
1048 			&len);
1049 		ut_ad(len == 4);
1050 
1051 		index_type = mach_read_from_4(field);
1052 
1053 		if (index_type & DICT_CLUSTERED) {
1054 			/* Clustered index */
1055 			DBUG_EXECUTE_IF(
1056 				"ib_trunc_crash_on_create_of_clust_index",
1057 				log_buffer_flush_to_disk();
1058 				os_thread_sleep(2000000);
1059 				DBUG_SUICIDE(););
1060 		} else if (index_type & DICT_UNIQUE) {
1061 			/* Unique index */
1062 			DBUG_EXECUTE_IF(
1063 				"ib_trunc_crash_on_create_of_uniq_index",
1064 				log_buffer_flush_to_disk();
1065 				os_thread_sleep(2000000);
1066 				DBUG_SUICIDE(););
1067 		} else if (index_type == 0) {
1068 			/* Secondary index */
1069 			DBUG_EXECUTE_IF(
1070 				"ib_trunc_crash_on_create_of_sec_index",
1071 				log_buffer_flush_to_disk();
1072 				os_thread_sleep(2000000);
1073 				DBUG_SUICIDE(););
1074 		}
1075 	}
1076 #endif /* UNIV_DEBUG */
1077 
1078 	DBUG_EXECUTE_IF("ib_err_trunc_create_index",
1079 			root_page_no = FIL_NULL;);
1080 
1081 	if (root_page_no != FIL_NULL) {
1082 
1083 		rec_t*	rec = btr_pcur_get_rec(pcur);
1084 
1085 		page_rec_write_field(
1086 			rec, DICT_FLD__SYS_INDEXES__PAGE_NO,
1087 			root_page_no, mtr);
1088 
1089 		/* We will need to commit and restart the
1090 		mini-transaction in order to avoid deadlocks.
1091 		The dict_create_index_tree() call has allocated
1092 		a page in this mini-transaction, and the rest of
1093 		this loop could latch another index page. */
1094 		mtr_commit(mtr);
1095 
1096 		mtr_start(mtr);
1097 
1098 		btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
1099 
1100 	} else {
1101 		bool	found;
1102 		fil_space_get_page_size(m_table->space, &found);
1103 
1104 		DBUG_EXECUTE_IF("ib_err_trunc_create_index",
1105 				found = false;);
1106 
1107 		if (!found) {
1108 			return(DB_ERROR);
1109 		}
1110 	}
1111 
1112 	return(DB_SUCCESS);
1113 }
1114 
1115 /**
1116 Look for table-id in SYS_XXXX tables without loading the table.
1117 
1118 @param mtr	mini-transaction covering the read
1119 @param pcur	persistent cursor used for reading
1120 @return DB_SUCCESS */
1121 dberr_t
operator ()(mtr_t * mtr,btr_pcur_t * pcur)1122 TableLocator::operator()(mtr_t* mtr, btr_pcur_t* pcur)
1123 {
1124 	m_table_found = true;
1125 
1126 	return(DB_SUCCESS);
1127 }
1128 
1129 /**
1130 Rollback the transaction and release the index locks.
1131 Drop indexes if table is corrupted so that drop/create
1132 sequence works as expected.
1133 
1134 @param table			table to truncate
1135 @param trx			transaction covering the TRUNCATE
1136 @param new_id			new table id that was suppose to get assigned
1137 				to the table if truncate executed successfully.
1138 @param has_internal_doc_id	indicate existence of fts index
1139 @param no_redo			if true, turn-off redo logging
1140 @param corrupted		table corrupted status
1141 @param unlock_index		if true then unlock indexes before action */
1142 static
1143 void
row_truncate_rollback(dict_table_t * table,trx_t * trx,table_id_t new_id,bool has_internal_doc_id,bool no_redo,bool corrupted,bool unlock_index)1144 row_truncate_rollback(
1145 	dict_table_t*	table,
1146 	trx_t*		trx,
1147 	table_id_t	new_id,
1148 	bool		has_internal_doc_id,
1149 	bool		no_redo,
1150 	bool		corrupted,
1151 	bool		unlock_index)
1152 {
1153 	if (unlock_index) {
1154 		dict_table_x_unlock_indexes(table);
1155 	}
1156 
1157 	trx->error_state = DB_SUCCESS;
1158 
1159 	trx_rollback_to_savepoint(trx, NULL);
1160 
1161 	trx->error_state = DB_SUCCESS;
1162 
1163 	if (corrupted && !dict_table_is_temporary(table)) {
1164 
1165 		/* Cleanup action to ensure we don't left over stale entries
1166 		if we are marking table as corrupted. This will ensure
1167 		it can be recovered using drop/create sequence. */
1168 		dict_table_x_lock_indexes(table);
1169 
1170 		DropIndex       dropIndex(table, no_redo);
1171 
1172 		SysIndexIterator().for_each(dropIndex);
1173 
1174 		dict_table_x_unlock_indexes(table);
1175 
1176 		for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
1177 		     index != NULL;
1178 		     index = UT_LIST_GET_NEXT(indexes, index)) {
1179 
1180 			dict_set_corrupted(index, trx, "TRUNCATE TABLE");
1181 		}
1182 
1183 		if (has_internal_doc_id) {
1184 
1185 			ut_ad(!trx_is_started(trx));
1186 
1187 			table_id_t      id = table->id;
1188 
1189 			table->id = new_id;
1190 
1191 			fts_drop_tables(trx, table);
1192 
1193 			table->id = id;
1194 
1195 			ut_ad(trx_is_started(trx));
1196 
1197 			trx_commit_for_mysql(trx);
1198 		}
1199 
1200 	} else if (corrupted && dict_table_is_temporary(table)) {
1201 
1202 		dict_table_x_lock_indexes(table);
1203 
1204 		for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
1205 		     index != NULL;
1206 		     index = UT_LIST_GET_NEXT(indexes, index)) {
1207 
1208 			dict_drop_index_tree_in_mem(index, index->page);
1209 
1210 			index->page = FIL_NULL;
1211 		}
1212 
1213 		dict_table_x_unlock_indexes(table);
1214 	}
1215 
1216 	table->corrupted = corrupted;
1217 }
1218 
1219 /**
1220 Finish the TRUNCATE operations for both commit and rollback.
1221 
1222 @param table		table being truncated
1223 @param trx		transaction covering the truncate
1224 @param fsp_flags	tablespace flags
1225 @param logger		table to truncate information logger
1226 @param err		status of truncate operation
1227 
1228 @return DB_SUCCESS or error code */
1229 static MY_ATTRIBUTE((warn_unused_result))
1230 dberr_t
row_truncate_complete(dict_table_t * table,trx_t * trx,ulint fsp_flags,TruncateLogger * & logger,dberr_t err)1231 row_truncate_complete(
1232 	dict_table_t*		table,
1233 	trx_t*			trx,
1234 	ulint			fsp_flags,
1235 	TruncateLogger*		&logger,
1236 	dberr_t			err)
1237 {
1238 	bool	is_file_per_table = dict_table_is_file_per_table(table);
1239 
1240 	if (table->memcached_sync_count == DICT_TABLE_IN_DDL) {
1241 		/* We need to set the memcached sync back to 0, unblock
1242 		memcached operations. */
1243 		table->memcached_sync_count = 0;
1244 	}
1245 
1246 	/* Add the table back to FTS optimize background thread. */
1247 	if (table->fts) {
1248 		fts_optimize_add_table(table);
1249 	}
1250 
1251 	row_mysql_unlock_data_dictionary(trx);
1252 
1253 	DEBUG_SYNC_C("ib_trunc_table_trunc_completing");
1254 
1255 	if (!dict_table_is_temporary(table)) {
1256 
1257 		DBUG_EXECUTE_IF("ib_trunc_crash_before_log_removal",
1258 				log_buffer_flush_to_disk();
1259 				os_thread_sleep(500000);
1260 				DBUG_SUICIDE(););
1261 
1262 		/* Note: We don't log-checkpoint instead we have written
1263 		a special REDO log record MLOG_TRUNCATE that is used to
1264 		avoid applying REDO records before truncate for crash
1265 		that happens post successful truncate completion. */
1266 
1267 		if (logger != NULL) {
1268 			logger->done();
1269 			UT_DELETE(logger);
1270 			logger = NULL;
1271 		}
1272 	}
1273 
1274 	/* If non-temp file-per-table tablespace... */
1275 	if (is_file_per_table
1276 	    && !dict_table_is_temporary(table)
1277 	    && fsp_flags != ULINT_UNDEFINED) {
1278 
1279 		/* This function will reset back the stop_new_ops
1280 		and is_being_truncated so that fil-ops can re-start. */
1281 		dberr_t err2 = truncate_t::truncate(
1282 			table->space,
1283 			table->data_dir_path,
1284 			table->name.m_name, fsp_flags, false);
1285 
1286 		if (err2 != DB_SUCCESS) {
1287 			return(err2);
1288 		}
1289 	}
1290 
1291 	if (err == DB_SUCCESS) {
1292 		dict_stats_update(table, DICT_STATS_EMPTY_TABLE);
1293 	}
1294 
1295 	trx->op_info = "";
1296 
1297 	/* For temporary tables or if there was an error, we need to reset
1298 	the dict operation flags. */
1299 	trx->ddl = false;
1300 	trx->dict_operation = TRX_DICT_OP_NONE;
1301 
1302 	ut_ad(!trx_is_started(trx));
1303 
1304 	srv_wake_master_thread();
1305 
1306 	DBUG_EXECUTE_IF("ib_trunc_crash_after_truncate_done",
1307 			DBUG_SUICIDE(););
1308 
1309 	return(err);
1310 }
1311 
1312 /**
1313 Handle FTS truncate issues.
1314 @param table		table being truncated
1315 @param new_id		new id for the table
1316 @param trx		transaction covering the truncate
1317 @return DB_SUCCESS or error code. */
1318 static MY_ATTRIBUTE((warn_unused_result))
1319 dberr_t
row_truncate_fts(dict_table_t * table,table_id_t new_id,trx_t * trx)1320 row_truncate_fts(
1321 	dict_table_t*	table,
1322 	table_id_t	new_id,
1323 	trx_t*		trx)
1324 {
1325 	dict_table_t	fts_table;
1326 
1327 	fts_table.id = new_id;
1328 	fts_table.name = table->name;
1329 	fts_table.flags2 = table->flags2;
1330 	fts_table.flags = table->flags;
1331 	fts_table.tablespace = table->tablespace;
1332 	fts_table.space = table->space;
1333 
1334 	/* table->data_dir_path is used for FTS AUX table
1335 	creation. */
1336 	if (DICT_TF_HAS_DATA_DIR(table->flags)
1337 	    && table->data_dir_path == NULL) {
1338 		dict_get_and_save_data_dir_path(table, true);
1339 		ut_ad(table->data_dir_path != NULL);
1340 	}
1341 
1342 	/* table->tablespace() may not be always populated or
1343 	if table->tablespace() uses "innodb_general" name,
1344 	fetch the real name. */
1345 	if (DICT_TF_HAS_SHARED_SPACE(table->flags)
1346 	    && (table->tablespace() == NULL
1347 		|| dict_table_has_temp_general_tablespace_name(
1348 			table->tablespace()))) {
1349 		dict_get_and_save_space_name(table, true);
1350 		ut_ad(table->tablespace() != NULL);
1351 		ut_ad(!dict_table_has_temp_general_tablespace_name(
1352 			table->tablespace()));
1353 	}
1354 
1355 	fts_table.tablespace = table->tablespace();
1356 	fts_table.data_dir_path = table->data_dir_path;
1357 
1358 	dberr_t		err;
1359 
1360 	err = fts_create_common_tables(
1361 		trx, &fts_table, table->name.m_name, TRUE);
1362 
1363 	for (ulint i = 0;
1364 	     i < ib_vector_size(table->fts->indexes) && err == DB_SUCCESS;
1365 	     i++) {
1366 
1367 		dict_index_t*	fts_index;
1368 
1369 		fts_index = static_cast<dict_index_t*>(
1370 			ib_vector_getp(table->fts->indexes, i));
1371 
1372 		err = fts_create_index_tables_low(
1373 			trx, fts_index, table->name.m_name, new_id);
1374 	}
1375 
1376 	DBUG_EXECUTE_IF("ib_err_trunc_during_fts_trunc",
1377 			err = DB_ERROR;);
1378 
1379 	if (err != DB_SUCCESS) {
1380 
1381 		trx->error_state = DB_SUCCESS;
1382 		trx_rollback_to_savepoint(trx, NULL);
1383 		trx->error_state = DB_SUCCESS;
1384 
1385 		ib::error() << "Unable to truncate FTS index for table "
1386 			<< table->name;
1387 	} else {
1388 
1389 		ut_ad(trx_is_started(trx));
1390 	}
1391 
1392 	return(err);
1393 }
1394 
1395 /**
1396 Update system table to reflect new table id.
1397 @param old_table_id		old table id
1398 @param new_table_id		new table id
1399 @param reserve_dict_mutex	if TRUE, acquire/release
1400 				dict_sys->mutex around call to pars_sql.
1401 @param trx			transaction
1402 @return error code or DB_SUCCESS */
1403 static MY_ATTRIBUTE((warn_unused_result))
1404 dberr_t
row_truncate_update_table_id(table_id_t old_table_id,table_id_t new_table_id,ibool reserve_dict_mutex,trx_t * trx)1405 row_truncate_update_table_id(
1406 	table_id_t	old_table_id,
1407 	table_id_t	new_table_id,
1408 	ibool		reserve_dict_mutex,
1409 	trx_t*		trx)
1410 {
1411 	pars_info_t*	info	= NULL;
1412 	dberr_t		err	= DB_SUCCESS;
1413 
1414 	/* Scan the SYS_XXXX table and update to reflect new table-id. */
1415 	info = pars_info_create();
1416 	pars_info_add_ull_literal(info, "old_id", old_table_id);
1417 	pars_info_add_ull_literal(info, "new_id", new_table_id);
1418 
1419 	/* As micro-SQL does not support int4 == int8 comparisons,
1420 	old and new IDs are added again under different names as
1421 	int4 values */
1422 	pars_info_add_int4_literal(info, "old_id_narrow", old_table_id);
1423 	pars_info_add_int4_literal(info, "new_id_narrow", new_table_id);
1424 
1425 	err = que_eval_sql(
1426 		info,
1427 		"PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n"
1428 		"BEGIN\n"
1429 		"UPDATE SYS_TABLES"
1430 		" SET ID = :new_id\n"
1431 		" WHERE ID = :old_id;\n"
1432 		"UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
1433 		" WHERE TABLE_ID = :old_id;\n"
1434 		"UPDATE SYS_INDEXES"
1435 		" SET TABLE_ID = :new_id\n"
1436 		" WHERE TABLE_ID = :old_id;\n"
1437 		"UPDATE SYS_VIRTUAL"
1438 		" SET TABLE_ID = :new_id\n"
1439 		" WHERE TABLE_ID = :old_id;\n"
1440 		"UPDATE SYS_ZIP_DICT_COLS\n"
1441 		" SET TABLE_ID = :new_id_narrow\n"
1442 		" WHERE TABLE_ID = :old_id_narrow;\n"
1443 		"END;\n", reserve_dict_mutex, trx);
1444 
1445 	return(err);
1446 }
1447 
1448 /**
1449 Get the table id to truncate.
1450 @param truncate_t		old/new table id of table to truncate
1451 @return table_id_t		table_id to use in SYS_XXXX table update. */
1452 static MY_ATTRIBUTE((warn_unused_result))
1453 table_id_t
row_truncate_get_trunc_table_id(const truncate_t & truncate)1454 row_truncate_get_trunc_table_id(
1455 	const truncate_t&	truncate)
1456 {
1457 	TableLocator tableLocator(truncate.old_table_id());
1458 
1459 	SysIndexIterator().for_each(tableLocator);
1460 
1461 	return(tableLocator.is_table_found() ?
1462 		truncate.old_table_id(): truncate.new_table_id());
1463 }
1464 
1465 /**
1466 Update system table to reflect new table id and root page number.
1467 @param truncate_t		old/new table id of table to truncate
1468 				and updated root_page_no of indexes.
1469 @param new_table_id		new table id
1470 @param reserve_dict_mutex	if TRUE, acquire/release
1471 				dict_sys->mutex around call to pars_sql.
1472 @param mark_index_corrupted	if true, then mark index corrupted.
1473 @return error code or DB_SUCCESS */
1474 static MY_ATTRIBUTE((warn_unused_result))
1475 dberr_t
row_truncate_update_sys_tables_during_fix_up(const truncate_t & truncate,table_id_t new_table_id,ibool reserve_dict_mutex,bool mark_index_corrupted)1476 row_truncate_update_sys_tables_during_fix_up(
1477 	const truncate_t&	truncate,
1478 	table_id_t		new_table_id,
1479 	ibool			reserve_dict_mutex,
1480 	bool			mark_index_corrupted)
1481 {
1482 	trx_t*		trx = trx_allocate_for_background();
1483 
1484 	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
1485 
1486 	table_id_t	table_id = row_truncate_get_trunc_table_id(truncate);
1487 
1488 	/* Step-1: Update the root-page-no */
1489 
1490 	dberr_t	err;
1491 
1492 	err = truncate.update_root_page_no(
1493 		trx, table_id, reserve_dict_mutex, mark_index_corrupted);
1494 
1495 	if (err != DB_SUCCESS) {
1496 		return(err);
1497 	}
1498 
1499 	/* Step-2: Update table-id. */
1500 
1501 	err = row_truncate_update_table_id(
1502 		table_id, new_table_id, reserve_dict_mutex, trx);
1503 
1504 	if (err == DB_SUCCESS) {
1505 		dict_mutex_enter_for_mysql();
1506 
1507 		/* Remove the table with old table_id from cache. */
1508 		dict_table_t*	old_table = dict_table_open_on_id(
1509 			table_id, true, DICT_TABLE_OP_NORMAL);
1510 
1511 		if (old_table != NULL) {
1512 			dict_table_close(old_table, true, false);
1513 			dict_table_remove_from_cache(old_table);
1514 		}
1515 
1516 		/* Open table with new table_id and set table as
1517 		corrupted if it has FTS index. */
1518 
1519 		dict_table_t*	table = dict_table_open_on_id(
1520 			new_table_id, true, DICT_TABLE_OP_NORMAL);
1521 		ut_ad(table->id == new_table_id);
1522 
1523 		bool	has_internal_doc_id =
1524 			dict_table_has_fts_index(table)
1525 			|| DICT_TF2_FLAG_IS_SET(
1526 				table, DICT_TF2_FTS_HAS_DOC_ID);
1527 
1528 		if (has_internal_doc_id) {
1529 			trx->dict_operation_lock_mode = RW_X_LATCH;
1530 			fts_check_corrupt(table, trx);
1531 			trx->dict_operation_lock_mode = 0;
1532 		}
1533 
1534 		dict_table_close(table, true, false);
1535 		dict_mutex_exit_for_mysql();
1536 	}
1537 
1538 	trx_commit_for_mysql(trx);
1539 	trx_free_for_background(trx);
1540 
1541 	return(err);
1542 }
1543 
1544 /**
1545 Truncate also results in assignment of new table id, update the system
1546 SYSTEM TABLES with the new id.
1547 @param table,			table being truncated
1548 @param new_id,			new table id
1549 @param has_internal_doc_id,	has doc col (fts)
1550 @param no_redo			if true, turn-off redo logging
1551 @param trx			transaction handle
1552 @return	error code or DB_SUCCESS */
1553 static MY_ATTRIBUTE((warn_unused_result))
1554 dberr_t
row_truncate_update_system_tables(dict_table_t * table,table_id_t new_id,bool has_internal_doc_id,bool no_redo,trx_t * trx)1555 row_truncate_update_system_tables(
1556 	dict_table_t*	table,
1557 	table_id_t	new_id,
1558 	bool		has_internal_doc_id,
1559 	bool		no_redo,
1560 	trx_t*		trx)
1561 {
1562 	dberr_t		err	= DB_SUCCESS;
1563 
1564 	ut_a(!dict_table_is_temporary(table));
1565 
1566 	err = row_truncate_update_table_id(table->id, new_id, FALSE, trx);
1567 
1568 	DBUG_EXECUTE_IF("ib_err_trunc_during_sys_table_update",
1569 			err = DB_ERROR;);
1570 
1571 	if (err != DB_SUCCESS) {
1572 
1573 		row_truncate_rollback(
1574 			table, trx, new_id, has_internal_doc_id,
1575 			no_redo, true, false);
1576 
1577 		ib::error() << "Unable to assign a new identifier to table "
1578 			<< table->name << " after truncating it. Marked the"
1579 			" table as corrupted. In-memory representation is now"
1580 			" different from the on-disk representation.";
1581 		err = DB_ERROR;
1582 	} else {
1583 		/* Drop the old FTS index */
1584 		if (has_internal_doc_id) {
1585 
1586 			ut_ad(trx_is_started(trx));
1587 
1588 			fts_drop_tables(trx, table);
1589 
1590 			DBUG_EXECUTE_IF("ib_truncate_crash_while_fts_cleanup",
1591 					DBUG_SUICIDE(););
1592 
1593 			ut_ad(trx_is_started(trx));
1594 		}
1595 
1596 		DBUG_EXECUTE_IF("ib_trunc_crash_after_fts_drop",
1597 				log_buffer_flush_to_disk();
1598 				os_thread_sleep(2000000);
1599 				DBUG_SUICIDE(););
1600 
1601 		dict_table_change_id_in_cache(table, new_id);
1602 
1603 		/* Reset the Doc ID in cache to 0 */
1604 		if (has_internal_doc_id && table->fts->cache != NULL) {
1605 			DBUG_EXECUTE_IF("ib_trunc_sleep_before_fts_cache_clear",
1606 					os_thread_sleep(10000000););
1607 
1608 			table->fts->fts_status |= TABLE_DICT_LOCKED;
1609 			fts_update_next_doc_id(trx, table, NULL, 0);
1610 			fts_cache_clear(table->fts->cache);
1611 			fts_cache_init(table->fts->cache);
1612 			table->fts->fts_status &= ~TABLE_DICT_LOCKED;
1613 		}
1614 	}
1615 
1616 	return(err);
1617 }
1618 
1619 /**
1620 Prepare for the truncate process. On success all of the table's indexes will
1621 be locked in X mode.
1622 @param table		table to truncate
1623 @param flags		tablespace flags
1624 @return	error code or DB_SUCCESS */
1625 static MY_ATTRIBUTE((warn_unused_result))
1626 dberr_t
row_truncate_prepare(dict_table_t * table,ulint * flags)1627 row_truncate_prepare(dict_table_t* table, ulint* flags)
1628 {
1629 	ut_ad(!dict_table_is_temporary(table));
1630 	ut_ad(dict_table_is_file_per_table(table));
1631 
1632 	*flags = fil_space_get_flags(table->space);
1633 
1634 	ut_ad(!dict_table_is_temporary(table));
1635 
1636 	dict_get_and_save_data_dir_path(table, true);
1637 
1638 	dict_get_and_save_space_name(table, true);
1639 
1640 	if (*flags != ULINT_UNDEFINED) {
1641 
1642 		dberr_t	err = fil_prepare_for_truncate(table->space);
1643 
1644 		if (err != DB_SUCCESS) {
1645 			return(err);
1646 		}
1647 	}
1648 
1649 	return(DB_SUCCESS);
1650 }
1651 
1652 /**
1653 Do foreign key checks before starting TRUNCATE.
1654 @param table		table being truncated
1655 @param trx		transaction covering the truncate
1656 @return DB_SUCCESS or error code */
1657 static MY_ATTRIBUTE((warn_unused_result))
1658 dberr_t
row_truncate_foreign_key_checks(const dict_table_t * table,const trx_t * trx)1659 row_truncate_foreign_key_checks(
1660 	const dict_table_t*	table,
1661 	const trx_t*		trx)
1662 {
1663 	/* Check if the table is referenced by foreign key constraints from
1664 	some other table (not the table itself) */
1665 
1666 	dict_foreign_set::iterator	it
1667 		= std::find_if(table->referenced_set.begin(),
1668 			       table->referenced_set.end(),
1669 			       dict_foreign_different_tables());
1670 
1671 	if (!srv_read_only_mode
1672 	    && it != table->referenced_set.end()
1673 	    && trx->check_foreigns) {
1674 
1675 		dict_foreign_t*	foreign = *it;
1676 
1677 		FILE*	ef = dict_foreign_err_file;
1678 
1679 		/* We only allow truncating a referenced table if
1680 		FOREIGN_KEY_CHECKS is set to 0 */
1681 
1682 		mutex_enter(&dict_foreign_err_mutex);
1683 
1684 		rewind(ef);
1685 
1686 		ut_print_timestamp(ef);
1687 
1688 		fputs("  Cannot truncate table ", ef);
1689 		ut_print_name(ef, trx, table->name.m_name);
1690 		fputs(" by DROP+CREATE\n"
1691 		      "InnoDB: because it is referenced by ", ef);
1692 		ut_print_name(ef, trx, foreign->foreign_table_name);
1693 		putc('\n', ef);
1694 
1695 		mutex_exit(&dict_foreign_err_mutex);
1696 
1697 		return(DB_ERROR);
1698 	}
1699 
1700 	/* TODO: could we replace the counter n_foreign_key_checks_running
1701 	with lock checks on the table? Acquire here an exclusive lock on the
1702 	table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that
1703 	they can cope with the table having been truncated here? Foreign key
1704 	checks take an IS or IX lock on the table. */
1705 
1706 	if (table->n_foreign_key_checks_running > 0) {
1707 		ib::warn() << "Cannot truncate table " << table->name
1708 			<< " because there is a foreign key check running on"
1709 			" it.";
1710 
1711 		return(DB_ERROR);
1712 	}
1713 
1714 	return(DB_SUCCESS);
1715 }
1716 
1717 /**
1718 Do some sanity checks before starting the actual TRUNCATE.
1719 @param table		table being truncated
1720 @return DB_SUCCESS or error code */
1721 static MY_ATTRIBUTE((warn_unused_result))
1722 dberr_t
row_truncate_sanity_checks(const dict_table_t * table)1723 row_truncate_sanity_checks(
1724 	const dict_table_t* table)
1725 {
1726 	if (dict_table_is_discarded(table)) {
1727 
1728 		return(DB_TABLESPACE_DELETED);
1729 
1730 	} else if (!table->is_readable()) {
1731 		if (fil_space_get(table->space) == NULL) {
1732 			return(DB_TABLESPACE_NOT_FOUND);
1733 
1734 		} else {
1735 			return(DB_DECRYPTION_FAILED);
1736 		}
1737 
1738 	} else if (dict_table_is_corrupted(table)) {
1739 
1740 		return(DB_TABLE_CORRUPT);
1741 	}
1742 
1743 	return(DB_SUCCESS);
1744 }
1745 
1746 /**
1747 Truncates a table for MySQL.
1748 @param table		table being truncated
1749 @param trx		transaction covering the truncate
1750 @return	error code or DB_SUCCESS */
1751 dberr_t
row_truncate_table_for_mysql(dict_table_t * table,trx_t * trx)1752 row_truncate_table_for_mysql(
1753 	dict_table_t* table,
1754 	trx_t* trx)
1755 {
1756 	bool	is_file_per_table = dict_table_is_file_per_table(table);
1757 	dberr_t		err;
1758 #ifdef UNIV_DEBUG
1759 	ulint		old_space = table->space;
1760 #endif /* UNIV_DEBUG */
1761 	TruncateLogger*	logger = NULL;
1762 
1763 	/* Understanding the truncate flow.
1764 
	Step-1: Perform initial sanity check to ensure table can be truncated.
1766 	This would include check for tablespace discard status, ibd file
1767 	missing, etc ....
1768 
1769 	Step-2: Start transaction (only for non-temp table as temp-table don't
1770 	modify any data on disk doesn't need transaction object).
1771 
1772 	Step-3: Validate ownership of needed locks (Exclusive lock).
1773 	Ownership will also ensure there is no active SQL queries, INSERT,
1774 	SELECT, .....
1775 
1776 	Step-4: Stop all the background process associated with table.
1777 
1778 	Step-5: There are few foreign key related constraint under which
1779 	we can't truncate table (due to referential integrity unless it is
1780 	turned off). Ensure this condition is satisfied.
1781 
1782 	Step-6: Truncate operation can be rolled back in case of error
1783 	till some point. Associate rollback segment to record undo log.
1784 
1785 	Step-7: Generate new table-id.
1786 	Why we need new table-id ?
1787 	Purge and rollback case: we assign a new table id for the table.
1788 	Since purge and rollback look for the table based on the table id,
1789 	they see the table as 'dropped' and discard their operations.
1790 
1791 	Step-8: Log information about tablespace which includes
1792 	table and index information. If there is a crash in the next step
1793 	then during recovery we will attempt to fixup the operation.
1794 
1795 	Step-9: Drop all indexes (this include freeing of the pages
1796 	associated with them).
1797 
1798 	Step-10: Re-create new indexes.
1799 
1800 	Step-11: Update new table-id to in-memory cache (dictionary),
1801 	on-disk (INNODB_SYS_TABLES). INNODB_SYS_INDEXES also needs to
1802 	be updated to reflect updated root-page-no of new index created
1803 	and updated table-id.
1804 
1805 	Step-12: Cleanup Stage. Reset auto-inc value to 1.
1806 	Release all the locks.
1807 	Commit the transaction. Update trx operation state.
1808 
1809 	Notes:
1810 	- On error, log checkpoint is done followed writing of magic number to
1811 	truncate log file. If servers crashes after truncate, fix-up action
1812 	will not be applied.
1813 
1814 	- log checkpoint is done before starting truncate table to ensure
1815 	that previous REDO log entries are not applied if current truncate
1816 	crashes. Consider following use-case:
1817 	 - create table .... insert/load table .... truncate table (crash)
1818 	 - on restart table is restored .... truncate table (crash)
1819 	 - on restart (assuming default log checkpoint is not done) will have
1820 	   2 REDO log entries for same table. (Note 2 REDO log entries
1821 	   for different table is not an issue).
1822 	For system-tablespace we can't truncate the tablespace so we need
1823 	to initiate a local cleanup that involves dropping of indexes and
1824 	re-creating them. If we apply stale entry we might end-up issuing
1825 	drop on wrong indexes.
1826 
1827 	- Insert buffer: TRUNCATE TABLE is analogous to DROP TABLE,
1828 	so we do not have to remove insert buffer records, as the
1829 	insert buffer works at a low level. If a freed page is later
1830 	reallocated, the allocator will remove the ibuf entries for
1831 	it. When we prepare to truncate *.ibd files, we remove all entries
1832 	for the table in the insert buffer tree. This is not strictly
1833 	necessary, but we can free up some space in the system tablespace.
1834 
1835 	- Linear readahead and random readahead: we use the same
1836 	method as in 3) to discard ongoing operations. (This is only
1837 	relevant for TRUNCATE TABLE by TRUNCATE TABLESPACE.)
1838 	Ensure that the table will be dropped by trx_rollback_active() in
1839 	case of a crash.
1840 	*/
1841 
1842 	/*-----------------------------------------------------------------*/
	/* Step-1: Perform initial sanity check to ensure table can be
1844 	truncated. This would include check for tablespace discard status,
1845 	ibd file missing, etc .... */
1846 	err = row_truncate_sanity_checks(table);
1847 	if (err != DB_SUCCESS) {
1848 		return(err);
1849 
1850 	}
1851 
1852 	/* Step-2: Start transaction (only for non-temp table as temp-table
1853 	don't modify any data on disk doesn't need transaction object). */
1854 	if (!dict_table_is_temporary(table)) {
1855 		/* Avoid transaction overhead for temporary table DDL. */
1856 		trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
1857 	}
1858 
1859 	/* Step-3: Validate ownership of needed locks (Exclusive lock).
1860 	Ownership will also ensure there is no active SQL queries, INSERT,
1861 	SELECT, .....*/
1862 	trx->op_info = "truncating table";
1863 	ut_a(trx->dict_operation_lock_mode == 0);
1864 	row_mysql_lock_data_dictionary(trx);
1865 	ut_ad(mutex_own(&dict_sys->mutex));
1866 	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
1867 
1868 	DEBUG_SYNC_C("truncate_table");
1869 
1870 	/* Step-4: Stop all the background process associated with table. */
1871 	dict_stats_wait_bg_to_stop_using_table(table, trx);
1872 	if (table->fts) {
1873 		/* Remove from FTS optimize thread. Unlock is needed to allow
1874 		finishing background operations in progress. */
1875 		row_mysql_unlock_data_dictionary(trx);
1876 		fts_optimize_remove_table(table);
1877 		row_mysql_lock_data_dictionary(trx);
1878 	}
1879 
1880 	/* Step-5: There are few foreign key related constraint under which
1881 	we can't truncate table (due to referential integrity unless it is
1882 	turned off). Ensure this condition is satisfied. */
1883 	ulint	fsp_flags = ULINT_UNDEFINED;
1884 	err = row_truncate_foreign_key_checks(table, trx);
1885 	if (err != DB_SUCCESS) {
1886 		trx_rollback_to_savepoint(trx, NULL);
1887 		return(row_truncate_complete(
1888 				table, trx, fsp_flags, logger, err));
1889 	}
1890 
1891 	/* Check if memcached DML is running on this table. if is, we don't
1892 	allow truncate this table. */
1893 	if (table->memcached_sync_count != 0) {
1894 		ib::error() << "Cannot truncate table "
1895 			<< table->name
1896 			<< " by DROP+CREATE because there are memcached"
1897 			" operations running on it.";
1898 		err = DB_ERROR;
1899 		trx_rollback_to_savepoint(trx, NULL);
1900 		return(row_truncate_complete(
1901 				table, trx, fsp_flags, logger, err));
1902 	} else {
1903                 /* We need to set this counter to -1 for blocking
1904                 memcached operations. */
1905 		table->memcached_sync_count = DICT_TABLE_IN_DDL;
1906         }
1907 
1908 	/* Remove all locks except the table-level X lock. */
1909 	lock_remove_all_on_table(table, FALSE);
1910 	trx->table_id = table->id;
1911 	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
1912 
1913 	/* Step-6: Truncate operation can be rolled back in case of error
1914 	till some point. Associate rollback segment to record undo log. */
1915 	if (!dict_table_is_temporary(table)) {
1916 
1917 		/* Temporary tables don't need undo logging for autocommit stmt.
1918 		On crash (i.e. mysql restart) temporary tables are anyway not
1919 		accessible. */
1920 		mutex_enter(&trx->undo_mutex);
1921 
1922 		err = trx_undo_assign_undo(
1923 			trx, &trx->rsegs.m_redo, TRX_UNDO_UPDATE);
1924 
1925 		mutex_exit(&trx->undo_mutex);
1926 
1927 		DBUG_EXECUTE_IF("ib_err_trunc_assigning_undo_log",
1928 				err = DB_ERROR;);
1929 		if (err != DB_SUCCESS) {
1930 			trx_rollback_to_savepoint(trx, NULL);
1931 			return(row_truncate_complete(
1932 				table, trx, fsp_flags, logger, err));
1933 		}
1934 	}
1935 
1936 	/* Step-7: Generate new table-id.
1937 	Why we need new table-id ?
1938 	Purge and rollback: we assign a new table id for the
1939 	table. Since purge and rollback look for the table based on
1940 	the table id, they see the table as 'dropped' and discard
1941 	their operations. */
1942 	table_id_t	new_id;
1943 	dict_hdr_get_new_id(&new_id, NULL, NULL, table, false);
1944 
1945 	/* Check if table involves FTS index. */
1946 	bool	has_internal_doc_id =
1947 		dict_table_has_fts_index(table)
1948 		|| DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
1949 
1950 	bool	no_redo = is_file_per_table && !has_internal_doc_id;
1951 
1952 	/* Step-8: Log information about tablespace which includes
1953 	table and index information. If there is a crash in the next step
1954 	then during recovery we will attempt to fixup the operation. */
1955 
1956 	/* Lock all index trees for this table, as we will truncate
1957 	the table/index and possibly change their metadata. All
1958 	DML/DDL are blocked by table level X lock, with a few exceptions
1959 	such as queries into information schema about the table,
1960 	MySQL could try to access index stats for this kind of query,
1961 	we need to use index locks to sync up */
1962 	dict_table_x_lock_indexes(table);
1963 
1964 	if (!dict_table_is_temporary(table)) {
1965 
1966 		if (is_file_per_table) {
1967 
1968 			err = row_truncate_prepare(table, &fsp_flags);
1969 
1970 			DBUG_EXECUTE_IF("ib_err_trunc_preparing_for_truncate",
1971 					err = DB_ERROR;);
1972 
1973 			if (err != DB_SUCCESS) {
1974 				row_truncate_rollback(
1975 					table, trx, new_id,
1976 					has_internal_doc_id,
1977 					no_redo, false, true);
1978 				return(row_truncate_complete(
1979 					table, trx, fsp_flags, logger, err));
1980 			}
1981 		} else {
1982 			fsp_flags = fil_space_get_flags(table->space);
1983 
1984 			DBUG_EXECUTE_IF("ib_err_trunc_preparing_for_truncate",
1985 					fsp_flags = ULINT_UNDEFINED;);
1986 
1987 			if (fsp_flags == ULINT_UNDEFINED) {
1988 				row_truncate_rollback(
1989 					table, trx, new_id,
1990 					has_internal_doc_id,
1991 					no_redo, false, true);
1992 				return(row_truncate_complete(
1993 						table, trx, fsp_flags,
1994 						logger, DB_ERROR));
1995 			}
1996 		}
1997 
1998 		logger = UT_NEW_NOKEY(TruncateLogger(
1999 				table, fsp_flags, new_id));
2000 
2001 		err = logger->init();
2002 		if (err != DB_SUCCESS) {
2003 			row_truncate_rollback(
2004 				table, trx, new_id, has_internal_doc_id,
2005 				no_redo, false, true);
2006 			return(row_truncate_complete(
2007 				table, trx, fsp_flags, logger, DB_ERROR));
2008 
2009 		}
2010 
2011 		err = SysIndexIterator().for_each(*logger);
2012 		if (err != DB_SUCCESS) {
2013 			row_truncate_rollback(
2014 				table, trx, new_id, has_internal_doc_id,
2015 				no_redo, false, true);
2016 			return(row_truncate_complete(
2017 				table, trx, fsp_flags, logger, DB_ERROR));
2018 
2019 		}
2020 
2021 		ut_ad(logger->debug());
2022 
2023 		err = logger->log();
2024 
2025 		if (err != DB_SUCCESS) {
2026 			row_truncate_rollback(
2027 				table, trx, new_id, has_internal_doc_id,
2028 				no_redo, false, true);
2029 			return(row_truncate_complete(
2030 				table, trx, fsp_flags, logger, DB_ERROR));
2031 		}
2032 	}
2033 
2034 	DBUG_EXECUTE_IF("ib_trunc_crash_after_redo_log_write_complete",
2035 			log_buffer_flush_to_disk();
2036 			os_thread_sleep(3000000);
2037 			DBUG_SUICIDE(););
2038 
2039 	/* Step-9: Drop all indexes (free index pages associated with these
2040 	indexes) */
2041 	if (!dict_table_is_temporary(table)) {
2042 
2043 		DropIndex	dropIndex(table, no_redo);
2044 
2045 		err = SysIndexIterator().for_each(dropIndex);
2046 
2047 		if (err != DB_SUCCESS) {
2048 
2049 			row_truncate_rollback(
2050 				table, trx, new_id, has_internal_doc_id,
2051 				no_redo, true, true);
2052 
2053 			return(row_truncate_complete(
2054 				table, trx, fsp_flags, logger, err));
2055 		}
2056 
2057 	} else {
2058 		/* For temporary tables we don't have entries in SYSTEM TABLES*/
2059 		for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
2060 		     index != NULL;
2061 		     index = UT_LIST_GET_NEXT(indexes, index)) {
2062 
2063 			err = dict_truncate_index_tree_in_mem(index);
2064 
2065 			if (err != DB_SUCCESS) {
2066 				row_truncate_rollback(
2067 					table, trx, new_id, has_internal_doc_id,
2068 					no_redo, true, true);
2069 				return(row_truncate_complete(
2070 					table, trx, fsp_flags, logger, err));
2071 			}
2072 
2073 			DBUG_EXECUTE_IF(
2074 				"ib_trunc_crash_during_drop_index_temp_table",
2075 				log_buffer_flush_to_disk();
2076 				os_thread_sleep(2000000);
2077 				DBUG_SUICIDE(););
2078 		}
2079 	}
2080 
2081 	if (is_file_per_table
2082 	    && !dict_table_is_temporary(table)
2083 	    && fsp_flags != ULINT_UNDEFINED) {
2084 
2085 		/* A single-table tablespace has initially
2086 		FIL_IBD_FILE_INITIAL_SIZE number of pages allocated and an
2087 		extra page is allocated for each of the indexes present. But in
2088 		the case of clust index 2 pages are allocated and as one is
2089 		covered in the calculation as part of table->indexes.count we
2090 		take care of the other page by adding 1. */
2091 		ulint	space_size = table->indexes.count +
2092 				FIL_IBD_FILE_INITIAL_SIZE + 1;
2093 
2094 		if (has_internal_doc_id) {
2095 			/* Since aux tables are created for fts indexes and
2096 			they use seperate tablespaces. */
2097 			space_size -= ib_vector_size(table->fts->indexes);
2098 		}
2099 
2100 		fil_reinit_space_header_for_table(table, space_size, trx);
2101 	}
2102 
2103 	DBUG_EXECUTE_IF("ib_trunc_crash_with_intermediate_log_checkpoint",
2104 			log_buffer_flush_to_disk();
2105 			os_thread_sleep(2000000);
2106 			log_checkpoint(TRUE, TRUE);
2107 			os_thread_sleep(1000000);
2108 			DBUG_SUICIDE(););
2109 
2110 	DBUG_EXECUTE_IF("ib_trunc_crash_drop_reinit_done_create_to_start",
2111 			log_buffer_flush_to_disk();
2112 			os_thread_sleep(2000000);
2113 			DBUG_SUICIDE(););
2114 
2115 	/* Step-10: Re-create new indexes. */
2116 	if (!dict_table_is_temporary(table)) {
2117 
2118 		CreateIndex	createIndex(table, no_redo);
2119 
2120 		err = SysIndexIterator().for_each(createIndex);
2121 
2122 		if (err != DB_SUCCESS) {
2123 
2124 			row_truncate_rollback(
2125 				table, trx, new_id, has_internal_doc_id,
2126 				no_redo, true, true);
2127 
2128 			return(row_truncate_complete(
2129 				table, trx, fsp_flags, logger, err));
2130 		}
2131 	}
2132 
2133 	/* Done with index truncation, release index tree locks,
2134 	subsequent work relates to table level metadata change */
2135 	dict_table_x_unlock_indexes(table);
2136 
2137 	if (has_internal_doc_id) {
2138 
2139 		err = row_truncate_fts(table, new_id, trx);
2140 
2141 		if (err != DB_SUCCESS) {
2142 
2143 			row_truncate_rollback(
2144 				table, trx, new_id, has_internal_doc_id,
2145 				no_redo, true, false);
2146 
2147 			return(row_truncate_complete(
2148 				table, trx, fsp_flags, logger, err));
2149 		}
2150 	}
2151 
2152 	/* Step-11: Update new table-id to in-memory cache (dictionary),
2153 	on-disk (INNODB_SYS_TABLES). INNODB_SYS_INDEXES also needs to
2154 	be updated to reflect updated root-page-no of new index created
2155 	and updated table-id. */
2156 	if (dict_table_is_temporary(table)) {
2157 
2158 		dict_table_change_id_in_cache(table, new_id);
2159 		err = DB_SUCCESS;
2160 
2161 	} else {
2162 
2163 		/* If this fails then we are in an inconsistent state and
2164 		the results are undefined. */
2165 		ut_ad(old_space == table->space);
2166 
2167 		err = row_truncate_update_system_tables(
2168 			table, new_id, has_internal_doc_id, no_redo, trx);
2169 
2170 		if (err != DB_SUCCESS) {
2171 			return(row_truncate_complete(
2172 				table, trx, fsp_flags, logger, err));
2173 		}
2174 	}
2175 
2176 	DBUG_EXECUTE_IF("ib_trunc_crash_on_updating_dict_sys_info",
2177 			log_buffer_flush_to_disk();
2178 			os_thread_sleep(2000000);
2179 			DBUG_SUICIDE(););
2180 
2181 	/* Step-12: Cleanup Stage. Reset auto-inc value to 1.
2182 	Release all the locks.
2183 	Commit the transaction. Update trx operation state. */
2184 	dict_table_autoinc_lock(table);
2185 	dict_table_autoinc_initialize(table, 1);
2186 	dict_table_autoinc_unlock(table);
2187 
2188 	if (trx_is_started(trx)) {
2189 
2190 		trx_commit_for_mysql(trx);
2191 	}
2192 
2193 	return(row_truncate_complete(table, trx, fsp_flags, logger, err));
2194 }
2195 
2196 /**
2197 Fix the table truncate by applying information parsed from TRUNCATE log.
2198 Fix-up includes re-creating table (drop and re-create indexes)
2199 @return	error code or DB_SUCCESS */
2200 dberr_t
fixup_tables_in_system_tablespace()2201 truncate_t::fixup_tables_in_system_tablespace()
2202 {
2203 	dberr_t	err = DB_SUCCESS;
2204 
2205 	/* Using the info cached during REDO log scan phase fix the
2206 	table truncate. */
2207 
2208 	for (tables_t::iterator it = s_tables.begin();
2209 	     it != s_tables.end();) {
2210 
2211 		if ((*it)->m_space_id == TRX_SYS_SPACE) {
2212 			/* Step-1: Drop and re-create indexes. */
2213 			ib::info() << "Completing truncate for table with "
2214 				"id (" << (*it)->m_old_table_id << ") "
2215 				"residing in the system tablespace.";
2216 
2217 			err = fil_recreate_table(
2218 				(*it)->m_space_id,
2219 				(*it)->m_format_flags,
2220 				(*it)->m_tablespace_flags,
2221 				(*it)->m_tablename,
2222 				**it);
2223 
2224 			/* Step-2: Update the SYS_XXXX tables to reflect
2225 			this new table_id and root_page_no. */
2226 			table_id_t	new_id;
2227 
2228 			dict_hdr_get_new_id(&new_id, NULL, NULL, NULL, true);
2229 
2230 			err = row_truncate_update_sys_tables_during_fix_up(
2231 				**it, new_id, TRUE,
2232 				(err == DB_SUCCESS) ? false : true);
2233 
2234 			if (err != DB_SUCCESS) {
2235 				break;
2236 			}
2237 
2238 			os_file_delete(
2239 				innodb_log_file_key, (*it)->m_log_file_name);
2240 			UT_DELETE(*it);
2241 			it = s_tables.erase(it);
2242 		} else {
2243 			++it;
2244 		}
2245 	}
2246 
2247 	/* Also clear the map used to track tablespace truncated. */
2248 	s_truncated_tables.clear();
2249 
2250 	return(err);
2251 }
2252 
2253 /**
2254 Fix the table truncate by applying information parsed from TRUNCATE log.
2255 Fix-up includes re-creating tablespace.
2256 @return	error code or DB_SUCCESS */
2257 dberr_t
fixup_tables_in_non_system_tablespace()2258 truncate_t::fixup_tables_in_non_system_tablespace()
2259 {
2260 	dberr_t	err = DB_SUCCESS;
2261 
2262 	/* Using the info cached during REDO log scan phase fix the
2263 	table truncate. */
2264 	tables_t::iterator end = s_tables.end();
2265 
2266 	for (tables_t::iterator it = s_tables.begin(); it != end; ++it) {
2267 
2268 		/* All tables in the system tablespace have already been
2269 		done and erased from this list. */
2270 		ut_a((*it)->m_space_id != TRX_SYS_SPACE);
2271 
2272 		/* Step-1: Drop tablespace (only for single-tablespace),
2273 		drop indexes and re-create indexes. */
2274 
2275 		if (fsp_is_file_per_table((*it)->m_space_id,
2276 					  (*it)->m_tablespace_flags)) {
2277 			/* The table is file_per_table */
2278 
2279 			ib::info() << "Completing truncate for table with "
2280 				"id (" << (*it)->m_old_table_id << ") "
2281 				"residing in file-per-table tablespace with "
2282 				"id (" << (*it)->m_space_id << ")";
2283 
2284 			if (!fil_space_get((*it)->m_space_id)) {
2285 
2286 				/* Create the database directory for name,
2287 				if it does not exist yet */
2288 				fil_create_directory_for_tablename(
2289 					(*it)->m_tablename);
2290 
2291 				CreateInfoEncryptionKeyId create_info_encryption_key_id(false,
2292 											(*it)->m_encryption_key_id);
2293 
2294 				err = fil_ibd_create(
2295 						(*it)->m_space_id,
2296 						(*it)->m_tablename,
2297 						(*it)->m_dir_path,
2298 						(*it)->m_tablespace_flags,
2299 						FIL_IBD_FILE_INITIAL_SIZE,
2300 						(*it)->m_encryption,
2301 						create_info_encryption_key_id
2302 					);
2303 				if (err != DB_SUCCESS) {
2304 					/* If checkpoint is not yet done
2305 					and table is dropped and then we might
2306 					still have REDO entries for this table
2307 					which are INVALID. Ignore them. */
2308 					ib::warn() << "Failed to create"
2309 						" tablespace for "
2310 						<< (*it)->m_space_id
2311 						<< " space-id";
2312 					err = DB_ERROR;
2313 					break;
2314 				}
2315 			}
2316 
2317 			ut_ad(fil_space_get((*it)->m_space_id));
2318 
2319 			err = fil_recreate_tablespace(
2320 				(*it)->m_space_id,
2321 				(*it)->m_format_flags,
2322 				(*it)->m_tablespace_flags,
2323 				(*it)->m_tablename,
2324 				**it, log_get_lsn());
2325 
2326 		} else {
2327 			/* Table is in a shared tablespace */
2328 
2329 			ib::info() << "Completing truncate for table with "
2330 				"id (" << (*it)->m_old_table_id << ") "
2331 				"residing in shared tablespace with "
2332 				"id (" << (*it)->m_space_id << ")";
2333 
2334 			/* Temp-tables in temp-tablespace are never restored.*/
2335 			ut_ad((*it)->m_space_id != srv_tmp_space.space_id());
2336 
2337 			err = fil_recreate_table(
2338 				(*it)->m_space_id,
2339 				(*it)->m_format_flags,
2340 				(*it)->m_tablespace_flags,
2341 				(*it)->m_tablename,
2342 				**it);
2343 		}
2344 
2345 		/* Step-2: Update the SYS_XXXX tables to reflect new
2346 		table-id and root_page_no. */
2347 		table_id_t	new_id;
2348 
2349 		dict_hdr_get_new_id(&new_id, NULL, NULL, NULL, true);
2350 
2351 		err = row_truncate_update_sys_tables_during_fix_up(
2352 			**it, new_id, TRUE, (err == DB_SUCCESS) ? false : true);
2353 
2354 		if (err != DB_SUCCESS) {
2355 			break;
2356 		}
2357 	}
2358 
2359 	if (err == DB_SUCCESS && s_tables.size() > 0) {
2360 
2361 		log_make_checkpoint_at(LSN_MAX, TRUE);
2362 	}
2363 
2364 	for (ulint i = 0; i < s_tables.size(); ++i) {
2365 		os_file_delete(
2366 			innodb_log_file_key, s_tables[i]->m_log_file_name);
2367 		UT_DELETE(s_tables[i]);
2368 	}
2369 
2370 	s_tables.clear();
2371 
2372 	return(err);
2373 }
2374 
2375 /**
2376 Constructor
2377 
2378 @param old_table_id	old table id assigned to table before truncate
2379 @param new_table_id	new table id that will be assigned to table
2380 			after truncate
2381 @param dir_path		directory path */
2382 
truncate_t(table_id_t old_table_id,table_id_t new_table_id,const char * dir_path)2383 truncate_t::truncate_t(
2384 	table_id_t	old_table_id,
2385 	table_id_t	new_table_id,
2386 	const char*	dir_path)
2387 	:
2388 	m_space_id(),
2389 	m_old_table_id(old_table_id),
2390 	m_new_table_id(new_table_id),
2391 	m_dir_path(),
2392 	m_tablename(),
2393 	m_tablespace_flags(),
2394 	m_format_flags(),
2395 	m_indexes(),
2396 	m_log_lsn(),
2397 	m_log_file_name(),
2398 	m_encryption(FIL_ENCRYPTION_DEFAULT)
2399 {
2400 	if (dir_path != NULL) {
2401 		m_dir_path = mem_strdup(dir_path);
2402 	}
2403 }
2404 
2405 /**
2406 Consturctor
2407 
2408 @param log_file_name	parse the log file during recovery to populate
2409 			information related to table to truncate */
truncate_t(const char * log_file_name)2410 truncate_t::truncate_t(
2411 	const char*	log_file_name)
2412 	:
2413 	m_space_id(),
2414 	m_old_table_id(),
2415 	m_new_table_id(),
2416 	m_dir_path(),
2417 	m_tablename(),
2418 	m_tablespace_flags(),
2419 	m_format_flags(),
2420 	m_indexes(),
2421 	m_log_lsn(),
2422 	m_log_file_name(),
2423 	m_encryption(FIL_ENCRYPTION_DEFAULT)
2424 {
2425 	m_log_file_name = mem_strdup(log_file_name);
2426 	if (m_log_file_name == NULL) {
2427 		ib::fatal() << "Failed creating truncate_t; out of memory";
2428 	}
2429 }
2430 
2431 /** Constructor */
2432 
index_t()2433 truncate_t::index_t::index_t()
2434 	:
2435 	m_id(),
2436 	m_type(),
2437 	m_root_page_no(FIL_NULL),
2438 	m_new_root_page_no(FIL_NULL),
2439 	m_n_fields(),
2440 	m_trx_id_pos(ULINT_UNDEFINED),
2441 	m_fields()
2442 {
2443 	/* Do nothing */
2444 }
2445 
2446 /** Destructor */
2447 
~truncate_t()2448 truncate_t::~truncate_t()
2449 {
2450 	if (m_dir_path != NULL) {
2451 		ut_free(m_dir_path);
2452 		m_dir_path = NULL;
2453 	}
2454 
2455 	if (m_tablename != NULL) {
2456 		ut_free(m_tablename);
2457 		m_tablename = NULL;
2458 	}
2459 
2460 	if (m_log_file_name != NULL) {
2461 		ut_free(m_log_file_name);
2462 		m_log_file_name = NULL;
2463 	}
2464 
2465 	m_indexes.clear();
2466 }
2467 
2468 /**
2469 @return number of indexes parsed from the log record */
2470 
2471 size_t
indexes() const2472 truncate_t::indexes() const
2473 {
2474 	return(m_indexes.size());
2475 }
2476 
2477 /**
2478 Update root page number in SYS_XXXX tables.
2479 
2480 @param trx			transaction object
2481 @param table_id			table id for which information needs to
2482 				be updated.
2483 @param reserve_dict_mutex	if TRUE, acquire/release
2484 				dict_sys->mutex around call to pars_sql.
2485 @param mark_index_corrupted	if true, then mark index corrupted.
2486 @return DB_SUCCESS or error code */
2487 
2488 dberr_t
update_root_page_no(trx_t * trx,table_id_t table_id,ibool reserve_dict_mutex,bool mark_index_corrupted) const2489 truncate_t::update_root_page_no(
2490 	trx_t*		trx,
2491 	table_id_t	table_id,
2492 	ibool		reserve_dict_mutex,
2493 	bool		mark_index_corrupted) const
2494 {
2495 	indexes_t::const_iterator end = m_indexes.end();
2496 
2497 	dberr_t	err = DB_SUCCESS;
2498 
2499 	for (indexes_t::const_iterator it = m_indexes.begin();
2500 	     it != end;
2501 	     ++it) {
2502 
2503 		pars_info_t*	info = pars_info_create();
2504 
2505 		pars_info_add_int4_literal(
2506 			info, "page_no", it->m_new_root_page_no);
2507 
2508 		pars_info_add_ull_literal(info, "table_id", table_id);
2509 
2510 		pars_info_add_ull_literal(
2511 			info, "index_id",
2512 			(mark_index_corrupted ? -1 : it->m_id));
2513 
2514 		err = que_eval_sql(
2515 			info,
2516 			"PROCEDURE RENUMBER_IDX_PAGE_NO_PROC () IS\n"
2517 			"BEGIN\n"
2518 			"UPDATE SYS_INDEXES"
2519 			" SET PAGE_NO = :page_no\n"
2520 			" WHERE TABLE_ID = :table_id"
2521 			" AND ID = :index_id;\n"
2522 			"END;\n", reserve_dict_mutex, trx);
2523 
2524 		if (err != DB_SUCCESS) {
2525 			break;
2526 		}
2527 	}
2528 
2529 	return(err);
2530 }
2531 
2532 /**
2533 Check whether a tablespace was truncated during recovery
2534 @param space_id	tablespace id to check
2535 @return true if the tablespace was truncated */
2536 
2537 bool
is_tablespace_truncated(ulint space_id)2538 truncate_t::is_tablespace_truncated(ulint space_id)
2539 {
2540 	tables_t::iterator end = s_tables.end();
2541 
2542 	for (tables_t::iterator it = s_tables.begin(); it != end; ++it) {
2543 
2544 		if ((*it)->m_space_id == space_id) {
2545 
2546 			return(true);
2547 		}
2548 	}
2549 
2550 	return(false);
2551 }
2552 
2553 /** Was tablespace truncated (on crash before checkpoint).
2554 If the MLOG_TRUNCATE redo-record is still available then tablespace
2555 was truncated and checkpoint is yet to happen.
2556 @param[in]	space_id	tablespace id to check.
2557 @return true if tablespace is was truncated. */
2558 bool
was_tablespace_truncated(ulint space_id)2559 truncate_t::was_tablespace_truncated(ulint space_id)
2560 {
2561 	return(s_truncated_tables.find(space_id) != s_truncated_tables.end());
2562 }
2563 
2564 /** Get the lsn associated with space.
2565 @param[in]	space_id	tablespace id to check.
2566 @return associated lsn. */
2567 lsn_t
get_truncated_tablespace_init_lsn(ulint space_id)2568 truncate_t::get_truncated_tablespace_init_lsn(ulint space_id)
2569 {
2570 	ut_ad(was_tablespace_truncated(space_id));
2571 
2572 	return(s_truncated_tables.find(space_id)->second);
2573 }
2574 
2575 /**
2576 Parses log record during recovery
2577 @param start_ptr	buffer containing log body to parse
2578 @param end_ptr		buffer end
2579 
2580 @return DB_SUCCESS or error code */
2581 
2582 dberr_t
parse(byte * start_ptr,const byte * end_ptr)2583 truncate_t::parse(
2584 	byte*		start_ptr,
2585 	const byte*	end_ptr)
2586 {
2587 	/* Parse lsn, space-id, format-flags and tablespace-flags. */
2588 	if (end_ptr < start_ptr + (8 + 4 + 4 + 4)) {
2589 		return(DB_FAIL);
2590 	}
2591 
2592 	m_log_lsn = mach_read_from_8(start_ptr);
2593 	start_ptr += 8;
2594 
2595 	m_space_id = mach_read_from_4(start_ptr);
2596 	start_ptr += 4;
2597 
2598 	m_format_flags = mach_read_from_4(start_ptr);
2599 	start_ptr += 4;
2600 
2601 	m_tablespace_flags = mach_read_from_4(start_ptr);
2602 	start_ptr += 4;
2603 
2604 	/* Parse table-name. */
2605 	if (end_ptr < start_ptr + (2)) {
2606 		return(DB_FAIL);
2607 	}
2608 
2609 	ulint n_tablename_len = mach_read_from_2(start_ptr);
2610 	start_ptr += 2;
2611 
2612 	if (n_tablename_len > 0) {
2613 		if (end_ptr < start_ptr + n_tablename_len) {
2614 			return(DB_FAIL);
2615 		}
2616 		m_tablename = mem_strdup(reinterpret_cast<char*>(start_ptr));
2617 		ut_ad(m_tablename[n_tablename_len - 1] == 0);
2618 		start_ptr += n_tablename_len;
2619 	}
2620 
2621 
2622 	/* Parse and read old/new table-id, number of indexes */
2623 	if (end_ptr < start_ptr + (8 + 8 + 2 + 2)) {
2624 		return(DB_FAIL);
2625 	}
2626 
2627 	ut_ad(m_indexes.empty());
2628 
2629 	m_old_table_id = mach_read_from_8(start_ptr);
2630 	start_ptr += 8;
2631 
2632 	m_new_table_id = mach_read_from_8(start_ptr);
2633 	start_ptr += 8;
2634 
2635 	ulint n_indexes = mach_read_from_2(start_ptr);
2636 	start_ptr += 2;
2637 
2638 	/* Parse the remote directory from TRUNCATE log record */
2639 	{
2640 		ulint	n_tabledirpath_len = mach_read_from_2(start_ptr);
2641 		start_ptr += 2;
2642 
2643 		if (end_ptr < start_ptr + n_tabledirpath_len) {
2644 			return(DB_FAIL);
2645 		}
2646 
2647 		if (n_tabledirpath_len > 0) {
2648 
2649 			m_dir_path = mem_strdup(reinterpret_cast<char*>(start_ptr));
2650 			ut_ad(m_dir_path[n_tabledirpath_len - 1] == 0);
2651 			start_ptr += n_tabledirpath_len;
2652 		}
2653 	}
2654 
2655 	/* Parse index ids and types from TRUNCATE log record */
2656 	for (ulint i = 0; i < n_indexes; ++i) {
2657 		index_t	index;
2658 
2659 		if (end_ptr < start_ptr + (8 + 4 + 4 + 4)) {
2660 			return(DB_FAIL);
2661 		}
2662 
2663 		index.m_id = mach_read_from_8(start_ptr);
2664 		start_ptr += 8;
2665 
2666 		index.m_type = mach_read_from_4(start_ptr);
2667 		start_ptr += 4;
2668 
2669 		index.m_root_page_no = mach_read_from_4(start_ptr);
2670 		start_ptr += 4;
2671 
2672 		index.m_trx_id_pos = mach_read_from_4(start_ptr);
2673 		start_ptr += 4;
2674 
2675 		if (!(index.m_type & DICT_FTS)) {
2676 			m_indexes.push_back(index);
2677 		}
2678 	}
2679 
2680 	ut_ad(!m_indexes.empty());
2681 
2682 	if (fsp_flags_is_compressed(m_tablespace_flags)) {
2683 
2684 		/* Parse the number of index fields from TRUNCATE log record */
2685 		for (ulint i = 0; i < m_indexes.size(); ++i) {
2686 
2687 			if (end_ptr < start_ptr + (2 + 2)) {
2688 				return(DB_FAIL);
2689 			}
2690 
2691 			m_indexes[i].m_n_fields = mach_read_from_2(start_ptr);
2692 			start_ptr += 2;
2693 
2694 			ulint	len = mach_read_from_2(start_ptr);
2695 			start_ptr += 2;
2696 
2697 			if (end_ptr < start_ptr + len) {
2698 				return(DB_FAIL);
2699 			}
2700 
2701 			index_t&	index = m_indexes[i];
2702 
2703 			/* Should be NUL terminated. */
2704 			ut_ad((start_ptr)[len - 1] == 0);
2705 
2706 			index_t::fields_t::iterator	end;
2707 
2708 			end = index.m_fields.end();
2709 
2710 			index.m_fields.insert(
2711 				end, start_ptr, &(start_ptr)[len]);
2712 
2713 			start_ptr += len;
2714 		}
2715 	}
2716 
2717 	return(DB_SUCCESS);
2718 }
2719 
2720 /** Parse log record from REDO log file during recovery.
2721 @param[in,out]	start_ptr	buffer containing log body to parse
2722 @param[in]	end_ptr		buffer end
2723 @param[in]	space_id	tablespace identifier
2724 @return parsed upto or NULL. */
2725 byte*
parse_redo_entry(byte * start_ptr,const byte * end_ptr,ulint space_id)2726 truncate_t::parse_redo_entry(
2727 	byte*		start_ptr,
2728 	const byte*	end_ptr,
2729 	ulint		space_id)
2730 {
2731 	lsn_t	lsn;
2732 
2733 	/* Parse space-id, lsn */
2734 	if (end_ptr < (start_ptr + 8)) {
2735 		return(NULL);
2736 	}
2737 
2738 	lsn = mach_read_from_8(start_ptr);
2739 	start_ptr += 8;
2740 
2741 	/* Tablespace can't exist in both state.
2742 	(scheduled-for-truncate, was-truncated). */
2743 	if (!is_tablespace_truncated(space_id)) {
2744 
2745 		truncated_tables_t::iterator	it =
2746 				s_truncated_tables.find(space_id);
2747 
2748 		if (it == s_truncated_tables.end()) {
2749 			s_truncated_tables.insert(
2750 				std::pair<ulint, lsn_t>(space_id, lsn));
2751 		} else {
2752 			it->second = lsn;
2753 		}
2754 	}
2755 
2756 	return(start_ptr);
2757 }
2758 
2759 /**
2760 Set the truncate log values for a compressed table.
2761 @param index	index from which recreate infoormation needs to be extracted
2762 @return DB_SUCCESS or error code */
2763 
2764 dberr_t
set(const dict_index_t * index)2765 truncate_t::index_t::set(
2766 	const dict_index_t* index)
2767 {
2768 	/* Get trx-id column position (set only for clustered index) */
2769 	if (dict_index_is_clust(index)) {
2770 		m_trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
2771 		ut_ad(m_trx_id_pos > 0);
2772 		ut_ad(m_trx_id_pos != ULINT_UNDEFINED);
2773 	} else {
2774 		m_trx_id_pos = 0;
2775 	}
2776 
2777 	/* Original logic set this field differently if page is not leaf.
2778 	For truncate case this being first page to get created it is
2779 	always a leaf page and so we don't need that condition here. */
2780 	m_n_fields = dict_index_get_n_fields(index);
2781 
2782 	/* See requirements of page_zip_fields_encode for size. */
2783 	ulint	encoded_buf_size = (m_n_fields + 1) * 2;
2784 	byte*	encoded_buf = UT_NEW_ARRAY_NOKEY(byte, encoded_buf_size);
2785 
2786 	if (encoded_buf == NULL) {
2787 		return(DB_OUT_OF_MEMORY);
2788 	}
2789 
2790 	ulint len = page_zip_fields_encode(
2791 		m_n_fields, index, m_trx_id_pos, encoded_buf);
2792 	ut_a(len <= encoded_buf_size);
2793 
2794 	/* Append the encoded fields data. */
2795 	m_fields.insert(m_fields.end(), &encoded_buf[0], &encoded_buf[len]);
2796 
2797 	/* NUL terminate the encoded data */
2798 	m_fields.push_back(0);
2799 
2800 	UT_DELETE_ARRAY(encoded_buf);
2801 
2802 	return(DB_SUCCESS);
2803 }
2804 
2805 /** Create an index for a table.
2806 @param[in]	table_name		table name, for which to create
2807 the index
2808 @param[in]	space_id		space id where we have to
2809 create the index
2810 @param[in]	page_size		page size of the .ibd file
2811 @param[in]	index_type		type of index to truncate
2812 @param[in]	index_id		id of index to truncate
2813 @param[in]	btr_redo_create_info	control info for ::btr_create()
2814 @param[in,out]	mtr			mini-transaction covering the
2815 create index
2816 @return root page no or FIL_NULL on failure */
2817 ulint
create_index(const char * table_name,ulint space_id,const page_size_t & page_size,ulint index_type,index_id_t index_id,const btr_create_t & btr_redo_create_info,mtr_t * mtr) const2818 truncate_t::create_index(
2819 	const char*		table_name,
2820 	ulint			space_id,
2821 	const page_size_t&	page_size,
2822 	ulint			index_type,
2823 	index_id_t		index_id,
2824 	const btr_create_t&	btr_redo_create_info,
2825 	mtr_t*			mtr) const
2826 {
2827 	ulint	root_page_no = btr_create(
2828 		index_type, space_id, page_size, index_id,
2829 		NULL, &btr_redo_create_info, mtr);
2830 
2831 	if (root_page_no == FIL_NULL) {
2832 
2833 		ib::info() << "innodb_force_recovery was set to "
2834 			<< srv_force_recovery << ". Continuing crash recovery"
2835 			" even though we failed to create index " << index_id
2836 			<< " for compressed table '" << table_name << "' with"
2837 			" tablespace " << space_id << " during recovery";
2838 	}
2839 
2840 	return(root_page_no);
2841 }
2842 
2843 /** Check if index has been modified since TRUNCATE log snapshot
2844 was recorded.
2845 @param space_id		space_id where table/indexes resides.
2846 @param root_page_no	root page of index that needs to be verified.
2847 @return true if modified else false */
2848 
2849 bool
is_index_modified_since_logged(ulint space_id,ulint root_page_no) const2850 truncate_t::is_index_modified_since_logged(
2851 	ulint		space_id,
2852 	ulint		root_page_no) const
2853 {
2854 	mtr_t			mtr;
2855 	bool			found;
2856 	const page_size_t&	page_size = fil_space_get_page_size(space_id,
2857 								    &found);
2858 
2859 	ut_ad(found);
2860 
2861 	mtr_start(&mtr);
2862 
2863 	/* Root page could be in free state if truncate crashed after drop_index
2864 	and page was not allocated for any other object. */
2865 	buf_block_t* block= buf_page_get_gen(
2866 		page_id_t(space_id, root_page_no), page_size, RW_X_LATCH, NULL,
2867 		BUF_GET_POSSIBLY_FREED, __FILE__, __LINE__, &mtr);
2868 
2869 	page_t* root = buf_block_get_frame(block);
2870 
2871 #ifdef UNIV_DEBUG
2872 	/* If the root page has been freed as part of truncate drop_index action
2873 	and not yet allocated for any object still the pagelsn > snapshot lsn */
2874 	if (block->page.file_page_was_freed) {
2875 		ut_ad(mach_read_from_8(root + FIL_PAGE_LSN) > m_log_lsn);
2876 	}
2877 #endif /* UNIV_DEBUG */
2878 
2879 	lsn_t page_lsn = mach_read_from_8(root + FIL_PAGE_LSN);
2880 
2881 	mtr_commit(&mtr);
2882 
2883 	if (page_lsn > m_log_lsn) {
2884 		return(true);
2885 	}
2886 
2887 	return(false);
2888 }
2889 
2890 /** Drop indexes for a table.
2891 @param space_id		space_id where table/indexes resides. */
2892 
2893 void
drop_indexes(ulint space_id) const2894 truncate_t::drop_indexes(
2895 	ulint		space_id) const
2896 {
2897 	mtr_t           mtr;
2898 	ulint		root_page_no = FIL_NULL;
2899 
2900 	indexes_t::const_iterator       end = m_indexes.end();
2901 
2902 	for (indexes_t::const_iterator it = m_indexes.begin();
2903 	     it != end;
2904 	     ++it) {
2905 
2906 		root_page_no = it->m_root_page_no;
2907 
2908 		bool			found;
2909 		const page_size_t&	page_size
2910 			= fil_space_get_page_size(space_id, &found);
2911 
2912 		ut_ad(found);
2913 
2914 		if (is_index_modified_since_logged(
2915 			space_id, root_page_no)) {
2916 			/* Page has been modified since TRUNCATE log snapshot
2917 			was recorded so not safe to drop the index. */
2918 			continue;
2919 		}
2920 
2921 		mtr_start(&mtr);
2922 
2923 		if (space_id != TRX_SYS_SPACE) {
2924 			/* Do not log changes for single-table
2925 			tablespaces, we are in recovery mode. */
2926 			mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
2927 		}
2928 
2929 		if (root_page_no != FIL_NULL) {
2930 			const page_id_t	root_page_id(space_id, root_page_no);
2931 
2932 			btr_free_if_exists(
2933 				root_page_id, page_size, it->m_id, &mtr);
2934 		}
2935 
2936 		/* If tree is already freed then we might return immediately
2937 		in which case we need to release the lock we have acquired
2938 		on root_page. */
2939 		mtr_commit(&mtr);
2940 	}
2941 }
2942 
2943 
2944 /** Create the indexes for a table
2945 @param[in]	table_name	table name, for which to create the indexes
2946 @param[in]	space_id	space id where we have to create the indexes
2947 @param[in]	page_size	page size of the .ibd file
2948 @param[in]	flags		tablespace flags
2949 @param[in]	format_flags	page format flags
2950 @return DB_SUCCESS or error code. */
2951 dberr_t
create_indexes(const char * table_name,ulint space_id,const page_size_t & page_size,ulint flags,ulint format_flags)2952 truncate_t::create_indexes(
2953 	const char*		table_name,
2954 	ulint			space_id,
2955 	const page_size_t&	page_size,
2956 	ulint			flags,
2957 	ulint			format_flags)
2958 {
2959 	mtr_t           mtr;
2960 
2961 	mtr_start(&mtr);
2962 
2963 	if (space_id != TRX_SYS_SPACE) {
2964 		/* Do not log changes for single-table tablespaces, we
2965 		are in recovery mode. */
2966 		mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
2967 	}
2968 
2969 	/* Create all new index trees with table format, index ids, index
2970 	types, number of index fields and index field information taken
2971 	out from the TRUNCATE log record. */
2972 
2973 	ulint   root_page_no = FIL_NULL;
2974 	indexes_t::iterator       end = m_indexes.end();
2975 	for (indexes_t::iterator it = m_indexes.begin();
2976 	     it != end;
2977 	     ++it) {
2978 
2979 		btr_create_t    btr_redo_create_info(
2980 			fsp_flags_is_compressed(flags)
2981 			? &it->m_fields[0] : NULL);
2982 
2983 		btr_redo_create_info.format_flags = format_flags;
2984 
2985 		if (fsp_flags_is_compressed(flags)) {
2986 
2987 			btr_redo_create_info.n_fields = it->m_n_fields;
2988 			/* Skip the NUL appended field */
2989 			btr_redo_create_info.field_len =
2990 				it->m_fields.size() - 1;
2991 			btr_redo_create_info.trx_id_pos = it->m_trx_id_pos;
2992 		}
2993 
2994 		root_page_no = create_index(
2995 			table_name, space_id, page_size, it->m_type, it->m_id,
2996 			btr_redo_create_info, &mtr);
2997 
2998 		if (root_page_no == FIL_NULL) {
2999 			break;
3000 		}
3001 
3002 		it->m_new_root_page_no = root_page_no;
3003 	}
3004 
3005 	mtr_commit(&mtr);
3006 
3007 	return(root_page_no == FIL_NULL ? DB_ERROR : DB_SUCCESS);
3008 }
3009 
3010 /**
3011 Write a TRUNCATE log record for fixing up table if truncate crashes.
3012 @param start_ptr	buffer to write log record
3013 @param end_ptr		buffer end
3014 @param space_id		space id
3015 @param tablename	the table name in the usual databasename/tablename
3016 			format of InnoDB
3017 @param flags		tablespace flags
3018 @param format_flags	page format
3019 @param lsn		lsn while logging
3020 @return DB_SUCCESS or error code */
3021 
3022 dberr_t
write(byte * start_ptr,byte * end_ptr,ulint space_id,const char * tablename,ulint flags,ulint format_flags,lsn_t lsn) const3023 truncate_t::write(
3024 	byte*		start_ptr,
3025 	byte*		end_ptr,
3026 	ulint		space_id,
3027 	const char*	tablename,
3028 	ulint		flags,
3029 	ulint		format_flags,
3030 	lsn_t		lsn) const
3031 {
3032 	if (end_ptr < start_ptr) {
3033 		return(DB_FAIL);
3034 	}
3035 
3036 	/* LSN, Type, Space-ID, format-flag (also know as log_flag.
3037 	Stored in page_no field), tablespace flags */
3038 	if (end_ptr < (start_ptr + (8 + 4 + 4 + 4)))  {
3039 		return(DB_FAIL);
3040 	}
3041 
3042 	mach_write_to_8(start_ptr, lsn);
3043 	start_ptr += 8;
3044 
3045 	mach_write_to_4(start_ptr, space_id);
3046 	start_ptr += 4;
3047 
3048 	mach_write_to_4(start_ptr, format_flags);
3049 	start_ptr += 4;
3050 
3051 	mach_write_to_4(start_ptr, flags);
3052 	start_ptr += 4;
3053 
3054 	/* Name of the table. */
3055 	/* Include the NUL in the log record. */
3056 	ulint len = strlen(tablename) + 1;
3057 	if (end_ptr < (start_ptr + (len + 2))) {
3058 		return(DB_FAIL);
3059 	}
3060 
3061 	mach_write_to_2(start_ptr, len);
3062 	start_ptr += 2;
3063 
3064 	memcpy(start_ptr, tablename, len - 1);
3065 	start_ptr += len;
3066 
3067 	DBUG_EXECUTE_IF("ib_trunc_crash_while_writing_redo_log",
3068 			DBUG_SUICIDE(););
3069 
3070 	/* Old/New Table-ID, Number of Indexes and Tablespace dir-path-name. */
3071 	/* Write the remote directory of the table into mtr log */
3072 	len = m_dir_path != NULL ? strlen(m_dir_path) + 1 : 0;
3073 	if (end_ptr < (start_ptr + (len + 8 + 8 + 2 + 2))) {
3074 		return(DB_FAIL);
3075 	}
3076 
3077 	/* Write out old-table-id. */
3078 	mach_write_to_8(start_ptr, m_old_table_id);
3079 	start_ptr += 8;
3080 
3081 	/* Write out new-table-id. */
3082 	mach_write_to_8(start_ptr, m_new_table_id);
3083 	start_ptr += 8;
3084 
3085 	/* Write out the number of indexes. */
3086 	mach_write_to_2(start_ptr, m_indexes.size());
3087 	start_ptr += 2;
3088 
3089 	/* Write the length (NUL included) of the .ibd path. */
3090 	mach_write_to_2(start_ptr, len);
3091 	start_ptr += 2;
3092 
3093 	if (m_dir_path != NULL) {
3094 		memcpy(start_ptr, m_dir_path, len - 1);
3095 		start_ptr += len;
3096 	}
3097 
3098 	/* Indexes information (id, type) */
3099 	/* Write index ids, type, root-page-no into mtr log */
3100 	for (ulint i = 0; i < m_indexes.size(); ++i) {
3101 
3102 		if (end_ptr < (start_ptr + (8 + 4 + 4 + 4))) {
3103 			return(DB_FAIL);
3104 		}
3105 
3106 		mach_write_to_8(start_ptr, m_indexes[i].m_id);
3107 		start_ptr += 8;
3108 
3109 		mach_write_to_4(start_ptr, m_indexes[i].m_type);
3110 		start_ptr += 4;
3111 
3112 		mach_write_to_4(start_ptr, m_indexes[i].m_root_page_no);
3113 		start_ptr += 4;
3114 
3115 		mach_write_to_4(start_ptr, m_indexes[i].m_trx_id_pos);
3116 		start_ptr += 4;
3117 	}
3118 
3119 	/* If tablespace compressed then field info of each index. */
3120 	if (fsp_flags_is_compressed(flags)) {
3121 
3122 		for (ulint i = 0; i < m_indexes.size(); ++i) {
3123 
3124 			ulint len = m_indexes[i].m_fields.size();
3125 			if (end_ptr < (start_ptr + (len + 2 + 2))) {
3126 				return(DB_FAIL);
3127 			}
3128 
3129 			mach_write_to_2(
3130 				start_ptr, m_indexes[i].m_n_fields);
3131 			start_ptr += 2;
3132 
3133 			mach_write_to_2(start_ptr, len);
3134 			start_ptr += 2;
3135 
3136 			const byte*	ptr = &m_indexes[i].m_fields[0];
3137 			memcpy(start_ptr, ptr, len - 1);
3138 			start_ptr += len;
3139 		}
3140 	}
3141 
3142 	return(DB_SUCCESS);
3143 }
3144 
3145