1 /*****************************************************************************
2 
3 Copyright (c) 2013, 2021, Oracle and/or its affiliates.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file row/row0trunc.cc
29 TRUNCATE implementation
30 
31 Created 2013-04-12 Sunny Bains
32 *******************************************************/
33 
34 #include "row0mysql.h"
35 #include "pars0pars.h"
36 #include "dict0crea.h"
37 #include "dict0boot.h"
38 #include "dict0stats.h"
39 #include "dict0stats_bg.h"
40 #include "lock0lock.h"
41 #include "fts0fts.h"
42 #include "fsp0sysspace.h"
43 #include "srv0start.h"
44 #include "row0trunc.h"
45 #include "os0file.h"
46 #include <vector>
47 
/* Out-of-class definitions of the static data members of truncate_t. */

/* Initially false. NOTE(review): presumably set while the truncate fix-up
pass is running -- confirm against the truncate_t declaration. */
bool	truncate_t::s_fix_up_active = false;
/* NOTE(review): presumably the collection that truncate_t::add() fills with
parsed truncate log entries -- confirm against the truncate_t declaration. */
truncate_t::tables_t		truncate_t::s_tables;
/* NOTE(review): presumably tracks tables already truncated -- confirm
against the truncate_t declaration. */
truncate_t::truncated_tables_t	truncate_t::s_truncated_tables;
51 
52 /**
53 Iterator over the the raw records in an index, doesn't support MVCC. */
54 class IndexIterator {
55 
56 public:
57 	/**
58 	Iterate over an indexes records
59 	@param index		index to iterate over */
IndexIterator(dict_index_t * index)60 	explicit IndexIterator(dict_index_t* index)
61 		:
62 		m_index(index)
63 	{
64 		/* Do nothing */
65 	}
66 
67 	/**
68 	Search for key. Position the cursor on a record GE key.
69 	@return DB_SUCCESS or error code. */
search(dtuple_t & key,bool noredo)70 	dberr_t search(dtuple_t& key, bool noredo)
71 	{
72 		mtr_start(&m_mtr);
73 
74 		if (noredo) {
75 			mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
76 		}
77 
78 		btr_pcur_open_on_user_rec(
79 			m_index,
80 			&key,
81 			PAGE_CUR_GE,
82 			BTR_MODIFY_LEAF,
83 			&m_pcur, &m_mtr);
84 
85 		return(DB_SUCCESS);
86 	}
87 
88 	/**
89 	Iterate over all the records
90 	@return DB_SUCCESS or error code */
91 	template <typename Callback>
for_each(Callback & callback)92 	dberr_t for_each(Callback& callback)
93 	{
94 		dberr_t	err = DB_SUCCESS;
95 
96 		for (;;) {
97 
98 			if (!btr_pcur_is_on_user_rec(&m_pcur)
99 			    || !callback.match(&m_mtr, &m_pcur)) {
100 
101 				/* The end of of the index has been reached. */
102 				err = DB_END_OF_INDEX;
103 				break;
104 			}
105 
106 			rec_t*	rec = btr_pcur_get_rec(&m_pcur);
107 
108 			if (!rec_get_deleted_flag(rec, FALSE)) {
109 
110 				err = callback(&m_mtr, &m_pcur);
111 
112 				if (err != DB_SUCCESS) {
113 					break;
114 				}
115 			}
116 
117 			btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr);
118 		}
119 
120 		btr_pcur_close(&m_pcur);
121 		mtr_commit(&m_mtr);
122 
123 		return(err == DB_END_OF_INDEX ? DB_SUCCESS : err);
124 	}
125 
126 private:
127 	// Disable copying
128 	IndexIterator(const IndexIterator&);
129 	IndexIterator& operator=(const IndexIterator&);
130 
131 private:
132 	mtr_t		m_mtr;
133 	btr_pcur_t	m_pcur;
134 	dict_index_t*	m_index;
135 };
136 
137 /** SysIndex table iterator, iterate over records for a table. */
138 class SysIndexIterator {
139 
140 public:
141 	/**
142 	Iterate over all the records that match the table id.
143 	@return DB_SUCCESS or error code */
144 	template <typename Callback>
for_each(Callback & callback) const145 	dberr_t for_each(Callback& callback) const
146 	{
147 		dict_index_t*	sys_index;
148 		byte		buf[DTUPLE_EST_ALLOC(1)];
149 		dtuple_t*	tuple =
150 			dtuple_create_from_mem(buf, sizeof(buf), 1, 0);
151 		dfield_t*	dfield = dtuple_get_nth_field(tuple, 0);
152 
153 		dfield_set_data(
154 			dfield,
155 			callback.table_id(),
156 			sizeof(*callback.table_id()));
157 
158 		sys_index = dict_table_get_first_index(dict_sys->sys_indexes);
159 
160 		dict_index_copy_types(tuple, sys_index, 1);
161 
162 		IndexIterator	iterator(sys_index);
163 
164 		/* Search on the table id and position the cursor
165 		on GE table_id. */
166 		iterator.search(*tuple, callback.get_logging_status());
167 
168 		return(iterator.for_each(callback));
169 	}
170 };
171 
172 /** Generic callback abstract class. */
173 class Callback
174 {
175 
176 public:
177 	/**
178 	Constructor
179 	@param	table_id		id of the table being operated.
180 	@param	noredo			if true turn off logging. */
Callback(table_id_t table_id,bool noredo)181 	Callback(table_id_t table_id, bool noredo)
182 		:
183 		m_id(),
184 		m_noredo(noredo)
185 	{
186 		/* Convert to storage byte order. */
187 		mach_write_to_8(&m_id, table_id);
188 	}
189 
190 	/**
191 	Destructor */
~Callback()192 	virtual ~Callback()
193 	{
194 		/* Do nothing */
195 	}
196 
197 	/**
198 	@param mtr		mini-transaction covering the iteration
199 	@param pcur		persistent cursor used for iteration
200 	@return true if the table id column matches. */
match(mtr_t * mtr,btr_pcur_t * pcur) const201 	bool match(mtr_t* mtr, btr_pcur_t* pcur) const
202 	{
203 		ulint		len;
204 		const byte*	field;
205 		rec_t*		rec = btr_pcur_get_rec(pcur);
206 
207 		field = rec_get_nth_field_old(
208 			rec, DICT_FLD__SYS_INDEXES__TABLE_ID, &len);
209 
210 		ut_ad(len == 8);
211 
212 		return(memcmp(&m_id, field, len) == 0);
213 	}
214 
215 	/**
216 	@return pointer to table id storage format buffer */
table_id() const217 	const table_id_t* table_id() const
218 	{
219 		return(&m_id);
220 	}
221 
222 	/**
223 	@return	return if logging needs to be turned off. */
get_logging_status() const224 	bool get_logging_status() const
225 	{
226 		return(m_noredo);
227 	}
228 
229 protected:
230 	// Disably copying
231 	Callback(const Callback&);
232 	Callback& operator=(const Callback&);
233 
234 protected:
235 	/** Table id in storage format */
236 	table_id_t		m_id;
237 
238 	/** Turn off logging. */
239 	const bool		m_noredo;
240 };
241 
242 /**
243 Creates a TRUNCATE log record with space id, table name, data directory path,
244 tablespace flags, table format, index ids, index types, number of index fields
245 and index field information of the table. */
246 class TruncateLogger : public Callback {
247 
248 public:
249 	/**
250 	Constructor
251 
252 	@param table	Table to truncate
253 	@param flags	tablespace falgs */
TruncateLogger(dict_table_t * table,ulint flags,table_id_t new_table_id)254 	TruncateLogger(
255 		dict_table_t*	table,
256 		ulint		flags,
257 		table_id_t	new_table_id)
258 		:
259 		Callback(table->id, false),
260 		m_table(table),
261 		m_flags(flags),
262 		m_truncate(table->id, new_table_id, table->data_dir_path),
263 		m_log_file_name()
264 	{
265 		/* Do nothing */
266 	}
267 
268 	/**
269 	Initialize Truncate Logger by constructing Truncate Log File Name.
270 
271 	@return DB_SUCCESS or error code. */
init()272 	dberr_t init()
273 	{
274 		/* Construct log file name. */
275 		ulint	log_file_name_buf_sz =
276 			strlen(srv_log_group_home_dir) + 22 + 22 + 1 /* NUL */
277 			+ strlen(TruncateLogger::s_log_prefix)
278 			+ strlen(TruncateLogger::s_log_ext);
279 
280 		m_log_file_name = UT_NEW_ARRAY_NOKEY(char, log_file_name_buf_sz);
281 		if (m_log_file_name == NULL) {
282 			return(DB_OUT_OF_MEMORY);
283 		}
284 		memset(m_log_file_name, 0, log_file_name_buf_sz);
285 
286 		strcpy(m_log_file_name, srv_log_group_home_dir);
287 		ulint	log_file_name_len = strlen(m_log_file_name);
288 		if (m_log_file_name[log_file_name_len - 1]
289 			!= OS_PATH_SEPARATOR) {
290 
291 			m_log_file_name[log_file_name_len]
292 				= OS_PATH_SEPARATOR;
293 			log_file_name_len = strlen(m_log_file_name);
294 		}
295 
296 		ut_snprintf(m_log_file_name + log_file_name_len,
297 			    log_file_name_buf_sz - log_file_name_len,
298 			    "%s%lu_%lu_%s",
299 			    TruncateLogger::s_log_prefix,
300 			    (ulong) m_table->space,
301 			    (ulong) m_table->id,
302 			    TruncateLogger::s_log_ext);
303 
304 		return(DB_SUCCESS);
305 
306 	}
307 
308 	/**
309 	Destructor */
~TruncateLogger()310 	~TruncateLogger()
311 	{
312 		if (m_log_file_name != NULL) {
313 			bool exist;
314 			os_file_delete_if_exists(
315 				innodb_log_file_key, m_log_file_name, &exist);
316 			UT_DELETE_ARRAY(m_log_file_name);
317 			m_log_file_name = NULL;
318 		}
319 	}
320 
321 	/**
322 	@param mtr	mini-transaction covering the read
323 	@param pcur	persistent cursor used for reading
324 	@return DB_SUCCESS or error code */
325 	dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur);
326 
327 	/** Called after iteratoring over the records.
328 	@return true if invariant satisfied. */
debug() const329 	bool debug() const
330 	{
331 		/* We must find all the index entries on disk. */
332 		return(UT_LIST_GET_LEN(m_table->indexes)
333 		       == m_truncate.indexes());
334 	}
335 
336 	/**
337 	Write the TRUNCATE log
338 	@return DB_SUCCESS or error code */
log() const339 	dberr_t log() const
340 	{
341 		dberr_t	err = DB_SUCCESS;
342 
343 		if (m_log_file_name == 0) {
344 			return(DB_ERROR);
345 		}
346 
347 		bool		ret;
348 		pfs_os_file_t	handle = os_file_create(
349 			innodb_log_file_key, m_log_file_name,
350 			OS_FILE_CREATE, OS_FILE_NORMAL,
351 			OS_LOG_FILE, srv_read_only_mode, &ret);
352 		if (!ret) {
353 			return(DB_IO_ERROR);
354 		}
355 
356 
357 		ulint	sz = UNIV_PAGE_SIZE;
358 		void*	buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
359 		if (buf == 0) {
360 			os_file_close(handle);
361 			return(DB_OUT_OF_MEMORY);
362 		}
363 
364 		/* Align the memory for file i/o if we might have O_DIRECT set*/
365 		byte*	log_buf = static_cast<byte*>(
366 			ut_align(buf, UNIV_PAGE_SIZE));
367 
368 		lsn_t	lsn = log_get_lsn();
369 
370 		/* Generally loop should exit in single go but
371 		just for those 1% of rare cases we need to assume
372 		corner case. */
373 		do {
374 			/* First 4 bytes are reserved for magic number
375 			which is currently 0. */
376 			err = m_truncate.write(
377 				log_buf + 4, log_buf + sz - 4,
378 				m_table->space, m_table->name.m_name,
379 				m_flags, m_table->flags, lsn);
380 
381 			DBUG_EXECUTE_IF("ib_err_trunc_oom_logging",
382 					err = DB_FAIL;);
383 
384 			if (err != DB_SUCCESS) {
385 				ut_ad(err == DB_FAIL);
386 				ut_free(buf);
387 				sz *= 2;
388 				buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
389 				DBUG_EXECUTE_IF("ib_err_trunc_oom_logging",
390 						ut_free(buf);
391 						buf = 0;);
392 				if (buf == 0) {
393 					os_file_close(handle);
394 					return(DB_OUT_OF_MEMORY);
395 				}
396 				log_buf = static_cast<byte*>(
397 					ut_align(buf, UNIV_PAGE_SIZE));
398 			}
399 
400 		} while (err != DB_SUCCESS);
401 
402 		dberr_t	io_err;
403 
404 		IORequest	request(IORequest::WRITE);
405 
406 		request.disable_compression();
407 
408 		io_err = os_file_write(
409 			request, m_log_file_name, handle, log_buf, 0, sz);
410 
411 		if (io_err != DB_SUCCESS) {
412 
413 			ib::error()
414 				<< "IO: Failed to write the file size to '"
415 				<< m_log_file_name << "'";
416 
417 			/* Preserve the original error code */
418 			if (err == DB_SUCCESS) {
419 				err = io_err;
420 			}
421 		}
422 
423 		os_file_flush(handle);
424 		os_file_close(handle);
425 
426 		ut_free(buf);
427 
428 		/* Why we need MLOG_TRUNCATE when we have truncate_log for
429 		recovery?
430 		- truncate log can protect us if crash happens while truncate
431 		  is active. Once truncate is done truncate log is removed.
432 		- If crash happens post truncate and system is yet to
433 		  checkpoint, on recovery we would see REDO records from action
434 		  before truncate (unless we explicitly checkpoint before
435 		  returning from truncate API. Costly alternative so rejected).
436 		- These REDO records may reference a page that doesn't exist
437 		  post truncate so we need a mechanism to skip all such REDO
438 		  records. MLOG_TRUNCATE records space_id and lsn that exactly
439 		  serve the purpose.
440 		- If checkpoint happens post truncate and crash happens post
441 		  this point then neither MLOG_TRUNCATE nor REDO record
442 		  from action before truncate are accessible. */
443 		if (!is_system_tablespace(m_table->space)) {
444 			mtr_t	mtr;
445 			byte*	log_ptr;
446 
447 			mtr_start(&mtr);
448 
449 			log_ptr = mlog_open(&mtr, 11 + 8);
450 			log_ptr = mlog_write_initial_log_record_low(
451 				MLOG_TRUNCATE, m_table->space, 0,
452 				log_ptr, &mtr);
453 
454 			mach_write_to_8(log_ptr, lsn);
455 			log_ptr += 8;
456 
457 			mlog_close(&mtr, log_ptr);
458 			mtr_commit(&mtr);
459 		}
460 
461 		return(err);
462 	}
463 
464 	/**
465 	Indicate completion of truncate log by writing magic-number.
466 	File will be removed from the system but to protect against
467 	unlink (File-System) anomalies we ensure we write magic-number. */
done()468 	void done()
469 	{
470 		if (m_log_file_name == 0) {
471 			return;
472 		}
473 
474 		bool	ret;
475 		pfs_os_file_t handle = os_file_create_simple_no_error_handling(
476 			innodb_log_file_key, m_log_file_name,
477 			OS_FILE_OPEN, OS_FILE_READ_WRITE,
478 			srv_read_only_mode, &ret);
479 		DBUG_EXECUTE_IF("ib_err_trunc_writing_magic_number",
480 				os_file_close(handle);
481 				ret = false;);
482 		if (!ret) {
483 			ib::error() << "Failed to open truncate log file "
484 				<< m_log_file_name << "."
485 				" If server crashes before truncate log is"
486 				" removed make sure it is manually removed"
487 				" before restarting server";
488 			os_file_delete(innodb_log_file_key, m_log_file_name);
489 			return;
490 		}
491 
492 		byte	buffer[sizeof(TruncateLogger::s_magic)];
493 		mach_write_to_4(buffer, TruncateLogger::s_magic);
494 
495 		dberr_t	err;
496 
497 		IORequest	request(IORequest::WRITE);
498 
499 		request.disable_compression();
500 
501 		err = os_file_write(
502 			request,
503 			m_log_file_name, handle, buffer, 0, sizeof(buffer));
504 
505 		if (err != DB_SUCCESS) {
506 
507 			ib::error()
508 				<< "IO: Failed to write the magic number to '"
509 				<< m_log_file_name << "'";
510 		}
511 
512 		DBUG_EXECUTE_IF("ib_trunc_crash_after_updating_magic_no",
513 				DBUG_SUICIDE(););
514 		os_file_flush(handle);
515 		os_file_close(handle);
516 		DBUG_EXECUTE_IF("ib_trunc_crash_after_logging_complete",
517 				log_buffer_flush_to_disk();
518 				os_thread_sleep(1000000);
519 				DBUG_SUICIDE(););
520 		os_file_delete(innodb_log_file_key, m_log_file_name);
521 	}
522 
523 private:
524 	// Disably copying
525 	TruncateLogger(const TruncateLogger&);
526 	TruncateLogger& operator=(const TruncateLogger&);
527 
528 private:
529 	/** Lookup the index using the index id.
530 	@return index instance if found else NULL */
find(index_id_t id) const531 	const dict_index_t* find(index_id_t id) const
532 	{
533 		for (const dict_index_t* index = UT_LIST_GET_FIRST(
534 				m_table->indexes);
535 		     index != NULL;
536 		     index = UT_LIST_GET_NEXT(indexes, index)) {
537 
538 			if (index->id == id) {
539 				return(index);
540 			}
541 		}
542 
543 		return(NULL);
544 	}
545 
546 private:
547 	/** Table to be truncated */
548 	dict_table_t*		m_table;
549 
550 	/** Tablespace flags */
551 	ulint			m_flags;
552 
553 	/** Collect table to truncate information */
554 	truncate_t		m_truncate;
555 
556 	/** Truncate log file name. */
557 	char*			m_log_file_name;
558 
559 
560 public:
561 	/** Magic Number to indicate truncate action is complete. */
562 	const static ib_uint32_t	s_magic;
563 
564 	/** Truncate Log file Prefix. */
565 	const static char*		s_log_prefix;
566 
567 	/** Truncate Log file Extension. */
568 	const static char*		s_log_ext;
569 };
570 
/* Definitions of the TruncateLogger static constants. */

/* Magic number written at the start of a truncate log file to indicate that
the truncate action is complete. */
const ib_uint32_t	TruncateLogger::s_magic = 32743712;
/* Truncate log file names start with this prefix ... */
const char*		TruncateLogger::s_log_prefix = "ib_";
/* ... and end with this extension. */
const char*		TruncateLogger::s_log_ext = "trunc.log";
574 
575 /**
576 Scan to find out truncate log file from the given directory path.
577 
578 @param dir_path		look for log directory in following path.
579 @param log_files	cache to hold truncate log file name found.
580 @return DB_SUCCESS or error code. */
581 dberr_t
scan(const char * dir_path,trunc_log_files_t & log_files)582 TruncateLogParser::scan(
583 	const char*		dir_path,
584 	trunc_log_files_t&	log_files)
585 {
586 	os_file_dir_t	dir;
587 	os_file_stat_t	fileinfo;
588 	dberr_t		err = DB_SUCCESS;
589 	ulint		ext_len = strlen(TruncateLogger::s_log_ext);
590 	ulint		prefix_len = strlen(TruncateLogger::s_log_prefix);
591 	ulint		dir_len = strlen(dir_path);
592 
593 	/* Scan and look out for the truncate log files. */
594 	dir = os_file_opendir(dir_path, true);
595 	if (dir == NULL) {
596 		return(DB_IO_ERROR);
597 	}
598 
599 	while (fil_file_readdir_next_file(
600 			&err, dir_path, dir, &fileinfo) == 0) {
601 
602 		ulint nm_len = strlen(fileinfo.name);
603 
604 		if (fileinfo.type == OS_FILE_TYPE_FILE
605 		    && nm_len > ext_len + prefix_len
606 		    && (0 == strncmp(fileinfo.name + nm_len - ext_len,
607 				     TruncateLogger::s_log_ext, ext_len))
608 		    && (0 == strncmp(fileinfo.name,
609 				     TruncateLogger::s_log_prefix,
610 				     prefix_len))) {
611 
612 			if (fileinfo.size == 0) {
613 				/* Truncate log not written. Remove the file. */
614 				os_file_delete(
615 					innodb_log_file_key, fileinfo.name);
616 				continue;
617 			}
618 
619 			/* Construct file name by appending directory path */
620 			ulint	sz = dir_len + 22 + 22 + 1 + ext_len + prefix_len;
621 			char*	log_file_name = UT_NEW_ARRAY_NOKEY(char, sz);
622 			if (log_file_name == NULL) {
623 				err = DB_OUT_OF_MEMORY;
624 				break;
625 			}
626 			memset(log_file_name, 0, sz);
627 
628 			memcpy(log_file_name, dir_path, dir_len);
629 			ulint	log_file_name_len = strlen(log_file_name);
630 			if (log_file_name[log_file_name_len - 1]
631 				!= OS_PATH_SEPARATOR) {
632 
633 				log_file_name[log_file_name_len]
634 					= OS_PATH_SEPARATOR;
635 				log_file_name_len = strlen(log_file_name);
636 			}
637 			strcat(log_file_name, fileinfo.name);
638 			log_files.push_back(log_file_name);
639 		}
640 	}
641 
642 	os_file_closedir(dir);
643 
644 	return(err);
645 }
646 
647 /**
648 Parse the log file and populate table to truncate information.
649 (Add this table to truncate information to central vector that is then
650  used by truncate fix-up routine to fix-up truncate action of the table.)
651 
652 @param	log_file_name	log file to parse
653 @return DB_SUCCESS or error code. */
654 dberr_t
parse(const char * log_file_name)655 TruncateLogParser::parse(
656 	const char*	log_file_name)
657 {
658 	dberr_t		err = DB_SUCCESS;
659 	truncate_t*	truncate = NULL;
660 
661 	/* Open the file and read magic-number to findout if truncate action
662 	was completed. */
663 	bool		ret;
664 	pfs_os_file_t	handle = os_file_create_simple(
665 		innodb_log_file_key, log_file_name,
666 		OS_FILE_OPEN, OS_FILE_READ_ONLY, srv_read_only_mode, &ret);
667 	if (!ret) {
668 		ib::error() << "Error opening truncate log file: "
669 			<< log_file_name;
670 		return(DB_IO_ERROR);
671 	}
672 
673 	ulint	sz = UNIV_PAGE_SIZE;
674 	void*	buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
675 	if (buf == 0) {
676 		os_file_close(handle);
677 		return(DB_OUT_OF_MEMORY);
678 	}
679 
680 	IORequest	request(IORequest::READ);
681 
682 	request.disable_compression();
683 
684 	/* Align the memory for file i/o if we might have O_DIRECT set*/
685 	byte*	log_buf = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
686 
687 	do {
688 		err = os_file_read(request, handle, log_buf, 0, sz);
689 
690 		if (err != DB_SUCCESS) {
691 			os_file_close(handle);
692 			break;
693 		}
694 
695 		ulint	magic_n = mach_read_from_4(log_buf);
696 		if (magic_n == TruncateLogger::s_magic) {
697 
698 			/* Truncate action completed. Avoid parsing the file. */
699 			os_file_close(handle);
700 
701 			os_file_delete(innodb_log_file_key, log_file_name);
702 			break;
703 		}
704 
705 		if (truncate == NULL) {
706 			truncate = UT_NEW_NOKEY(truncate_t(log_file_name));
707 			if (truncate == NULL) {
708 				os_file_close(handle);
709 				err = DB_OUT_OF_MEMORY;
710 				break;
711 			}
712 		}
713 
714 		err = truncate->parse(log_buf + 4, log_buf + sz - 4);
715 
716 		if (err != DB_SUCCESS) {
717 
718 			ut_ad(err == DB_FAIL);
719 
720 			ut_free(buf);
721 			buf = 0;
722 
723 			sz *= 2;
724 
725 			buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE);
726 
727 			if (buf == 0) {
728 				os_file_close(handle);
729 				err = DB_OUT_OF_MEMORY;
730 				UT_DELETE(truncate);
731 				truncate = NULL;
732 				break;
733 			}
734 
735 			log_buf = static_cast<byte*>(
736 				ut_align(buf, UNIV_PAGE_SIZE));
737 		}
738 	} while (err != DB_SUCCESS);
739 
740 	ut_free(buf);
741 
742 	if (err == DB_SUCCESS && truncate != NULL) {
743 		truncate_t::add(truncate);
744 		os_file_close(handle);
745 	}
746 
747 	return(err);
748 }
749 
750 /**
751 Scan and Parse truncate log files.
752 
753 @param dir_path		look for log directory in following path
754 @return DB_SUCCESS or error code. */
755 dberr_t
scan_and_parse(const char * dir_path)756 TruncateLogParser::scan_and_parse(
757 	const char*	dir_path)
758 {
759 	dberr_t			err;
760 	trunc_log_files_t	log_files;
761 
762 	/* Scan and trace all the truncate log files. */
763 	err = TruncateLogParser::scan(dir_path, log_files);
764 
765 	/* Parse truncate lof files if scan was successful. */
766 	if (err == DB_SUCCESS) {
767 
768 		for (ulint i = 0;
769 		     i < log_files.size() && err == DB_SUCCESS;
770 		     i++) {
771 			err = TruncateLogParser::parse(log_files[i]);
772 		}
773 	}
774 
775 	trunc_log_files_t::const_iterator end = log_files.end();
776 	for (trunc_log_files_t::const_iterator it = log_files.begin();
777 	     it != end;
778 	     ++it) {
779 		if (*it != NULL) {
780 			UT_DELETE_ARRAY(*it);
781 		}
782 	}
783 	log_files.clear();
784 
785 	return(err);
786 }
787 
788 /** Callback to drop indexes during TRUNCATE */
789 class DropIndex : public Callback {
790 
791 public:
792 	/**
793 	Constructor
794 
795 	@param[in,out]	table	Table to truncate
796 	@param[in]	noredo	whether to disable redo logging */
DropIndex(dict_table_t * table,bool noredo)797 	DropIndex(dict_table_t* table, bool noredo)
798 		:
799 		Callback(table->id, noredo),
800 		m_table(table)
801 	{
802 		/* No op */
803 	}
804 
805 	/**
806 	@param mtr	mini-transaction covering the read
807 	@param pcur	persistent cursor used for reading
808 	@return DB_SUCCESS or error code */
809 	dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur) const;
810 
811 private:
812 	/** Table to be truncated */
813 	dict_table_t*		m_table;
814 };
815 
816 /** Callback to create the indexes during TRUNCATE */
817 class CreateIndex : public Callback {
818 
819 public:
820 	/**
821 	Constructor
822 
823 	@param[in,out]	table	Table to truncate
824 	@param[in]	noredo	whether to disable redo logging */
CreateIndex(dict_table_t * table,bool noredo)825 	CreateIndex(dict_table_t* table, bool noredo)
826 		:
827 		Callback(table->id, noredo),
828 		m_table(table)
829 	{
830 		/* No op */
831 	}
832 
833 	/**
834 	Create the new index and update the root page number in the
835 	SysIndex table.
836 
837 	@param mtr	mini-transaction covering the read
838 	@param pcur	persistent cursor used for reading
839 	@return DB_SUCCESS or error code */
840 	dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur) const;
841 
842 private:
843 	// Disably copying
844 	CreateIndex(const CreateIndex&);
845 	CreateIndex& operator=(const CreateIndex&);
846 
847 private:
848 	/** Table to be truncated */
849 	dict_table_t*		m_table;
850 };
851 
852 /** Check for presence of table-id in SYS_XXXX tables. */
853 class TableLocator : public Callback {
854 
855 public:
856 	/**
857 	Constructor
858 	@param table_id	table_id to look for */
TableLocator(table_id_t table_id)859 	explicit TableLocator(table_id_t table_id)
860 		:
861 		Callback(table_id, false),
862 		m_table_found()
863 	{
864 		/* No op */
865 	}
866 
867 	/**
868 	@return true if table is found */
is_table_found() const869 	bool is_table_found() const
870 	{
871 		return(m_table_found);
872 	}
873 
874 	/**
875 	Look for table-id in SYS_XXXX tables without loading the table.
876 
877 	@param mtr	mini-transaction covering the read
878 	@param pcur	persistent cursor used for reading
879 	@return DB_SUCCESS or error code */
880 	dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur);
881 
882 private:
883 	// Disably copying
884 	TableLocator(const TableLocator&);
885 	TableLocator& operator=(const TableLocator&);
886 
887 private:
888 	/** Set to true if table is present */
889 	bool			m_table_found;
890 };
891 
892 /**
893 @param mtr	mini-transaction covering the read
894 @param pcur	persistent cursor used for reading
895 @return DB_SUCCESS or error code */
896 dberr_t
operator ()(mtr_t * mtr,btr_pcur_t * pcur)897 TruncateLogger::operator()(mtr_t* mtr, btr_pcur_t* pcur)
898 {
899 	ulint			len;
900 	const byte*		field;
901 	rec_t*			rec = btr_pcur_get_rec(pcur);
902 	truncate_t::index_t	index;
903 
904 	field = rec_get_nth_field_old(
905 		rec, DICT_FLD__SYS_INDEXES__TYPE, &len);
906 	ut_ad(len == 4);
907 	index.m_type = mach_read_from_4(field);
908 
909 	field = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__ID, &len);
910 	ut_ad(len == 8);
911 	index.m_id = mach_read_from_8(field);
912 
913 	field = rec_get_nth_field_old(
914 			rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
915 	ut_ad(len == 4);
916 	index.m_root_page_no = mach_read_from_4(field);
917 
918 	/* For compressed tables we need to store extra meta-data
919 	required during btr_create(). */
920 	if (fsp_flags_is_compressed(m_flags)) {
921 
922 		const dict_index_t* dict_index = find(index.m_id);
923 
924 		if (dict_index != NULL) {
925 
926 			dberr_t err = index.set(dict_index);
927 
928 			if (err != DB_SUCCESS) {
929 				m_truncate.clear();
930 				return(err);
931 			}
932 
933 		} else {
934 			ib::warn() << "Index id " << index.m_id
935 				<< " not found";
936 		}
937 	}
938 
939 	m_truncate.add(index);
940 
941 	return(DB_SUCCESS);
942 }
943 
944 /**
945 Drop an index in the table.
946 
947 @param mtr	mini-transaction covering the read
948 @param pcur	persistent cursor used for reading
949 @return DB_SUCCESS or error code */
950 dberr_t
operator ()(mtr_t * mtr,btr_pcur_t * pcur) const951 DropIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const
952 {
953 	rec_t*	rec = btr_pcur_get_rec(pcur);
954 
955 	bool	freed = dict_drop_index_tree(rec, pcur, mtr);
956 
957 #ifdef UNIV_DEBUG
958 	{
959 		ulint		len;
960 		const byte*	field;
961 		ulint		index_type;
962 
963 		field = rec_get_nth_field_old(
964 			btr_pcur_get_rec(pcur), DICT_FLD__SYS_INDEXES__TYPE,
965 			&len);
966 		ut_ad(len == 4);
967 
968 		index_type = mach_read_from_4(field);
969 
970 		if (index_type & DICT_CLUSTERED) {
971 			/* Clustered index */
972 			DBUG_EXECUTE_IF("ib_trunc_crash_on_drop_of_clust_index",
973 					log_buffer_flush_to_disk();
974 					os_thread_sleep(2000000);
975 					DBUG_SUICIDE(););
976 		} else if (index_type & DICT_UNIQUE) {
977 			/* Unique index */
978 			DBUG_EXECUTE_IF("ib_trunc_crash_on_drop_of_uniq_index",
979 					log_buffer_flush_to_disk();
980 					os_thread_sleep(2000000);
981 					DBUG_SUICIDE(););
982 		} else if (index_type == 0) {
983 			/* Secondary index */
984 			DBUG_EXECUTE_IF("ib_trunc_crash_on_drop_of_sec_index",
985 					log_buffer_flush_to_disk();
986 					os_thread_sleep(2000000);
987 					DBUG_SUICIDE(););
988 		}
989 	}
990 #endif /* UNIV_DEBUG */
991 
992 	DBUG_EXECUTE_IF("ib_err_trunc_drop_index",
993 			freed = false;);
994 
995 	if (freed) {
996 
997 		/* We will need to commit and restart the
998 		mini-transaction in order to avoid deadlocks.
999 		The dict_drop_index_tree() call has freed
1000 		a page in this mini-transaction, and the rest
1001 		of this loop could latch another index page.*/
1002 		const mtr_log_t log_mode = mtr->get_log_mode();
1003 		mtr_commit(mtr);
1004 
1005 		mtr_start(mtr);
1006 		mtr->set_log_mode(log_mode);
1007 
1008 		btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
1009 	} else {
1010 		/* Check if the .ibd file is missing. */
1011 		bool	found;
1012 
1013 		fil_space_get_page_size(m_table->space, &found);
1014 
1015 		DBUG_EXECUTE_IF("ib_err_trunc_drop_index",
1016 				found = false;);
1017 
1018 		if (!found) {
1019 			return(DB_ERROR);
1020 		}
1021 	}
1022 
1023 	return(DB_SUCCESS);
1024 }
1025 
1026 /**
1027 Create the new index and update the root page number in the
1028 SysIndex table.
1029 
1030 @param mtr	mini-transaction covering the read
1031 @param pcur	persistent cursor used for reading
1032 @return DB_SUCCESS or error code */
1033 dberr_t
operator ()(mtr_t * mtr,btr_pcur_t * pcur) const1034 CreateIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const
1035 {
1036 	ulint	root_page_no;
1037 
1038 	root_page_no = dict_recreate_index_tree(m_table, pcur, mtr);
1039 
1040 #ifdef UNIV_DEBUG
1041 	{
1042 		ulint		len;
1043 		const byte*	field;
1044 		ulint		index_type;
1045 
1046 		field = rec_get_nth_field_old(
1047 			btr_pcur_get_rec(pcur), DICT_FLD__SYS_INDEXES__TYPE,
1048 			&len);
1049 		ut_ad(len == 4);
1050 
1051 		index_type = mach_read_from_4(field);
1052 
1053 		if (index_type & DICT_CLUSTERED) {
1054 			/* Clustered index */
1055 			DBUG_EXECUTE_IF(
1056 				"ib_trunc_crash_on_create_of_clust_index",
1057 				log_buffer_flush_to_disk();
1058 				os_thread_sleep(2000000);
1059 				DBUG_SUICIDE(););
1060 		} else if (index_type & DICT_UNIQUE) {
1061 			/* Unique index */
1062 			DBUG_EXECUTE_IF(
1063 				"ib_trunc_crash_on_create_of_uniq_index",
1064 				log_buffer_flush_to_disk();
1065 				os_thread_sleep(2000000);
1066 				DBUG_SUICIDE(););
1067 		} else if (index_type == 0) {
1068 			/* Secondary index */
1069 			DBUG_EXECUTE_IF(
1070 				"ib_trunc_crash_on_create_of_sec_index",
1071 				log_buffer_flush_to_disk();
1072 				os_thread_sleep(2000000);
1073 				DBUG_SUICIDE(););
1074 		}
1075 	}
1076 #endif /* UNIV_DEBUG */
1077 
1078 	DBUG_EXECUTE_IF("ib_err_trunc_create_index",
1079 			root_page_no = FIL_NULL;);
1080 
1081 	if (root_page_no != FIL_NULL) {
1082 
1083 		rec_t*	rec = btr_pcur_get_rec(pcur);
1084 
1085 		page_rec_write_field(
1086 			rec, DICT_FLD__SYS_INDEXES__PAGE_NO,
1087 			root_page_no, mtr);
1088 
1089 		/* We will need to commit and restart the
1090 		mini-transaction in order to avoid deadlocks.
1091 		The dict_create_index_tree() call has allocated
1092 		a page in this mini-transaction, and the rest of
1093 		this loop could latch another index page. */
1094 		mtr_commit(mtr);
1095 
1096 		mtr_start(mtr);
1097 
1098 		btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
1099 
1100 	} else {
1101 		bool	found;
1102 		fil_space_get_page_size(m_table->space, &found);
1103 
1104 		DBUG_EXECUTE_IF("ib_err_trunc_create_index",
1105 				found = false;);
1106 
1107 		if (!found) {
1108 			return(DB_ERROR);
1109 		}
1110 	}
1111 
1112 	return(DB_SUCCESS);
1113 }
1114 
1115 /**
1116 Look for table-id in SYS_XXXX tables without loading the table.
1117 
1118 @param mtr	mini-transaction covering the read
1119 @param pcur	persistent cursor used for reading
1120 @return DB_SUCCESS */
1121 dberr_t
operator ()(mtr_t * mtr,btr_pcur_t * pcur)1122 TableLocator::operator()(mtr_t* mtr, btr_pcur_t* pcur)
1123 {
1124 	m_table_found = true;
1125 
1126 	return(DB_SUCCESS);
1127 }
1128 
1129 /**
1130 Rollback the transaction and release the index locks.
1131 Drop indexes if table is corrupted so that drop/create
1132 sequence works as expected.
1133 
1134 @param table			table to truncate
1135 @param trx			transaction covering the TRUNCATE
1136 @param new_id			new table id that was suppose to get assigned
1137 				to the table if truncate executed successfully.
1138 @param has_internal_doc_id	indicate existence of fts index
1139 @param no_redo			if true, turn-off redo logging
1140 @param corrupted		table corrupted status
1141 @param unlock_index		if true then unlock indexes before action */
1142 static
1143 void
row_truncate_rollback(dict_table_t * table,trx_t * trx,table_id_t new_id,bool has_internal_doc_id,bool no_redo,bool corrupted,bool unlock_index)1144 row_truncate_rollback(
1145 	dict_table_t*	table,
1146 	trx_t*		trx,
1147 	table_id_t	new_id,
1148 	bool		has_internal_doc_id,
1149 	bool		no_redo,
1150 	bool		corrupted,
1151 	bool		unlock_index)
1152 {
1153 	if (unlock_index) {
1154 		dict_table_x_unlock_indexes(table);
1155 	}
1156 
1157 	trx->error_state = DB_SUCCESS;
1158 
1159 	trx_rollback_to_savepoint(trx, NULL);
1160 
1161 	trx->error_state = DB_SUCCESS;
1162 
1163 	if (corrupted && !dict_table_is_temporary(table)) {
1164 
1165 		/* Cleanup action to ensure we don't left over stale entries
1166 		if we are marking table as corrupted. This will ensure
1167 		it can be recovered using drop/create sequence. */
1168 		dict_table_x_lock_indexes(table);
1169 
1170 		DropIndex       dropIndex(table, no_redo);
1171 
1172 		SysIndexIterator().for_each(dropIndex);
1173 
1174 		dict_table_x_unlock_indexes(table);
1175 
1176 		for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
1177 		     index != NULL;
1178 		     index = UT_LIST_GET_NEXT(indexes, index)) {
1179 
1180 			dict_set_corrupted(index, trx, "TRUNCATE TABLE");
1181 		}
1182 
1183 		if (has_internal_doc_id) {
1184 
1185 			ut_ad(!trx_is_started(trx));
1186 
1187 			table_id_t      id = table->id;
1188 
1189 			table->id = new_id;
1190 
1191 			fts_drop_tables(trx, table);
1192 
1193 			table->id = id;
1194 
1195 			ut_ad(trx_is_started(trx));
1196 
1197 			trx_commit_for_mysql(trx);
1198 		}
1199 
1200 	} else if (corrupted && dict_table_is_temporary(table)) {
1201 
1202 		dict_table_x_lock_indexes(table);
1203 
1204 		for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
1205 		     index != NULL;
1206 		     index = UT_LIST_GET_NEXT(indexes, index)) {
1207 
1208 			dict_drop_index_tree_in_mem(index, index->page);
1209 
1210 			index->page = FIL_NULL;
1211 		}
1212 
1213 		dict_table_x_unlock_indexes(table);
1214 	}
1215 
1216 	table->corrupted = corrupted;
1217 }
1218 
1219 /**
1220 Finish the TRUNCATE operations for both commit and rollback.
1221 
1222 @param table		table being truncated
1223 @param trx		transaction covering the truncate
1224 @param fsp_flags	tablespace flags
1225 @param logger		table to truncate information logger
1226 @param err		status of truncate operation
1227 
1228 @return DB_SUCCESS or error code */
1229 static MY_ATTRIBUTE((warn_unused_result))
1230 dberr_t
row_truncate_complete(dict_table_t * table,trx_t * trx,ulint fsp_flags,TruncateLogger * & logger,dberr_t err)1231 row_truncate_complete(
1232 	dict_table_t*		table,
1233 	trx_t*			trx,
1234 	ulint			fsp_flags,
1235 	TruncateLogger*		&logger,
1236 	dberr_t			err)
1237 {
1238 	bool	is_file_per_table = dict_table_is_file_per_table(table);
1239 
1240 	if (table->memcached_sync_count == DICT_TABLE_IN_DDL) {
1241 		/* We need to set the memcached sync back to 0, unblock
1242 		memcached operations. */
1243 		table->memcached_sync_count = 0;
1244 	}
1245 
1246 	/* Add the table back to FTS optimize background thread. */
1247 	if (table->fts) {
1248 		fts_optimize_add_table(table);
1249 	}
1250 
1251 	row_mysql_unlock_data_dictionary(trx);
1252 
1253 	DEBUG_SYNC_C("ib_trunc_table_trunc_completing");
1254 
1255 	if (!dict_table_is_temporary(table)) {
1256 
1257 		DBUG_EXECUTE_IF("ib_trunc_crash_before_log_removal",
1258 				log_buffer_flush_to_disk();
1259 				os_thread_sleep(500000);
1260 				DBUG_SUICIDE(););
1261 
1262 		/* Note: We don't log-checkpoint instead we have written
1263 		a special REDO log record MLOG_TRUNCATE that is used to
1264 		avoid applying REDO records before truncate for crash
1265 		that happens post successful truncate completion. */
1266 
1267 		if (logger != NULL) {
1268 			logger->done();
1269 			UT_DELETE(logger);
1270 			logger = NULL;
1271 		}
1272 	}
1273 
1274 	/* If non-temp file-per-table tablespace... */
1275 	if (is_file_per_table
1276 	    && !dict_table_is_temporary(table)
1277 	    && fsp_flags != ULINT_UNDEFINED) {
1278 
1279 		/* This function will reset back the stop_new_ops
1280 		and is_being_truncated so that fil-ops can re-start. */
1281 		dberr_t err2 = truncate_t::truncate(
1282 			table->space,
1283 			table->data_dir_path,
1284 			table->name.m_name, fsp_flags, false);
1285 
1286 		if (err2 != DB_SUCCESS) {
1287 			return(err2);
1288 		}
1289 	}
1290 
1291 	if (err == DB_SUCCESS) {
1292 		dict_stats_update(table, DICT_STATS_EMPTY_TABLE);
1293 	}
1294 
1295 	trx->op_info = "";
1296 
1297 	/* For temporary tables or if there was an error, we need to reset
1298 	the dict operation flags. */
1299 	trx->ddl = false;
1300 	trx->dict_operation = TRX_DICT_OP_NONE;
1301 
1302 	ut_ad(!trx_is_started(trx));
1303 
1304 	srv_wake_master_thread();
1305 
1306 	DBUG_EXECUTE_IF("ib_trunc_crash_after_truncate_done",
1307 			DBUG_SUICIDE(););
1308 
1309 	return(err);
1310 }
1311 
1312 /**
1313 Handle FTS truncate issues.
1314 @param table		table being truncated
1315 @param new_id		new id for the table
1316 @param trx		transaction covering the truncate
1317 @return DB_SUCCESS or error code. */
1318 static MY_ATTRIBUTE((warn_unused_result))
1319 dberr_t
row_truncate_fts(dict_table_t * table,table_id_t new_id,trx_t * trx)1320 row_truncate_fts(
1321 	dict_table_t*	table,
1322 	table_id_t	new_id,
1323 	trx_t*		trx)
1324 {
1325 	dict_table_t	fts_table;
1326 
1327 	fts_table.id = new_id;
1328 	fts_table.name = table->name;
1329 	fts_table.flags2 = table->flags2;
1330 	fts_table.flags = table->flags;
1331 	fts_table.tablespace = table->tablespace;
1332 	fts_table.space = table->space;
1333 
1334 	/* table->data_dir_path is used for FTS AUX table
1335 	creation. */
1336 	if (DICT_TF_HAS_DATA_DIR(table->flags)
1337 	    && table->data_dir_path == NULL) {
1338 		dict_get_and_save_data_dir_path(table, true);
1339 		ut_ad(table->data_dir_path != NULL);
1340 	}
1341 
1342 	/* table->tablespace() may not be always populated or
1343 	if table->tablespace() uses "innodb_general" name,
1344 	fetch the real name. */
1345 	if (DICT_TF_HAS_SHARED_SPACE(table->flags)
1346 	    && (table->tablespace() == NULL
1347 		|| dict_table_has_temp_general_tablespace_name(
1348 			table->tablespace()))) {
1349 		dict_get_and_save_space_name(table, true);
1350 		ut_ad(table->tablespace() != NULL);
1351 		ut_ad(!dict_table_has_temp_general_tablespace_name(
1352 			table->tablespace()));
1353 	}
1354 
1355 	fts_table.tablespace = table->tablespace();
1356 	fts_table.data_dir_path = table->data_dir_path;
1357 
1358 	dberr_t		err;
1359 
1360 	err = fts_create_common_tables(
1361 		trx, &fts_table, table->name.m_name, TRUE);
1362 
1363 	for (ulint i = 0;
1364 	     i < ib_vector_size(table->fts->indexes) && err == DB_SUCCESS;
1365 	     i++) {
1366 
1367 		dict_index_t*	fts_index;
1368 
1369 		fts_index = static_cast<dict_index_t*>(
1370 			ib_vector_getp(table->fts->indexes, i));
1371 
1372 		err = fts_create_index_tables_low(
1373 			trx, fts_index, table->name.m_name, new_id);
1374 	}
1375 
1376 	DBUG_EXECUTE_IF("ib_err_trunc_during_fts_trunc",
1377 			err = DB_ERROR;);
1378 
1379 	if (err != DB_SUCCESS) {
1380 
1381 		trx->error_state = DB_SUCCESS;
1382 		trx_rollback_to_savepoint(trx, NULL);
1383 		trx->error_state = DB_SUCCESS;
1384 
1385 		ib::error() << "Unable to truncate FTS index for table "
1386 			<< table->name;
1387 	} else {
1388 
1389 		ut_ad(trx_is_started(trx));
1390 	}
1391 
1392 	return(err);
1393 }
1394 
1395 /**
1396 Update system table to reflect new table id.
1397 @param old_table_id		old table id
1398 @param new_table_id		new table id
1399 @param reserve_dict_mutex	if TRUE, acquire/release
1400 				dict_sys->mutex around call to pars_sql.
1401 @param trx			transaction
1402 @return error code or DB_SUCCESS */
1403 static MY_ATTRIBUTE((warn_unused_result))
1404 dberr_t
row_truncate_update_table_id(table_id_t old_table_id,table_id_t new_table_id,ibool reserve_dict_mutex,trx_t * trx)1405 row_truncate_update_table_id(
1406 	table_id_t	old_table_id,
1407 	table_id_t	new_table_id,
1408 	ibool		reserve_dict_mutex,
1409 	trx_t*		trx)
1410 {
1411 	pars_info_t*	info	= NULL;
1412 	dberr_t		err	= DB_SUCCESS;
1413 
1414 	/* Scan the SYS_XXXX table and update to reflect new table-id. */
1415 	info = pars_info_create();
1416 	pars_info_add_ull_literal(info, "old_id", old_table_id);
1417 	pars_info_add_ull_literal(info, "new_id", new_table_id);
1418 
1419 	err = que_eval_sql(
1420 		info,
1421 		"PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n"
1422 		"BEGIN\n"
1423 		"UPDATE SYS_TABLES"
1424 		" SET ID = :new_id\n"
1425 		" WHERE ID = :old_id;\n"
1426 		"UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
1427 		" WHERE TABLE_ID = :old_id;\n"
1428 		"UPDATE SYS_INDEXES"
1429 		" SET TABLE_ID = :new_id\n"
1430 		" WHERE TABLE_ID = :old_id;\n"
1431 		"UPDATE SYS_VIRTUAL"
1432 		" SET TABLE_ID = :new_id\n"
1433 		" WHERE TABLE_ID = :old_id;\n"
1434 		"END;\n", reserve_dict_mutex, trx);
1435 
1436 	return(err);
1437 }
1438 
1439 /**
1440 Get the table id to truncate.
1441 @param truncate_t		old/new table id of table to truncate
1442 @return table_id_t		table_id to use in SYS_XXXX table update. */
1443 static MY_ATTRIBUTE((warn_unused_result))
1444 table_id_t
row_truncate_get_trunc_table_id(const truncate_t & truncate)1445 row_truncate_get_trunc_table_id(
1446 	const truncate_t&	truncate)
1447 {
1448 	TableLocator tableLocator(truncate.old_table_id());
1449 
1450 	SysIndexIterator().for_each(tableLocator);
1451 
1452 	return(tableLocator.is_table_found() ?
1453 		truncate.old_table_id(): truncate.new_table_id());
1454 }
1455 
1456 /**
1457 Update system table to reflect new table id and root page number.
1458 @param truncate_t		old/new table id of table to truncate
1459 				and updated root_page_no of indexes.
1460 @param new_table_id		new table id
1461 @param reserve_dict_mutex	if TRUE, acquire/release
1462 				dict_sys->mutex around call to pars_sql.
1463 @param mark_index_corrupted	if true, then mark index corrupted.
1464 @return error code or DB_SUCCESS */
1465 static MY_ATTRIBUTE((warn_unused_result))
1466 dberr_t
row_truncate_update_sys_tables_during_fix_up(const truncate_t & truncate,table_id_t new_table_id,ibool reserve_dict_mutex,bool mark_index_corrupted)1467 row_truncate_update_sys_tables_during_fix_up(
1468 	const truncate_t&	truncate,
1469 	table_id_t		new_table_id,
1470 	ibool			reserve_dict_mutex,
1471 	bool			mark_index_corrupted)
1472 {
1473 	trx_t*		trx = trx_allocate_for_background();
1474 
1475 	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
1476 
1477 	table_id_t	table_id = row_truncate_get_trunc_table_id(truncate);
1478 
1479 	/* Step-1: Update the root-page-no */
1480 
1481 	dberr_t	err;
1482 
1483 	err = truncate.update_root_page_no(
1484 		trx, table_id, reserve_dict_mutex, mark_index_corrupted);
1485 
1486 	if (err != DB_SUCCESS) {
1487 		return(err);
1488 	}
1489 
1490 	/* Step-2: Update table-id. */
1491 
1492 	err = row_truncate_update_table_id(
1493 		table_id, new_table_id, reserve_dict_mutex, trx);
1494 
1495 	if (err == DB_SUCCESS) {
1496 		dict_mutex_enter_for_mysql();
1497 
1498 		/* Remove the table with old table_id from cache. */
1499 		dict_table_t*	old_table = dict_table_open_on_id(
1500 			table_id, true, DICT_TABLE_OP_NORMAL);
1501 
1502 		if (old_table != NULL) {
1503 			dict_table_close(old_table, true, false);
1504 			dict_table_remove_from_cache(old_table);
1505 		}
1506 
1507 		/* Open table with new table_id and set table as
1508 		corrupted if it has FTS index. */
1509 
1510 		dict_table_t*	table = dict_table_open_on_id(
1511 			new_table_id, true, DICT_TABLE_OP_NORMAL);
1512 		ut_ad(table->id == new_table_id);
1513 
1514 		bool	has_internal_doc_id =
1515 			dict_table_has_fts_index(table)
1516 			|| DICT_TF2_FLAG_IS_SET(
1517 				table, DICT_TF2_FTS_HAS_DOC_ID);
1518 
1519 		if (has_internal_doc_id) {
1520 			trx->dict_operation_lock_mode = RW_X_LATCH;
1521 			fts_check_corrupt(table, trx);
1522 			trx->dict_operation_lock_mode = 0;
1523 		}
1524 
1525 		dict_table_close(table, true, false);
1526 		dict_mutex_exit_for_mysql();
1527 	}
1528 
1529 	trx_commit_for_mysql(trx);
1530 	trx_free_for_background(trx);
1531 
1532 	return(err);
1533 }
1534 
1535 /**
1536 Truncate also results in assignment of new table id, update the system
1537 SYSTEM TABLES with the new id.
1538 @param table,			table being truncated
1539 @param new_id,			new table id
1540 @param has_internal_doc_id,	has doc col (fts)
1541 @param no_redo			if true, turn-off redo logging
1542 @param trx			transaction handle
1543 @return	error code or DB_SUCCESS */
1544 static MY_ATTRIBUTE((warn_unused_result))
1545 dberr_t
row_truncate_update_system_tables(dict_table_t * table,table_id_t new_id,bool has_internal_doc_id,bool no_redo,trx_t * trx)1546 row_truncate_update_system_tables(
1547 	dict_table_t*	table,
1548 	table_id_t	new_id,
1549 	bool		has_internal_doc_id,
1550 	bool		no_redo,
1551 	trx_t*		trx)
1552 {
1553 	dberr_t		err	= DB_SUCCESS;
1554 
1555 	ut_a(!dict_table_is_temporary(table));
1556 
1557 	err = row_truncate_update_table_id(table->id, new_id, FALSE, trx);
1558 
1559 	DBUG_EXECUTE_IF("ib_err_trunc_during_sys_table_update",
1560 			err = DB_ERROR;);
1561 
1562 	if (err != DB_SUCCESS) {
1563 
1564 		row_truncate_rollback(
1565 			table, trx, new_id, has_internal_doc_id,
1566 			no_redo, true, false);
1567 
1568 		ib::error() << "Unable to assign a new identifier to table "
1569 			<< table->name << " after truncating it. Marked the"
1570 			" table as corrupted. In-memory representation is now"
1571 			" different from the on-disk representation.";
1572 		err = DB_ERROR;
1573 	} else {
1574 		/* Drop the old FTS index */
1575 		if (has_internal_doc_id) {
1576 
1577 			ut_ad(trx_is_started(trx));
1578 
1579 			fts_drop_tables(trx, table);
1580 
1581 			DBUG_EXECUTE_IF("ib_truncate_crash_while_fts_cleanup",
1582 					DBUG_SUICIDE(););
1583 
1584 			ut_ad(trx_is_started(trx));
1585 		}
1586 
1587 		DBUG_EXECUTE_IF("ib_trunc_crash_after_fts_drop",
1588 				log_buffer_flush_to_disk();
1589 				os_thread_sleep(2000000);
1590 				DBUG_SUICIDE(););
1591 
1592 		dict_table_change_id_in_cache(table, new_id);
1593 
1594 		/* Reset the Doc ID in cache to 0 */
1595 		if (has_internal_doc_id && table->fts->cache != NULL) {
1596 			DBUG_EXECUTE_IF("ib_trunc_sleep_before_fts_cache_clear",
1597 					os_thread_sleep(10000000););
1598 
1599 			table->fts->fts_status |= TABLE_DICT_LOCKED;
1600 			fts_update_next_doc_id(trx, table, NULL, 0);
1601 			fts_cache_clear(table->fts->cache);
1602 			fts_cache_init(table->fts->cache);
1603 			table->fts->fts_status &= ~TABLE_DICT_LOCKED;
1604 		}
1605 	}
1606 
1607 	return(err);
1608 }
1609 
1610 /**
1611 Prepare for the truncate process. On success all of the table's indexes will
1612 be locked in X mode.
1613 @param table		table to truncate
1614 @param flags		tablespace flags
1615 @return	error code or DB_SUCCESS */
1616 static MY_ATTRIBUTE((warn_unused_result))
1617 dberr_t
row_truncate_prepare(dict_table_t * table,ulint * flags)1618 row_truncate_prepare(dict_table_t* table, ulint* flags)
1619 {
1620 	ut_ad(!dict_table_is_temporary(table));
1621 	ut_ad(dict_table_is_file_per_table(table));
1622 
1623 	*flags = fil_space_get_flags(table->space);
1624 
1625 	ut_ad(!dict_table_is_temporary(table));
1626 
1627 	dict_get_and_save_data_dir_path(table, true);
1628 
1629 	dict_get_and_save_space_name(table, true);
1630 
1631 	if (*flags != ULINT_UNDEFINED) {
1632 
1633 		dberr_t	err = fil_prepare_for_truncate(table->space);
1634 
1635 		if (err != DB_SUCCESS) {
1636 			return(err);
1637 		}
1638 	}
1639 
1640 	return(DB_SUCCESS);
1641 }
1642 
1643 /**
1644 Do foreign key checks before starting TRUNCATE.
1645 @param table		table being truncated
1646 @param trx		transaction covering the truncate
1647 @return DB_SUCCESS or error code */
1648 static MY_ATTRIBUTE((warn_unused_result))
1649 dberr_t
row_truncate_foreign_key_checks(const dict_table_t * table,const trx_t * trx)1650 row_truncate_foreign_key_checks(
1651 	const dict_table_t*	table,
1652 	const trx_t*		trx)
1653 {
1654 	/* Check if the table is referenced by foreign key constraints from
1655 	some other table (not the table itself) */
1656 
1657 	dict_foreign_set::iterator	it
1658 		= std::find_if(table->referenced_set.begin(),
1659 			       table->referenced_set.end(),
1660 			       dict_foreign_different_tables());
1661 
1662 	if (!srv_read_only_mode
1663 	    && it != table->referenced_set.end()
1664 	    && trx->check_foreigns) {
1665 
1666 		dict_foreign_t*	foreign = *it;
1667 
1668 		FILE*	ef = dict_foreign_err_file;
1669 
1670 		/* We only allow truncating a referenced table if
1671 		FOREIGN_KEY_CHECKS is set to 0 */
1672 
1673 		mutex_enter(&dict_foreign_err_mutex);
1674 
1675 		rewind(ef);
1676 
1677 		ut_print_timestamp(ef);
1678 
1679 		fputs("  Cannot truncate table ", ef);
1680 		ut_print_name(ef, trx, table->name.m_name);
1681 		fputs(" by DROP+CREATE\n"
1682 		      "InnoDB: because it is referenced by ", ef);
1683 		ut_print_name(ef, trx, foreign->foreign_table_name);
1684 		putc('\n', ef);
1685 
1686 		mutex_exit(&dict_foreign_err_mutex);
1687 
1688 		return(DB_ERROR);
1689 	}
1690 
1691 	/* TODO: could we replace the counter n_foreign_key_checks_running
1692 	with lock checks on the table? Acquire here an exclusive lock on the
1693 	table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that
1694 	they can cope with the table having been truncated here? Foreign key
1695 	checks take an IS or IX lock on the table. */
1696 
1697 	if (table->n_foreign_key_checks_running > 0) {
1698 		ib::warn() << "Cannot truncate table " << table->name
1699 			<< " because there is a foreign key check running on"
1700 			" it.";
1701 
1702 		return(DB_ERROR);
1703 	}
1704 
1705 	return(DB_SUCCESS);
1706 }
1707 
1708 /**
1709 Do some sanity checks before starting the actual TRUNCATE.
1710 @param table		table being truncated
1711 @return DB_SUCCESS or error code */
1712 static MY_ATTRIBUTE((warn_unused_result))
1713 dberr_t
row_truncate_sanity_checks(const dict_table_t * table)1714 row_truncate_sanity_checks(
1715 	const dict_table_t* table)
1716 {
1717 	if (dict_table_is_discarded(table)) {
1718 
1719 		return(DB_TABLESPACE_DELETED);
1720 
1721 	} else if (table->ibd_file_missing) {
1722 
1723 		return(DB_TABLESPACE_NOT_FOUND);
1724 
1725 	} else if (dict_table_is_corrupted(table)) {
1726 
1727 		return(DB_TABLE_CORRUPT);
1728 	}
1729 
1730 	return(DB_SUCCESS);
1731 }
1732 
1733 /**
1734 Truncates a table for MySQL.
1735 @param table		table being truncated
1736 @param trx		transaction covering the truncate
1737 @return	error code or DB_SUCCESS */
1738 dberr_t
row_truncate_table_for_mysql(dict_table_t * table,trx_t * trx)1739 row_truncate_table_for_mysql(
1740 	dict_table_t* table,
1741 	trx_t* trx)
1742 {
1743 	bool	is_file_per_table = dict_table_is_file_per_table(table);
1744 	dberr_t		err;
1745 #ifdef UNIV_DEBUG
1746 	ulint		old_space = table->space;
1747 #endif /* UNIV_DEBUG */
1748 	TruncateLogger*	logger = NULL;
1749 
1750 	/* Understanding the truncate flow.
1751 
	Step-1: Perform initial sanity check to ensure table can be truncated.
1753 	This would include check for tablespace discard status, ibd file
1754 	missing, etc ....
1755 
1756 	Step-2: Start transaction (only for non-temp table as temp-table don't
1757 	modify any data on disk doesn't need transaction object).
1758 
1759 	Step-3: Validate ownership of needed locks (Exclusive lock).
1760 	Ownership will also ensure there is no active SQL queries, INSERT,
1761 	SELECT, .....
1762 
1763 	Step-4: Stop all the background process associated with table.
1764 
1765 	Step-5: There are few foreign key related constraint under which
1766 	we can't truncate table (due to referential integrity unless it is
1767 	turned off). Ensure this condition is satisfied.
1768 
1769 	Step-6: Truncate operation can be rolled back in case of error
1770 	till some point. Associate rollback segment to record undo log.
1771 
1772 	Step-7: Generate new table-id.
1773 	Why we need new table-id ?
1774 	Purge and rollback case: we assign a new table id for the table.
1775 	Since purge and rollback look for the table based on the table id,
1776 	they see the table as 'dropped' and discard their operations.
1777 
1778 	Step-8: Log information about tablespace which includes
1779 	table and index information. If there is a crash in the next step
1780 	then during recovery we will attempt to fixup the operation.
1781 
1782 	Step-9: Drop all indexes (this include freeing of the pages
1783 	associated with them).
1784 
1785 	Step-10: Re-create new indexes.
1786 
1787 	Step-11: Update new table-id to in-memory cache (dictionary),
1788 	on-disk (INNODB_SYS_TABLES). INNODB_SYS_INDEXES also needs to
1789 	be updated to reflect updated root-page-no of new index created
1790 	and updated table-id.
1791 
1792 	Step-12: Cleanup Stage. Reset auto-inc value to 1.
1793 	Release all the locks.
1794 	Commit the transaction. Update trx operation state.
1795 
1796 	Notes:
1797 	- On error, log checkpoint is done followed writing of magic number to
1798 	truncate log file. If servers crashes after truncate, fix-up action
1799 	will not be applied.
1800 
1801 	- log checkpoint is done before starting truncate table to ensure
1802 	that previous REDO log entries are not applied if current truncate
1803 	crashes. Consider following use-case:
1804 	 - create table .... insert/load table .... truncate table (crash)
1805 	 - on restart table is restored .... truncate table (crash)
1806 	 - on restart (assuming default log checkpoint is not done) will have
1807 	   2 REDO log entries for same table. (Note 2 REDO log entries
1808 	   for different table is not an issue).
1809 	For system-tablespace we can't truncate the tablespace so we need
1810 	to initiate a local cleanup that involves dropping of indexes and
1811 	re-creating them. If we apply stale entry we might end-up issuing
1812 	drop on wrong indexes.
1813 
1814 	- Insert buffer: TRUNCATE TABLE is analogous to DROP TABLE,
1815 	so we do not have to remove insert buffer records, as the
1816 	insert buffer works at a low level. If a freed page is later
1817 	reallocated, the allocator will remove the ibuf entries for
1818 	it. When we prepare to truncate *.ibd files, we remove all entries
1819 	for the table in the insert buffer tree. This is not strictly
1820 	necessary, but we can free up some space in the system tablespace.
1821 
1822 	- Linear readahead and random readahead: we use the same
1823 	method as in 3) to discard ongoing operations. (This is only
1824 	relevant for TRUNCATE TABLE by TRUNCATE TABLESPACE.)
1825 	Ensure that the table will be dropped by trx_rollback_active() in
1826 	case of a crash.
1827 	*/
1828 
1829 	/*-----------------------------------------------------------------*/
	/* Step-1: Perform initial sanity check to ensure table can be
1831 	truncated. This would include check for tablespace discard status,
1832 	ibd file missing, etc .... */
1833 	err = row_truncate_sanity_checks(table);
1834 	if (err != DB_SUCCESS) {
1835 		return(err);
1836 
1837 	}
1838 
1839 	/* Step-2: Start transaction (only for non-temp table as temp-table
1840 	don't modify any data on disk doesn't need transaction object). */
1841 	if (!dict_table_is_temporary(table)) {
1842 		/* Avoid transaction overhead for temporary table DDL. */
1843 		trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
1844 	}
1845 
1846 	/* Step-3: Validate ownership of needed locks (Exclusive lock).
1847 	Ownership will also ensure there is no active SQL queries, INSERT,
1848 	SELECT, .....*/
1849 	trx->op_info = "truncating table";
1850 	ut_a(trx->dict_operation_lock_mode == 0);
1851 	row_mysql_lock_data_dictionary(trx);
1852 	ut_ad(mutex_own(&dict_sys->mutex));
1853 	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
1854 
1855 	/* Step-4: Stop all the background process associated with table. */
1856 	dict_stats_wait_bg_to_stop_using_table(table, trx);
1857 	if (table->fts) {
1858 		/* Remove from FTS optimize thread. Unlock is needed to allow
1859 		finishing background operations in progress. */
1860 		row_mysql_unlock_data_dictionary(trx);
1861 		fts_optimize_remove_table(table);
1862 		row_mysql_lock_data_dictionary(trx);
1863 	}
1864 
1865 	/* Step-5: There are few foreign key related constraint under which
1866 	we can't truncate table (due to referential integrity unless it is
1867 	turned off). Ensure this condition is satisfied. */
1868 	ulint	fsp_flags = ULINT_UNDEFINED;
1869 	err = row_truncate_foreign_key_checks(table, trx);
1870 	if (err != DB_SUCCESS) {
1871 		trx_rollback_to_savepoint(trx, NULL);
1872 		return(row_truncate_complete(
1873 				table, trx, fsp_flags, logger, err));
1874 	}
1875 
1876 	/* Check if memcached DML is running on this table. if is, we don't
1877 	allow truncate this table. */
1878 	if (table->memcached_sync_count != 0) {
1879 		ib::error() << "Cannot truncate table "
1880 			<< table->name
1881 			<< " by DROP+CREATE because there are memcached"
1882 			" operations running on it.";
1883 		err = DB_ERROR;
1884 		trx_rollback_to_savepoint(trx, NULL);
1885 		return(row_truncate_complete(
1886 				table, trx, fsp_flags, logger, err));
1887 	} else {
1888                 /* We need to set this counter to -1 for blocking
1889                 memcached operations. */
1890 		table->memcached_sync_count = DICT_TABLE_IN_DDL;
1891         }
1892 
1893 	/* Remove all locks except the table-level X lock. */
1894 	lock_remove_all_on_table(table, FALSE);
1895 	trx->table_id = table->id;
1896 	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
1897 
1898 	/* Step-6: Truncate operation can be rolled back in case of error
1899 	till some point. Associate rollback segment to record undo log. */
1900 	if (!dict_table_is_temporary(table)) {
1901 
1902 		/* Temporary tables don't need undo logging for autocommit stmt.
1903 		On crash (i.e. mysql restart) temporary tables are anyway not
1904 		accessible. */
1905 		mutex_enter(&trx->undo_mutex);
1906 
1907 		err = trx_undo_assign_undo(
1908 			trx, &trx->rsegs.m_redo, TRX_UNDO_UPDATE);
1909 
1910 		mutex_exit(&trx->undo_mutex);
1911 
1912 		DBUG_EXECUTE_IF("ib_err_trunc_assigning_undo_log",
1913 				err = DB_ERROR;);
1914 		if (err != DB_SUCCESS) {
1915 			trx_rollback_to_savepoint(trx, NULL);
1916 			return(row_truncate_complete(
1917 				table, trx, fsp_flags, logger, err));
1918 		}
1919 	}
1920 
1921 	/* Step-7: Generate new table-id.
1922 	Why we need new table-id ?
1923 	Purge and rollback: we assign a new table id for the
1924 	table. Since purge and rollback look for the table based on
1925 	the table id, they see the table as 'dropped' and discard
1926 	their operations. */
1927 	table_id_t	new_id;
1928 	dict_hdr_get_new_id(&new_id, NULL, NULL, table, false);
1929 
1930 	/* Check if table involves FTS index. */
1931 	bool	has_internal_doc_id =
1932 		dict_table_has_fts_index(table)
1933 		|| DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
1934 
1935 	bool	no_redo = is_file_per_table && !has_internal_doc_id;
1936 
1937 	/* Step-8: Log information about tablespace which includes
1938 	table and index information. If there is a crash in the next step
1939 	then during recovery we will attempt to fixup the operation. */
1940 
1941 	/* Lock all index trees for this table, as we will truncate
1942 	the table/index and possibly change their metadata. All
1943 	DML/DDL are blocked by table level X lock, with a few exceptions
1944 	such as queries into information schema about the table,
1945 	MySQL could try to access index stats for this kind of query,
1946 	we need to use index locks to sync up */
1947 	dict_table_x_lock_indexes(table);
1948 
1949 	if (!dict_table_is_temporary(table)) {
1950 
1951 		if (is_file_per_table) {
1952 
1953 			err = row_truncate_prepare(table, &fsp_flags);
1954 
1955 			DBUG_EXECUTE_IF("ib_err_trunc_preparing_for_truncate",
1956 					err = DB_ERROR;);
1957 
1958 			if (err != DB_SUCCESS) {
1959 				row_truncate_rollback(
1960 					table, trx, new_id,
1961 					has_internal_doc_id,
1962 					no_redo, false, true);
1963 				return(row_truncate_complete(
1964 					table, trx, fsp_flags, logger, err));
1965 			}
1966 		} else {
1967 			fsp_flags = fil_space_get_flags(table->space);
1968 
1969 			DBUG_EXECUTE_IF("ib_err_trunc_preparing_for_truncate",
1970 					fsp_flags = ULINT_UNDEFINED;);
1971 
1972 			if (fsp_flags == ULINT_UNDEFINED) {
1973 				row_truncate_rollback(
1974 					table, trx, new_id,
1975 					has_internal_doc_id,
1976 					no_redo, false, true);
1977 				return(row_truncate_complete(
1978 						table, trx, fsp_flags,
1979 						logger, DB_ERROR));
1980 			}
1981 		}
1982 
1983 		logger = UT_NEW_NOKEY(TruncateLogger(
1984 				table, fsp_flags, new_id));
1985 
1986 		err = logger->init();
1987 		if (err != DB_SUCCESS) {
1988 			row_truncate_rollback(
1989 				table, trx, new_id, has_internal_doc_id,
1990 				no_redo, false, true);
1991 			return(row_truncate_complete(
1992 				table, trx, fsp_flags, logger, DB_ERROR));
1993 
1994 		}
1995 
1996 		err = SysIndexIterator().for_each(*logger);
1997 		if (err != DB_SUCCESS) {
1998 			row_truncate_rollback(
1999 				table, trx, new_id, has_internal_doc_id,
2000 				no_redo, false, true);
2001 			return(row_truncate_complete(
2002 				table, trx, fsp_flags, logger, DB_ERROR));
2003 
2004 		}
2005 
2006 		ut_ad(logger->debug());
2007 
2008 		err = logger->log();
2009 
2010 		if (err != DB_SUCCESS) {
2011 			row_truncate_rollback(
2012 				table, trx, new_id, has_internal_doc_id,
2013 				no_redo, false, true);
2014 			return(row_truncate_complete(
2015 				table, trx, fsp_flags, logger, DB_ERROR));
2016 		}
2017 	}
2018 
2019 	DBUG_EXECUTE_IF("ib_trunc_crash_after_redo_log_write_complete",
2020 			log_buffer_flush_to_disk();
2021 			os_thread_sleep(3000000);
2022 			DBUG_SUICIDE(););
2023 
2024 	/* Step-9: Drop all indexes (free index pages associated with these
2025 	indexes) */
2026 	if (!dict_table_is_temporary(table)) {
2027 
2028 		DropIndex	dropIndex(table, no_redo);
2029 
2030 		err = SysIndexIterator().for_each(dropIndex);
2031 
2032 		if (err != DB_SUCCESS) {
2033 
2034 			row_truncate_rollback(
2035 				table, trx, new_id, has_internal_doc_id,
2036 				no_redo, true, true);
2037 
2038 			return(row_truncate_complete(
2039 				table, trx, fsp_flags, logger, err));
2040 		}
2041 
2042 	} else {
2043 		/* For temporary tables we don't have entries in SYSTEM TABLES*/
2044 		for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
2045 		     index != NULL;
2046 		     index = UT_LIST_GET_NEXT(indexes, index)) {
2047 
2048 			err = dict_truncate_index_tree_in_mem(index);
2049 
2050 			if (err != DB_SUCCESS) {
2051 				row_truncate_rollback(
2052 					table, trx, new_id, has_internal_doc_id,
2053 					no_redo, true, true);
2054 				return(row_truncate_complete(
2055 					table, trx, fsp_flags, logger, err));
2056 			}
2057 
2058 			DBUG_EXECUTE_IF(
2059 				"ib_trunc_crash_during_drop_index_temp_table",
2060 				log_buffer_flush_to_disk();
2061 				os_thread_sleep(2000000);
2062 				DBUG_SUICIDE(););
2063 		}
2064 	}
2065 
2066 	if (is_file_per_table
2067 	    && !dict_table_is_temporary(table)
2068 	    && fsp_flags != ULINT_UNDEFINED) {
2069 
2070 		/* A single-table tablespace has initially
2071 		FIL_IBD_FILE_INITIAL_SIZE number of pages allocated and an
2072 		extra page is allocated for each of the indexes present. But in
2073 		the case of clust index 2 pages are allocated and as one is
2074 		covered in the calculation as part of table->indexes.count we
2075 		take care of the other page by adding 1. */
2076 		ulint	space_size = table->indexes.count +
2077 				FIL_IBD_FILE_INITIAL_SIZE + 1;
2078 
2079 		if (has_internal_doc_id) {
2080 			/* Since aux tables are created for fts indexes and
2081 			they use seperate tablespaces. */
2082 			space_size -= ib_vector_size(table->fts->indexes);
2083 		}
2084 
2085 		fil_reinit_space_header_for_table(table, space_size, trx);
2086 	}
2087 
2088 	DBUG_EXECUTE_IF("ib_trunc_crash_with_intermediate_log_checkpoint",
2089 			log_buffer_flush_to_disk();
2090 			os_thread_sleep(2000000);
2091 			log_checkpoint(TRUE, TRUE);
2092 			os_thread_sleep(1000000);
2093 			DBUG_SUICIDE(););
2094 
2095 	DBUG_EXECUTE_IF("ib_trunc_crash_drop_reinit_done_create_to_start",
2096 			log_buffer_flush_to_disk();
2097 			os_thread_sleep(2000000);
2098 			DBUG_SUICIDE(););
2099 
2100 	/* Step-10: Re-create new indexes. */
2101 	if (!dict_table_is_temporary(table)) {
2102 
2103 		CreateIndex	createIndex(table, no_redo);
2104 
2105 		err = SysIndexIterator().for_each(createIndex);
2106 
2107 		if (err != DB_SUCCESS) {
2108 
2109 			row_truncate_rollback(
2110 				table, trx, new_id, has_internal_doc_id,
2111 				no_redo, true, true);
2112 
2113 			return(row_truncate_complete(
2114 				table, trx, fsp_flags, logger, err));
2115 		}
2116 	}
2117 
2118 	/* Done with index truncation, release index tree locks,
2119 	subsequent work relates to table level metadata change */
2120 	dict_table_x_unlock_indexes(table);
2121 
2122 	if (has_internal_doc_id) {
2123 
2124 		err = row_truncate_fts(table, new_id, trx);
2125 
2126 		if (err != DB_SUCCESS) {
2127 
2128 			row_truncate_rollback(
2129 				table, trx, new_id, has_internal_doc_id,
2130 				no_redo, true, false);
2131 
2132 			return(row_truncate_complete(
2133 				table, trx, fsp_flags, logger, err));
2134 		}
2135 	}
2136 
2137 	/* Step-11: Update new table-id to in-memory cache (dictionary),
2138 	on-disk (INNODB_SYS_TABLES). INNODB_SYS_INDEXES also needs to
2139 	be updated to reflect updated root-page-no of new index created
2140 	and updated table-id. */
2141 	if (dict_table_is_temporary(table)) {
2142 
2143 		dict_table_change_id_in_cache(table, new_id);
2144 		err = DB_SUCCESS;
2145 
2146 	} else {
2147 
2148 		/* If this fails then we are in an inconsistent state and
2149 		the results are undefined. */
2150 		ut_ad(old_space == table->space);
2151 
2152 		err = row_truncate_update_system_tables(
2153 			table, new_id, has_internal_doc_id, no_redo, trx);
2154 
2155 		if (err != DB_SUCCESS) {
2156 			return(row_truncate_complete(
2157 				table, trx, fsp_flags, logger, err));
2158 		}
2159 	}
2160 
2161 	DBUG_EXECUTE_IF("ib_trunc_crash_on_updating_dict_sys_info",
2162 			log_buffer_flush_to_disk();
2163 			os_thread_sleep(2000000);
2164 			DBUG_SUICIDE(););
2165 
2166 	/* Step-12: Cleanup Stage. Reset auto-inc value to 1.
2167 	Release all the locks.
2168 	Commit the transaction. Update trx operation state. */
2169 	dict_table_autoinc_lock(table);
2170 	dict_table_autoinc_initialize(table, 1);
2171 	dict_table_autoinc_unlock(table);
2172 
2173 	if (trx_is_started(trx)) {
2174 
2175 		trx_commit_for_mysql(trx);
2176 	}
2177 
2178 	return(row_truncate_complete(table, trx, fsp_flags, logger, err));
2179 }
2180 
2181 /**
2182 Fix the table truncate by applying information parsed from TRUNCATE log.
2183 Fix-up includes re-creating table (drop and re-create indexes)
2184 @return	error code or DB_SUCCESS */
2185 dberr_t
fixup_tables_in_system_tablespace()2186 truncate_t::fixup_tables_in_system_tablespace()
2187 {
2188 	dberr_t	err = DB_SUCCESS;
2189 
2190 	/* Using the info cached during REDO log scan phase fix the
2191 	table truncate. */
2192 
2193 	for (tables_t::iterator it = s_tables.begin();
2194 	     it != s_tables.end();) {
2195 
2196 		if ((*it)->m_space_id == TRX_SYS_SPACE) {
2197 			/* Step-1: Drop and re-create indexes. */
2198 			ib::info() << "Completing truncate for table with "
2199 				"id (" << (*it)->m_old_table_id << ") "
2200 				"residing in the system tablespace.";
2201 
2202 			err = fil_recreate_table(
2203 				(*it)->m_space_id,
2204 				(*it)->m_format_flags,
2205 				(*it)->m_tablespace_flags,
2206 				(*it)->m_tablename,
2207 				**it);
2208 
2209 			/* Step-2: Update the SYS_XXXX tables to reflect
2210 			this new table_id and root_page_no. */
2211 			table_id_t	new_id;
2212 
2213 			dict_hdr_get_new_id(&new_id, NULL, NULL, NULL, true);
2214 
2215 			err = row_truncate_update_sys_tables_during_fix_up(
2216 				**it, new_id, TRUE,
2217 				(err == DB_SUCCESS) ? false : true);
2218 
2219 			if (err != DB_SUCCESS) {
2220 				break;
2221 			}
2222 
2223 			os_file_delete(
2224 				innodb_log_file_key, (*it)->m_log_file_name);
2225 			UT_DELETE(*it);
2226 			it = s_tables.erase(it);
2227 		} else {
2228 			++it;
2229 		}
2230 	}
2231 
2232 	/* Also clear the map used to track tablespace truncated. */
2233 	s_truncated_tables.clear();
2234 
2235 	return(err);
2236 }
2237 
2238 /**
2239 Fix the table truncate by applying information parsed from TRUNCATE log.
2240 Fix-up includes re-creating tablespace.
2241 @return	error code or DB_SUCCESS */
2242 dberr_t
fixup_tables_in_non_system_tablespace()2243 truncate_t::fixup_tables_in_non_system_tablespace()
2244 {
2245 	dberr_t	err = DB_SUCCESS;
2246 
2247 	/* Using the info cached during REDO log scan phase fix the
2248 	table truncate. */
2249 	tables_t::iterator end = s_tables.end();
2250 
2251 	for (tables_t::iterator it = s_tables.begin(); it != end; ++it) {
2252 
2253 		/* All tables in the system tablespace have already been
2254 		done and erased from this list. */
2255 		ut_a((*it)->m_space_id != TRX_SYS_SPACE);
2256 
2257 		/* Step-1: Drop tablespace (only for single-tablespace),
2258 		drop indexes and re-create indexes. */
2259 
2260 		if (fsp_is_file_per_table((*it)->m_space_id,
2261 					  (*it)->m_tablespace_flags)) {
2262 			/* The table is file_per_table */
2263 
2264 			ib::info() << "Completing truncate for table with "
2265 				"id (" << (*it)->m_old_table_id << ") "
2266 				"residing in file-per-table tablespace with "
2267 				"id (" << (*it)->m_space_id << ")";
2268 
2269 			if (!fil_space_get((*it)->m_space_id)) {
2270 
2271 				/* Create the database directory for name,
2272 				if it does not exist yet */
2273 				fil_create_directory_for_tablename(
2274 					(*it)->m_tablename);
2275 
2276 				err = fil_ibd_create(
2277 					(*it)->m_space_id,
2278 					(*it)->m_tablename,
2279 					(*it)->m_dir_path,
2280 					(*it)->m_tablespace_flags,
2281 					FIL_IBD_FILE_INITIAL_SIZE);
2282 				if (err != DB_SUCCESS) {
2283 					/* If checkpoint is not yet done
2284 					and table is dropped and then we might
2285 					still have REDO entries for this table
2286 					which are INVALID. Ignore them. */
2287 					ib::warn() << "Failed to create"
2288 						" tablespace for "
2289 						<< (*it)->m_space_id
2290 						<< " space-id";
2291 					err = DB_ERROR;
2292 					break;
2293 				}
2294 			}
2295 
2296 			ut_ad(fil_space_get((*it)->m_space_id));
2297 
2298 			err = fil_recreate_tablespace(
2299 				(*it)->m_space_id,
2300 				(*it)->m_format_flags,
2301 				(*it)->m_tablespace_flags,
2302 				(*it)->m_tablename,
2303 				**it, log_get_lsn());
2304 
2305 		} else {
2306 			/* Table is in a shared tablespace */
2307 
2308 			ib::info() << "Completing truncate for table with "
2309 				"id (" << (*it)->m_old_table_id << ") "
2310 				"residing in shared tablespace with "
2311 				"id (" << (*it)->m_space_id << ")";
2312 
2313 			/* Temp-tables in temp-tablespace are never restored.*/
2314 			ut_ad((*it)->m_space_id != srv_tmp_space.space_id());
2315 
2316 			err = fil_recreate_table(
2317 				(*it)->m_space_id,
2318 				(*it)->m_format_flags,
2319 				(*it)->m_tablespace_flags,
2320 				(*it)->m_tablename,
2321 				**it);
2322 		}
2323 
2324 		/* Step-2: Update the SYS_XXXX tables to reflect new
2325 		table-id and root_page_no. */
2326 		table_id_t	new_id;
2327 
2328 		dict_hdr_get_new_id(&new_id, NULL, NULL, NULL, true);
2329 
2330 		err = row_truncate_update_sys_tables_during_fix_up(
2331 			**it, new_id, TRUE, (err == DB_SUCCESS) ? false : true);
2332 
2333 		if (err != DB_SUCCESS) {
2334 			break;
2335 		}
2336 	}
2337 
2338 	if (err == DB_SUCCESS && s_tables.size() > 0) {
2339 
2340 		log_make_checkpoint_at(LSN_MAX, TRUE);
2341 	}
2342 
2343 	for (ulint i = 0; i < s_tables.size(); ++i) {
2344 		os_file_delete(
2345 			innodb_log_file_key, s_tables[i]->m_log_file_name);
2346 		UT_DELETE(s_tables[i]);
2347 	}
2348 
2349 	s_tables.clear();
2350 
2351 	return(err);
2352 }
2353 
2354 /**
2355 Constructor
2356 
2357 @param old_table_id	old table id assigned to table before truncate
2358 @param new_table_id	new table id that will be assigned to table
2359 			after truncate
2360 @param dir_path		directory path */
2361 
truncate_t(table_id_t old_table_id,table_id_t new_table_id,const char * dir_path)2362 truncate_t::truncate_t(
2363 	table_id_t	old_table_id,
2364 	table_id_t	new_table_id,
2365 	const char*	dir_path)
2366 	:
2367 	m_space_id(),
2368 	m_old_table_id(old_table_id),
2369 	m_new_table_id(new_table_id),
2370 	m_dir_path(),
2371 	m_tablename(),
2372 	m_tablespace_flags(),
2373 	m_format_flags(),
2374 	m_indexes(),
2375 	m_log_lsn(),
2376 	m_log_file_name()
2377 {
2378 	if (dir_path != NULL) {
2379 		m_dir_path = mem_strdup(dir_path);
2380 	}
2381 }
2382 
2383 /**
2384 Consturctor
2385 
2386 @param log_file_name	parse the log file during recovery to populate
2387 			information related to table to truncate */
truncate_t(const char * log_file_name)2388 truncate_t::truncate_t(
2389 	const char*	log_file_name)
2390 	:
2391 	m_space_id(),
2392 	m_old_table_id(),
2393 	m_new_table_id(),
2394 	m_dir_path(),
2395 	m_tablename(),
2396 	m_tablespace_flags(),
2397 	m_format_flags(),
2398 	m_indexes(),
2399 	m_log_lsn(),
2400 	m_log_file_name()
2401 {
2402 	m_log_file_name = mem_strdup(log_file_name);
2403 	if (m_log_file_name == NULL) {
2404 		ib::fatal() << "Failed creating truncate_t; out of memory";
2405 	}
2406 }
2407 
2408 /** Constructor */
2409 
index_t()2410 truncate_t::index_t::index_t()
2411 	:
2412 	m_id(),
2413 	m_type(),
2414 	m_root_page_no(FIL_NULL),
2415 	m_new_root_page_no(FIL_NULL),
2416 	m_n_fields(),
2417 	m_trx_id_pos(ULINT_UNDEFINED),
2418 	m_fields()
2419 {
2420 	/* Do nothing */
2421 }
2422 
2423 /** Destructor */
2424 
~truncate_t()2425 truncate_t::~truncate_t()
2426 {
2427 	if (m_dir_path != NULL) {
2428 		ut_free(m_dir_path);
2429 		m_dir_path = NULL;
2430 	}
2431 
2432 	if (m_tablename != NULL) {
2433 		ut_free(m_tablename);
2434 		m_tablename = NULL;
2435 	}
2436 
2437 	if (m_log_file_name != NULL) {
2438 		ut_free(m_log_file_name);
2439 		m_log_file_name = NULL;
2440 	}
2441 
2442 	m_indexes.clear();
2443 }
2444 
2445 /**
2446 @return number of indexes parsed from the log record */
2447 
2448 size_t
indexes() const2449 truncate_t::indexes() const
2450 {
2451 	return(m_indexes.size());
2452 }
2453 
2454 /**
2455 Update root page number in SYS_XXXX tables.
2456 
2457 @param trx			transaction object
2458 @param table_id			table id for which information needs to
2459 				be updated.
2460 @param reserve_dict_mutex	if TRUE, acquire/release
2461 				dict_sys->mutex around call to pars_sql.
2462 @param mark_index_corrupted	if true, then mark index corrupted.
2463 @return DB_SUCCESS or error code */
2464 
2465 dberr_t
update_root_page_no(trx_t * trx,table_id_t table_id,ibool reserve_dict_mutex,bool mark_index_corrupted) const2466 truncate_t::update_root_page_no(
2467 	trx_t*		trx,
2468 	table_id_t	table_id,
2469 	ibool		reserve_dict_mutex,
2470 	bool		mark_index_corrupted) const
2471 {
2472 	indexes_t::const_iterator end = m_indexes.end();
2473 
2474 	dberr_t	err = DB_SUCCESS;
2475 
2476 	for (indexes_t::const_iterator it = m_indexes.begin();
2477 	     it != end;
2478 	     ++it) {
2479 
2480 		pars_info_t*	info = pars_info_create();
2481 
2482 		pars_info_add_int4_literal(
2483 			info, "page_no", it->m_new_root_page_no);
2484 
2485 		pars_info_add_ull_literal(info, "table_id", table_id);
2486 
2487 		pars_info_add_ull_literal(
2488 			info, "index_id",
2489 			(mark_index_corrupted ? -1 : it->m_id));
2490 
2491 		err = que_eval_sql(
2492 			info,
2493 			"PROCEDURE RENUMBER_IDX_PAGE_NO_PROC () IS\n"
2494 			"BEGIN\n"
2495 			"UPDATE SYS_INDEXES"
2496 			" SET PAGE_NO = :page_no\n"
2497 			" WHERE TABLE_ID = :table_id"
2498 			" AND ID = :index_id;\n"
2499 			"END;\n", reserve_dict_mutex, trx);
2500 
2501 		if (err != DB_SUCCESS) {
2502 			break;
2503 		}
2504 	}
2505 
2506 	return(err);
2507 }
2508 
2509 /**
2510 Check whether a tablespace was truncated during recovery
2511 @param space_id	tablespace id to check
2512 @return true if the tablespace was truncated */
2513 
2514 bool
is_tablespace_truncated(ulint space_id)2515 truncate_t::is_tablespace_truncated(ulint space_id)
2516 {
2517 	tables_t::iterator end = s_tables.end();
2518 
2519 	for (tables_t::iterator it = s_tables.begin(); it != end; ++it) {
2520 
2521 		if ((*it)->m_space_id == space_id) {
2522 
2523 			return(true);
2524 		}
2525 	}
2526 
2527 	return(false);
2528 }
2529 
2530 /** Was tablespace truncated (on crash before checkpoint).
2531 If the MLOG_TRUNCATE redo-record is still available then tablespace
2532 was truncated and checkpoint is yet to happen.
2533 @param[in]	space_id	tablespace id to check.
2534 @return true if tablespace is was truncated. */
2535 bool
was_tablespace_truncated(ulint space_id)2536 truncate_t::was_tablespace_truncated(ulint space_id)
2537 {
2538 	return(s_truncated_tables.find(space_id) != s_truncated_tables.end());
2539 }
2540 
2541 /** Get the lsn associated with space.
2542 @param[in]	space_id	tablespace id to check.
2543 @return associated lsn. */
2544 lsn_t
get_truncated_tablespace_init_lsn(ulint space_id)2545 truncate_t::get_truncated_tablespace_init_lsn(ulint space_id)
2546 {
2547 	ut_ad(was_tablespace_truncated(space_id));
2548 
2549 	return(s_truncated_tables.find(space_id)->second);
2550 }
2551 
2552 /**
2553 Parses log record during recovery
2554 @param start_ptr	buffer containing log body to parse
2555 @param end_ptr		buffer end
2556 
2557 @return DB_SUCCESS or error code */
2558 
2559 dberr_t
parse(byte * start_ptr,const byte * end_ptr)2560 truncate_t::parse(
2561 	byte*		start_ptr,
2562 	const byte*	end_ptr)
2563 {
2564 	/* Parse lsn, space-id, format-flags and tablespace-flags. */
2565 	if (end_ptr < start_ptr + (8 + 4 + 4 + 4)) {
2566 		return(DB_FAIL);
2567 	}
2568 
2569 	m_log_lsn = mach_read_from_8(start_ptr);
2570 	start_ptr += 8;
2571 
2572 	m_space_id = mach_read_from_4(start_ptr);
2573 	start_ptr += 4;
2574 
2575 	m_format_flags = mach_read_from_4(start_ptr);
2576 	start_ptr += 4;
2577 
2578 	m_tablespace_flags = mach_read_from_4(start_ptr);
2579 	start_ptr += 4;
2580 
2581 	/* Parse table-name. */
2582 	if (end_ptr < start_ptr + (2)) {
2583 		return(DB_FAIL);
2584 	}
2585 
2586 	ulint n_tablename_len = mach_read_from_2(start_ptr);
2587 	start_ptr += 2;
2588 
2589 	if (n_tablename_len > 0) {
2590 		if (end_ptr < start_ptr + n_tablename_len) {
2591 			return(DB_FAIL);
2592 		}
2593 		m_tablename = mem_strdup(reinterpret_cast<char*>(start_ptr));
2594 		ut_ad(m_tablename[n_tablename_len - 1] == 0);
2595 		start_ptr += n_tablename_len;
2596 	}
2597 
2598 
2599 	/* Parse and read old/new table-id, number of indexes */
2600 	if (end_ptr < start_ptr + (8 + 8 + 2 + 2)) {
2601 		return(DB_FAIL);
2602 	}
2603 
2604 	ut_ad(m_indexes.empty());
2605 
2606 	m_old_table_id = mach_read_from_8(start_ptr);
2607 	start_ptr += 8;
2608 
2609 	m_new_table_id = mach_read_from_8(start_ptr);
2610 	start_ptr += 8;
2611 
2612 	ulint n_indexes = mach_read_from_2(start_ptr);
2613 	start_ptr += 2;
2614 
2615 	/* Parse the remote directory from TRUNCATE log record */
2616 	{
2617 		ulint	n_tabledirpath_len = mach_read_from_2(start_ptr);
2618 		start_ptr += 2;
2619 
2620 		if (end_ptr < start_ptr + n_tabledirpath_len) {
2621 			return(DB_FAIL);
2622 		}
2623 
2624 		if (n_tabledirpath_len > 0) {
2625 
2626 			m_dir_path = mem_strdup(reinterpret_cast<char*>(start_ptr));
2627 			ut_ad(m_dir_path[n_tabledirpath_len - 1] == 0);
2628 			start_ptr += n_tabledirpath_len;
2629 		}
2630 	}
2631 
2632 	/* Parse index ids and types from TRUNCATE log record */
2633 	for (ulint i = 0; i < n_indexes; ++i) {
2634 		index_t	index;
2635 
2636 		if (end_ptr < start_ptr + (8 + 4 + 4 + 4)) {
2637 			return(DB_FAIL);
2638 		}
2639 
2640 		index.m_id = mach_read_from_8(start_ptr);
2641 		start_ptr += 8;
2642 
2643 		index.m_type = mach_read_from_4(start_ptr);
2644 		start_ptr += 4;
2645 
2646 		index.m_root_page_no = mach_read_from_4(start_ptr);
2647 		start_ptr += 4;
2648 
2649 		index.m_trx_id_pos = mach_read_from_4(start_ptr);
2650 		start_ptr += 4;
2651 
2652 		if (!(index.m_type & DICT_FTS)) {
2653 			m_indexes.push_back(index);
2654 		}
2655 	}
2656 
2657 	ut_ad(!m_indexes.empty());
2658 
2659 	if (fsp_flags_is_compressed(m_tablespace_flags)) {
2660 
2661 		/* Parse the number of index fields from TRUNCATE log record */
2662 		for (ulint i = 0; i < m_indexes.size(); ++i) {
2663 
2664 			if (end_ptr < start_ptr + (2 + 2)) {
2665 				return(DB_FAIL);
2666 			}
2667 
2668 			m_indexes[i].m_n_fields = mach_read_from_2(start_ptr);
2669 			start_ptr += 2;
2670 
2671 			ulint	len = mach_read_from_2(start_ptr);
2672 			start_ptr += 2;
2673 
2674 			if (end_ptr < start_ptr + len) {
2675 				return(DB_FAIL);
2676 			}
2677 
2678 			index_t&	index = m_indexes[i];
2679 
2680 			/* Should be NUL terminated. */
2681 			ut_ad((start_ptr)[len - 1] == 0);
2682 
2683 			index_t::fields_t::iterator	end;
2684 
2685 			end = index.m_fields.end();
2686 
2687 			index.m_fields.insert(
2688 				end, start_ptr, &(start_ptr)[len]);
2689 
2690 			start_ptr += len;
2691 		}
2692 	}
2693 
2694 	return(DB_SUCCESS);
2695 }
2696 
2697 /** Parse log record from REDO log file during recovery.
2698 @param[in,out]	start_ptr	buffer containing log body to parse
2699 @param[in]	end_ptr		buffer end
2700 @param[in]	space_id	tablespace identifier
2701 @return parsed upto or NULL. */
2702 byte*
parse_redo_entry(byte * start_ptr,const byte * end_ptr,ulint space_id)2703 truncate_t::parse_redo_entry(
2704 	byte*		start_ptr,
2705 	const byte*	end_ptr,
2706 	ulint		space_id)
2707 {
2708 	lsn_t	lsn;
2709 
2710 	/* Parse space-id, lsn */
2711 	if (end_ptr < (start_ptr + 8)) {
2712 		return(NULL);
2713 	}
2714 
2715 	lsn = mach_read_from_8(start_ptr);
2716 	start_ptr += 8;
2717 
2718 	/* Tablespace can't exist in both state.
2719 	(scheduled-for-truncate, was-truncated). */
2720 	if (!is_tablespace_truncated(space_id)) {
2721 
2722 		truncated_tables_t::iterator	it =
2723 				s_truncated_tables.find(space_id);
2724 
2725 		if (it == s_truncated_tables.end()) {
2726 			s_truncated_tables.insert(
2727 				std::pair<ulint, lsn_t>(space_id, lsn));
2728 		} else {
2729 			it->second = lsn;
2730 		}
2731 	}
2732 
2733 	return(start_ptr);
2734 }
2735 
2736 /**
2737 Set the truncate log values for a compressed table.
2738 @param index	index from which recreate infoormation needs to be extracted
2739 @return DB_SUCCESS or error code */
2740 
2741 dberr_t
set(const dict_index_t * index)2742 truncate_t::index_t::set(
2743 	const dict_index_t* index)
2744 {
2745 	/* Get trx-id column position (set only for clustered index) */
2746 	if (dict_index_is_clust(index)) {
2747 		m_trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
2748 		ut_ad(m_trx_id_pos > 0);
2749 		ut_ad(m_trx_id_pos != ULINT_UNDEFINED);
2750 	} else {
2751 		m_trx_id_pos = 0;
2752 	}
2753 
2754 	/* Original logic set this field differently if page is not leaf.
2755 	For truncate case this being first page to get created it is
2756 	always a leaf page and so we don't need that condition here. */
2757 	m_n_fields = dict_index_get_n_fields(index);
2758 
2759 	/* See requirements of page_zip_fields_encode for size. */
2760 	ulint	encoded_buf_size = (m_n_fields + 1) * 2;
2761 	byte*	encoded_buf = UT_NEW_ARRAY_NOKEY(byte, encoded_buf_size);
2762 
2763 	if (encoded_buf == NULL) {
2764 		return(DB_OUT_OF_MEMORY);
2765 	}
2766 
2767 	ulint len = page_zip_fields_encode(
2768 		m_n_fields, index, m_trx_id_pos, encoded_buf);
2769 	ut_a(len <= encoded_buf_size);
2770 
2771 	/* Append the encoded fields data. */
2772 	m_fields.insert(m_fields.end(), &encoded_buf[0], &encoded_buf[len]);
2773 
2774 	/* NUL terminate the encoded data */
2775 	m_fields.push_back(0);
2776 
2777 	UT_DELETE_ARRAY(encoded_buf);
2778 
2779 	return(DB_SUCCESS);
2780 }
2781 
2782 /** Create an index for a table.
2783 @param[in]	table_name		table name, for which to create
2784 the index
2785 @param[in]	space_id		space id where we have to
2786 create the index
2787 @param[in]	page_size		page size of the .ibd file
2788 @param[in]	index_type		type of index to truncate
2789 @param[in]	index_id		id of index to truncate
2790 @param[in]	btr_redo_create_info	control info for ::btr_create()
2791 @param[in,out]	mtr			mini-transaction covering the
2792 create index
2793 @return root page no or FIL_NULL on failure */
2794 ulint
create_index(const char * table_name,ulint space_id,const page_size_t & page_size,ulint index_type,index_id_t index_id,const btr_create_t & btr_redo_create_info,mtr_t * mtr) const2795 truncate_t::create_index(
2796 	const char*		table_name,
2797 	ulint			space_id,
2798 	const page_size_t&	page_size,
2799 	ulint			index_type,
2800 	index_id_t		index_id,
2801 	const btr_create_t&	btr_redo_create_info,
2802 	mtr_t*			mtr) const
2803 {
2804 	ulint	root_page_no = btr_create(
2805 		index_type, space_id, page_size, index_id,
2806 		NULL, &btr_redo_create_info, mtr);
2807 
2808 	if (root_page_no == FIL_NULL) {
2809 
2810 		ib::info() << "innodb_force_recovery was set to "
2811 			<< srv_force_recovery << ". Continuing crash recovery"
2812 			" even though we failed to create index " << index_id
2813 			<< " for compressed table '" << table_name << "' with"
2814 			" tablespace " << space_id << " during recovery";
2815 	}
2816 
2817 	return(root_page_no);
2818 }
2819 
2820 /** Check if index has been modified since TRUNCATE log snapshot
2821 was recorded.
2822 @param space_id		space_id where table/indexes resides.
2823 @param root_page_no	root page of index that needs to be verified.
2824 @return true if modified else false */
2825 
2826 bool
is_index_modified_since_logged(ulint space_id,ulint root_page_no) const2827 truncate_t::is_index_modified_since_logged(
2828 	ulint		space_id,
2829 	ulint		root_page_no) const
2830 {
2831 	mtr_t			mtr;
2832 	bool			found;
2833 	const page_size_t&	page_size = fil_space_get_page_size(space_id,
2834 								    &found);
2835 
2836 	ut_ad(found);
2837 
2838 	mtr_start(&mtr);
2839 
2840 	/* Root page could be in free state if truncate crashed after drop_index
2841 	and page was not allocated for any other object. */
2842 	buf_block_t* block= buf_page_get_gen(
2843 		page_id_t(space_id, root_page_no), page_size, RW_X_LATCH, NULL,
2844 		BUF_GET_POSSIBLY_FREED, __FILE__, __LINE__, &mtr);
2845 
2846 	page_t* root = buf_block_get_frame(block);
2847 
2848 #ifdef UNIV_DEBUG
2849 	/* If the root page has been freed as part of truncate drop_index action
2850 	and not yet allocated for any object still the pagelsn > snapshot lsn */
2851 	if (block->page.file_page_was_freed) {
2852 		ut_ad(mach_read_from_8(root + FIL_PAGE_LSN) > m_log_lsn);
2853 	}
2854 #endif /* UNIV_DEBUG */
2855 
2856 	lsn_t page_lsn = mach_read_from_8(root + FIL_PAGE_LSN);
2857 
2858 	mtr_commit(&mtr);
2859 
2860 	if (page_lsn > m_log_lsn) {
2861 		return(true);
2862 	}
2863 
2864 	return(false);
2865 }
2866 
2867 /** Drop indexes for a table.
2868 @param space_id		space_id where table/indexes resides. */
2869 
2870 void
drop_indexes(ulint space_id) const2871 truncate_t::drop_indexes(
2872 	ulint		space_id) const
2873 {
2874 	mtr_t           mtr;
2875 	ulint		root_page_no = FIL_NULL;
2876 
2877 	indexes_t::const_iterator       end = m_indexes.end();
2878 
2879 	for (indexes_t::const_iterator it = m_indexes.begin();
2880 	     it != end;
2881 	     ++it) {
2882 
2883 		root_page_no = it->m_root_page_no;
2884 
2885 		bool			found;
2886 		const page_size_t&	page_size
2887 			= fil_space_get_page_size(space_id, &found);
2888 
2889 		ut_ad(found);
2890 
2891 		if (is_index_modified_since_logged(
2892 			space_id, root_page_no)) {
2893 			/* Page has been modified since TRUNCATE log snapshot
2894 			was recorded so not safe to drop the index. */
2895 			continue;
2896 		}
2897 
2898 		mtr_start(&mtr);
2899 
2900 		if (space_id != TRX_SYS_SPACE) {
2901 			/* Do not log changes for single-table
2902 			tablespaces, we are in recovery mode. */
2903 			mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
2904 		}
2905 
2906 		if (root_page_no != FIL_NULL) {
2907 			const page_id_t	root_page_id(space_id, root_page_no);
2908 
2909 			btr_free_if_exists(
2910 				root_page_id, page_size, it->m_id, &mtr);
2911 		}
2912 
2913 		/* If tree is already freed then we might return immediately
2914 		in which case we need to release the lock we have acquired
2915 		on root_page. */
2916 		mtr_commit(&mtr);
2917 	}
2918 }
2919 
2920 
2921 /** Create the indexes for a table
2922 @param[in]	table_name	table name, for which to create the indexes
2923 @param[in]	space_id	space id where we have to create the indexes
2924 @param[in]	page_size	page size of the .ibd file
2925 @param[in]	flags		tablespace flags
2926 @param[in]	format_flags	page format flags
2927 @return DB_SUCCESS or error code. */
2928 dberr_t
create_indexes(const char * table_name,ulint space_id,const page_size_t & page_size,ulint flags,ulint format_flags)2929 truncate_t::create_indexes(
2930 	const char*		table_name,
2931 	ulint			space_id,
2932 	const page_size_t&	page_size,
2933 	ulint			flags,
2934 	ulint			format_flags)
2935 {
2936 	mtr_t           mtr;
2937 
2938 	mtr_start(&mtr);
2939 
2940 	if (space_id != TRX_SYS_SPACE) {
2941 		/* Do not log changes for single-table tablespaces, we
2942 		are in recovery mode. */
2943 		mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
2944 	}
2945 
2946 	/* Create all new index trees with table format, index ids, index
2947 	types, number of index fields and index field information taken
2948 	out from the TRUNCATE log record. */
2949 
2950 	ulint   root_page_no = FIL_NULL;
2951 	indexes_t::iterator       end = m_indexes.end();
2952 	for (indexes_t::iterator it = m_indexes.begin();
2953 	     it != end;
2954 	     ++it) {
2955 
2956 		btr_create_t    btr_redo_create_info(
2957 			fsp_flags_is_compressed(flags)
2958 			? &it->m_fields[0] : NULL);
2959 
2960 		btr_redo_create_info.format_flags = format_flags;
2961 
2962 		if (fsp_flags_is_compressed(flags)) {
2963 
2964 			btr_redo_create_info.n_fields = it->m_n_fields;
2965 			/* Skip the NUL appended field */
2966 			btr_redo_create_info.field_len =
2967 				it->m_fields.size() - 1;
2968 			btr_redo_create_info.trx_id_pos = it->m_trx_id_pos;
2969 		}
2970 
2971 		root_page_no = create_index(
2972 			table_name, space_id, page_size, it->m_type, it->m_id,
2973 			btr_redo_create_info, &mtr);
2974 
2975 		if (root_page_no == FIL_NULL) {
2976 			break;
2977 		}
2978 
2979 		it->m_new_root_page_no = root_page_no;
2980 	}
2981 
2982 	mtr_commit(&mtr);
2983 
2984 	return(root_page_no == FIL_NULL ? DB_ERROR : DB_SUCCESS);
2985 }
2986 
2987 /**
2988 Write a TRUNCATE log record for fixing up table if truncate crashes.
2989 @param start_ptr	buffer to write log record
2990 @param end_ptr		buffer end
2991 @param space_id		space id
2992 @param tablename	the table name in the usual databasename/tablename
2993 			format of InnoDB
2994 @param flags		tablespace flags
2995 @param format_flags	page format
2996 @param lsn		lsn while logging
2997 @return DB_SUCCESS or error code */
2998 
2999 dberr_t
write(byte * start_ptr,byte * end_ptr,ulint space_id,const char * tablename,ulint flags,ulint format_flags,lsn_t lsn) const3000 truncate_t::write(
3001 	byte*		start_ptr,
3002 	byte*		end_ptr,
3003 	ulint		space_id,
3004 	const char*	tablename,
3005 	ulint		flags,
3006 	ulint		format_flags,
3007 	lsn_t		lsn) const
3008 {
3009 	if (end_ptr < start_ptr) {
3010 		return(DB_FAIL);
3011 	}
3012 
3013 	/* LSN, Type, Space-ID, format-flag (also know as log_flag.
3014 	Stored in page_no field), tablespace flags */
3015 	if (end_ptr < (start_ptr + (8 + 4 + 4 + 4)))  {
3016 		return(DB_FAIL);
3017 	}
3018 
3019 	mach_write_to_8(start_ptr, lsn);
3020 	start_ptr += 8;
3021 
3022 	mach_write_to_4(start_ptr, space_id);
3023 	start_ptr += 4;
3024 
3025 	mach_write_to_4(start_ptr, format_flags);
3026 	start_ptr += 4;
3027 
3028 	mach_write_to_4(start_ptr, flags);
3029 	start_ptr += 4;
3030 
3031 	/* Name of the table. */
3032 	/* Include the NUL in the log record. */
3033 	ulint len = strlen(tablename) + 1;
3034 	if (end_ptr < (start_ptr + (len + 2))) {
3035 		return(DB_FAIL);
3036 	}
3037 
3038 	mach_write_to_2(start_ptr, len);
3039 	start_ptr += 2;
3040 
3041 	memcpy(start_ptr, tablename, len - 1);
3042 	start_ptr += len;
3043 
3044 	DBUG_EXECUTE_IF("ib_trunc_crash_while_writing_redo_log",
3045 			DBUG_SUICIDE(););
3046 
3047 	/* Old/New Table-ID, Number of Indexes and Tablespace dir-path-name. */
3048 	/* Write the remote directory of the table into mtr log */
3049 	len = m_dir_path != NULL ? strlen(m_dir_path) + 1 : 0;
3050 	if (end_ptr < (start_ptr + (len + 8 + 8 + 2 + 2))) {
3051 		return(DB_FAIL);
3052 	}
3053 
3054 	/* Write out old-table-id. */
3055 	mach_write_to_8(start_ptr, m_old_table_id);
3056 	start_ptr += 8;
3057 
3058 	/* Write out new-table-id. */
3059 	mach_write_to_8(start_ptr, m_new_table_id);
3060 	start_ptr += 8;
3061 
3062 	/* Write out the number of indexes. */
3063 	mach_write_to_2(start_ptr, m_indexes.size());
3064 	start_ptr += 2;
3065 
3066 	/* Write the length (NUL included) of the .ibd path. */
3067 	mach_write_to_2(start_ptr, len);
3068 	start_ptr += 2;
3069 
3070 	if (m_dir_path != NULL) {
3071 		memcpy(start_ptr, m_dir_path, len - 1);
3072 		start_ptr += len;
3073 	}
3074 
3075 	/* Indexes information (id, type) */
3076 	/* Write index ids, type, root-page-no into mtr log */
3077 	for (ulint i = 0; i < m_indexes.size(); ++i) {
3078 
3079 		if (end_ptr < (start_ptr + (8 + 4 + 4 + 4))) {
3080 			return(DB_FAIL);
3081 		}
3082 
3083 		mach_write_to_8(start_ptr, m_indexes[i].m_id);
3084 		start_ptr += 8;
3085 
3086 		mach_write_to_4(start_ptr, m_indexes[i].m_type);
3087 		start_ptr += 4;
3088 
3089 		mach_write_to_4(start_ptr, m_indexes[i].m_root_page_no);
3090 		start_ptr += 4;
3091 
3092 		mach_write_to_4(start_ptr, m_indexes[i].m_trx_id_pos);
3093 		start_ptr += 4;
3094 	}
3095 
3096 	/* If tablespace compressed then field info of each index. */
3097 	if (fsp_flags_is_compressed(flags)) {
3098 
3099 		for (ulint i = 0; i < m_indexes.size(); ++i) {
3100 
3101 			ulint len = m_indexes[i].m_fields.size();
3102 			if (end_ptr < (start_ptr + (len + 2 + 2))) {
3103 				return(DB_FAIL);
3104 			}
3105 
3106 			mach_write_to_2(
3107 				start_ptr, m_indexes[i].m_n_fields);
3108 			start_ptr += 2;
3109 
3110 			mach_write_to_2(start_ptr, len);
3111 			start_ptr += 2;
3112 
3113 			const byte*	ptr = &m_indexes[i].m_fields[0];
3114 			memcpy(start_ptr, ptr, len - 1);
3115 			start_ptr += len;
3116 		}
3117 	}
3118 
3119 	return(DB_SUCCESS);
3120 }
3121 
3122