/*****************************************************************************

Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2022, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA

*****************************************************************************/

/**************************************************//**
@file mtr/mtr0mtr.cc
Mini-transaction buffer

Created 11/26/1995 Heikki Tuuri
*******************************************************/

#include "mtr0mtr.h"

#include "buf0buf.h"
#include "buf0flu.h"
#include "fsp0sysspace.h"
#include "page0types.h"
#include "mtr0log.h"
#include "log0recv.h"

/** Iterate over a memo block in reverse. */
template <typename Functor>
struct CIterate {
	CIterate() : functor() {}

	CIterate(const Functor& functor) : functor(functor) {}

	/** @return false if the functor returns false. */
	bool operator()(mtr_buf_t::block_t* block) const
	{
		const mtr_memo_slot_t*	start =
			reinterpret_cast<const mtr_memo_slot_t*>(
				block->begin());

		mtr_memo_slot_t*	slot =
			reinterpret_cast<mtr_memo_slot_t*>(
				block->end());

		ut_ad(!(block->used() % sizeof(*slot)));

		while (slot-- != start) {

			if (!functor(slot)) {
				return(false);
			}
		}

		return(true);
	}

	Functor functor;
};

template <typename Functor>
struct Iterate {
	Iterate() : functor() {}

	Iterate(const Functor& functor) : functor(functor) {}

	/** @return false if the functor returns false. */
	bool operator()(mtr_buf_t::block_t* block)
	{
		const mtr_memo_slot_t*	start =
			reinterpret_cast<const mtr_memo_slot_t*>(
				block->begin());

		mtr_memo_slot_t*	slot =
			reinterpret_cast<mtr_memo_slot_t*>(
				block->end());

		ut_ad(!(block->used() % sizeof(*slot)));

		while (slot-- != start) {

			if (!functor(slot)) {
				return(false);
			}
		}

		return(true);
	}

	Functor functor;
};

/** Find specific object */
struct Find {

	/** Constructor */
	Find(const void* object, ulint type)
		:
		m_slot(),
		m_type(type),
		m_object(object)
	{
		ut_a(object != NULL);
	}

	/** @return false if the object was found. */
	bool operator()(mtr_memo_slot_t* slot)
	{
		if (m_object == slot->object && m_type == slot->type) {
			m_slot = slot;
			return(false);
		}

		return(true);
	}

	/** Slot if found */
	mtr_memo_slot_t*	m_slot;

	/** Type of the object to look for */
	const ulint	m_type;

	/** The object instance to look for */
	const void*	m_object;
};

/** Find a page frame */
struct FindPage
{
	/** Constructor
	@param[in]	ptr	pointer to within a page frame
	@param[in]	flags	MTR_MEMO flags to look for */
	FindPage(const void* ptr, ulint flags)
		: m_ptr(ptr), m_flags(flags), m_slot(NULL)
	{
		/* There must be some flags to look for. */
		ut_ad(flags);
		/* We can only look for page-related flags. */
		ut_ad(!(flags & ulint(~(MTR_MEMO_PAGE_S_FIX
					| MTR_MEMO_PAGE_X_FIX
					| MTR_MEMO_PAGE_SX_FIX
					| MTR_MEMO_BUF_FIX
					| MTR_MEMO_MODIFY))));
	}

	/** Visit a memo entry.
	@param[in]	slot	memo entry to visit
	@retval	false	if a page was found
	@retval	true	if the iteration should continue */
	bool operator()(mtr_memo_slot_t* slot)
	{
		ut_ad(m_slot == NULL);

		if (!(m_flags & slot->type) || slot->object == NULL) {
			return(true);
		}

		buf_block_t* block = reinterpret_cast<buf_block_t*>(
			slot->object);

		if (m_ptr < block->frame
		    || m_ptr >= block->frame + srv_page_size) {
			return(true);
		}

		ut_ad(!(m_flags & (MTR_MEMO_PAGE_S_FIX
				   | MTR_MEMO_PAGE_SX_FIX
				   | MTR_MEMO_PAGE_X_FIX))
		      || rw_lock_own_flagged(&block->lock, m_flags));

		m_slot = slot;
		return(false);
	}

	/** @return the slot that was found */
	mtr_memo_slot_t* get_slot() const
	{
		ut_ad(m_slot != NULL);
		return(m_slot);
	}
	/** @return the block that was found */
	buf_block_t* get_block() const
	{
		return(reinterpret_cast<buf_block_t*>(get_slot()->object));
	}
private:
	/** Pointer inside a page frame to look for */
	const void*const	m_ptr;
	/** MTR_MEMO flags to look for */
	const ulint		m_flags;
	/** The slot corresponding to m_ptr */
	mtr_memo_slot_t*	m_slot;
};

/** Release latches and decrement the buffer fix count.
@param slot	memo slot */
static void memo_slot_release(mtr_memo_slot_t *slot)
{
  switch (slot->type) {
  case MTR_MEMO_S_LOCK:
    rw_lock_s_unlock(reinterpret_cast<rw_lock_t*>(slot->object));
    break;
  case MTR_MEMO_SX_LOCK:
    rw_lock_sx_unlock(reinterpret_cast<rw_lock_t*>(slot->object));
    break;
  case MTR_MEMO_SPACE_X_LOCK:
    {
      fil_space_t *space= static_cast<fil_space_t*>(slot->object);
      space->set_committed_size();
      rw_lock_x_unlock(&space->latch);
    }
    break;
  case MTR_MEMO_X_LOCK:
    rw_lock_x_unlock(reinterpret_cast<rw_lock_t*>(slot->object));
    break;
  default:
#ifdef UNIV_DEBUG
    switch (slot->type & ~MTR_MEMO_MODIFY) {
    case MTR_MEMO_BUF_FIX:
    case MTR_MEMO_PAGE_S_FIX:
    case MTR_MEMO_PAGE_SX_FIX:
    case MTR_MEMO_PAGE_X_FIX:
      break;
    default:
      ut_ad("invalid type" == 0);
      break;
    }
#endif /* UNIV_DEBUG */
    buf_block_t *block= reinterpret_cast<buf_block_t*>(slot->object);
    buf_page_release_latch(block, slot->type & ~MTR_MEMO_MODIFY);
    block->unfix();
    break;
  }
  slot->object= nullptr;
}

/** Release the latches acquired by the mini-transaction. */
struct ReleaseLatches {
  /** @return true always. */
  bool operator()(mtr_memo_slot_t *slot) const
  {
    if (!slot->object)
      return true;
    switch (slot->type) {
    case MTR_MEMO_S_LOCK:
      rw_lock_s_unlock(reinterpret_cast<rw_lock_t*>(slot->object));
      break;
    case MTR_MEMO_SPACE_X_LOCK:
      {
        fil_space_t *space= static_cast<fil_space_t*>(slot->object);
        space->set_committed_size();
        rw_lock_x_unlock(&space->latch);
      }
      break;
    case MTR_MEMO_X_LOCK:
      rw_lock_x_unlock(reinterpret_cast<rw_lock_t*>(slot->object));
      break;
    case MTR_MEMO_SX_LOCK:
      rw_lock_sx_unlock(reinterpret_cast<rw_lock_t*>(slot->object));
      break;
    default:
#ifdef UNIV_DEBUG
      switch (slot->type & ~MTR_MEMO_MODIFY) {
      case MTR_MEMO_BUF_FIX:
      case MTR_MEMO_PAGE_S_FIX:
      case MTR_MEMO_PAGE_SX_FIX:
      case MTR_MEMO_PAGE_X_FIX:
        break;
      default:
        ut_ad("invalid type" == 0);
        break;
      }
#endif /* UNIV_DEBUG */
      buf_block_t *block= reinterpret_cast<buf_block_t*>(slot->object);
      buf_page_release_latch(block, slot->type & ~MTR_MEMO_MODIFY);
      block->unfix();
      break;
    }
    slot->object= NULL;
    return true;
  }
};

/** Release the latches and blocks acquired by the mini-transaction. */
struct ReleaseAll {
  /** @return true always. */
  bool operator()(mtr_memo_slot_t *slot) const
  {
    if (slot->object)
      memo_slot_release(slot);
    return true;
  }
};

#ifdef UNIV_DEBUG
/** Check that all slots have been handled. */
struct DebugCheck {
	/** @return true always. */
	bool operator()(const mtr_memo_slot_t* slot) const
	{
		ut_ad(!slot->object);
		return(true);
	}
};
#endif

/** Release page latches held by the mini-transaction. */
struct ReleaseBlocks
{
  const lsn_t start, end;
#ifdef UNIV_DEBUG
  const mtr_buf_t &memo;

  ReleaseBlocks(lsn_t start, lsn_t end, const mtr_buf_t &memo) :
    start(start), end(end), memo(memo)
#else /* UNIV_DEBUG */
  ReleaseBlocks(lsn_t start, lsn_t end, const mtr_buf_t&) :
    start(start), end(end)
#endif /* UNIV_DEBUG */
  {
    ut_ad(start);
    ut_ad(end);
  }

  /** @return true always */
  bool operator()(mtr_memo_slot_t* slot) const
  {
    if (!slot->object)
      return true;
    switch (slot->type) {
    case MTR_MEMO_PAGE_X_MODIFY:
    case MTR_MEMO_PAGE_SX_MODIFY:
      break;
    default:
      ut_ad(!(slot->type & MTR_MEMO_MODIFY));
      return true;
    }

    buf_flush_note_modification(static_cast<buf_block_t*>(slot->object),
                                start, end);
    return true;
  }
};

/** Start a mini-transaction. */
void mtr_t::start()
{
  ut_ad(!m_freed_pages);
  ut_ad(!m_freed_space);
  MEM_UNDEFINED(this, sizeof *this);
  MEM_MAKE_DEFINED(&m_freed_space, sizeof m_freed_space);
  MEM_MAKE_DEFINED(&m_freed_pages, sizeof m_freed_pages);

  ut_d(m_start= true);
  ut_d(m_commit= false);

  m_last= nullptr;
  m_last_offset= 0;

  new(&m_memo) mtr_buf_t();
  new(&m_log) mtr_buf_t();

  m_made_dirty= false;
  m_inside_ibuf= false;
  m_modifications= false;
  m_log_mode= MTR_LOG_ALL;
  ut_d(m_user_space_id= TRX_SYS_SPACE);
  m_user_space= nullptr;
  m_commit_lsn= 0;
  m_trim_pages= false;
}
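
/* Illustrative usage sketch (added for exposition, not part of the
original file; the caller shown here is hypothetical): a mini-transaction
brackets page latching and redo logging between start() and commit().

	mtr_t	mtr;
	mtr.start();
	// Latch a tablespace or page, e.g. via mtr_x_lock_space(), which
	// registers the latch in the memo; page modifications append
	// redo records to the buffer returned by mtr.get_log().
	mtr.commit();	// write the redo log, then release all latches
*/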

/** Release the resources */
inline void mtr_t::release_resources()
{
  ut_ad(is_active());
  ut_d(m_memo.for_each_block_in_reverse(CIterate<DebugCheck>()));
  m_log.erase();
  m_memo.erase();
  ut_d(m_commit= true);
}

/** Commit a mini-transaction. */
void mtr_t::commit()
{
  ut_ad(is_active());
  ut_ad(!is_inside_ibuf());

  /* This is a dirty read, for debugging. */
  ut_ad(!m_modifications || !recv_no_log_write);
  ut_ad(!m_modifications || m_log_mode != MTR_LOG_NONE);

  if (m_modifications && (m_log_mode == MTR_LOG_NO_REDO || !m_log.empty()))
  {
    ut_ad(!srv_read_only_mode || m_log_mode == MTR_LOG_NO_REDO);

    std::pair<lsn_t,page_flush_ahead> lsns;

    if (UNIV_LIKELY(m_log_mode == MTR_LOG_ALL))
    {
      lsns= do_write();

      if (m_made_dirty)
        mysql_mutex_lock(&log_sys.flush_order_mutex);

      /* It is now safe to release log_sys.mutex because
      log_sys.flush_order_mutex will ensure that we are the first one
      to insert into buf_pool.flush_list. */
      mysql_mutex_unlock(&log_sys.mutex);
    }
    else
    {
      ut_ad(m_log_mode == MTR_LOG_NO_REDO);
      ut_ad(m_log.size() == 0);
      m_commit_lsn= log_sys.get_lsn();
      lsns= { m_commit_lsn, PAGE_FLUSH_NO };
      if (UNIV_UNLIKELY(m_made_dirty)) /* This should be IMPORT TABLESPACE */
        mysql_mutex_lock(&log_sys.flush_order_mutex);
    }

    if (m_freed_pages)
    {
      ut_ad(!m_freed_pages->empty());
      ut_ad(m_freed_space);
      ut_ad(memo_contains(*m_freed_space));
      ut_ad(is_named_space(m_freed_space));
      /* Update the last freed lsn */
      m_freed_space->update_last_freed_lsn(m_commit_lsn);

      if (!is_trim_pages())
        for (const auto &range : *m_freed_pages)
          m_freed_space->add_free_range(range);
      else
        m_freed_space->clear_freed_ranges();
      delete m_freed_pages;
      m_freed_pages= nullptr;
      m_freed_space= nullptr;
      /* mtr_t::start() will reset m_trim_pages */
    }
    else
      ut_ad(!m_freed_space);

    m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks>
                                     (ReleaseBlocks(lsns.first, m_commit_lsn,
                                                    m_memo)));
    if (m_made_dirty)
      mysql_mutex_unlock(&log_sys.flush_order_mutex);

    m_memo.for_each_block_in_reverse(CIterate<ReleaseLatches>());

    if (UNIV_UNLIKELY(lsns.second != PAGE_FLUSH_NO))
      buf_flush_ahead(m_commit_lsn, lsns.second == PAGE_FLUSH_SYNC);

    if (m_made_dirty)
      srv_stats.log_write_requests.inc();
  }
  else
    m_memo.for_each_block_in_reverse(CIterate<ReleaseAll>());

  release_resources();
}

/** Shrink a tablespace. */
struct Shrink
{
  /** the first non-existing page in the tablespace */
  const page_id_t high;

  Shrink(const fil_space_t &space) : high({space.id, space.size}) {}

  bool operator()(mtr_memo_slot_t *slot) const
  {
    if (!slot->object)
      return true;
    switch (slot->type) {
    default:
      ut_ad("invalid type" == 0);
      return false;
    case MTR_MEMO_SPACE_X_LOCK:
      ut_ad(high.space() == static_cast<fil_space_t*>(slot->object)->id);
      return true;
    case MTR_MEMO_PAGE_X_MODIFY:
    case MTR_MEMO_PAGE_SX_MODIFY:
    case MTR_MEMO_PAGE_X_FIX:
    case MTR_MEMO_PAGE_SX_FIX:
      auto &bpage= static_cast<buf_block_t*>(slot->object)->page;
      ut_ad(bpage.io_fix() == BUF_IO_NONE);
      const auto id= bpage.id();
      if (id < high)
      {
        ut_ad(id.space() == high.space() ||
              (id == page_id_t{0, TRX_SYS_PAGE_NO} &&
               srv_is_undo_tablespace(high.space())));
        break;
      }
      ut_ad(id.space() == high.space());
      ut_ad(bpage.state() == BUF_BLOCK_FILE_PAGE);
      if (bpage.oldest_modification() > 1)
        bpage.clear_oldest_modification(false);
      slot->type= static_cast<mtr_memo_type_t>(slot->type & ~MTR_MEMO_MODIFY);
    }
    return true;
  }
};

/** Commit a mini-transaction that is shrinking a tablespace.
@param space   tablespace that is being shrunk */
void mtr_t::commit_shrink(fil_space_t &space)
{
  ut_ad(is_active());
  ut_ad(!is_inside_ibuf());
  ut_ad(!high_level_read_only);
  ut_ad(m_modifications);
  ut_ad(m_made_dirty);
  ut_ad(!recv_recovery_is_on());
  ut_ad(m_log_mode == MTR_LOG_ALL);
  ut_ad(UT_LIST_GET_LEN(space.chain) == 1);

  log_write_and_flush_prepare();

  const lsn_t start_lsn= do_write().first;

  mysql_mutex_lock(&log_sys.flush_order_mutex);
  /* Durably write the reduced FSP_SIZE before truncating the data file. */
  log_write_and_flush();

  if (m_freed_pages)
  {
    ut_ad(!m_freed_pages->empty());
    ut_ad(m_freed_space == &space);
    ut_ad(memo_contains(*m_freed_space));
    ut_ad(is_named_space(m_freed_space));
    m_freed_space->update_last_freed_lsn(m_commit_lsn);

    if (!is_trim_pages())
      for (const auto &range : *m_freed_pages)
        m_freed_space->add_free_range(range);
    else
      m_freed_space->clear_freed_ranges();
    delete m_freed_pages;
    m_freed_pages= nullptr;
    m_freed_space= nullptr;
    /* mtr_t::start() will reset m_trim_pages */
  }
  else
    ut_ad(!m_freed_space);

  m_memo.for_each_block_in_reverse(CIterate<Shrink>{space});

  m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks>
                                   (ReleaseBlocks(start_lsn, m_commit_lsn,
                                                  m_memo)));
  mysql_mutex_unlock(&log_sys.flush_order_mutex);

  mutex_enter(&fil_system.mutex);
  ut_ad(space.is_being_truncated);
  ut_ad(space.is_stopping());
  space.set_stopping(false);
  space.is_being_truncated= false;
  mutex_exit(&fil_system.mutex);

  /* Truncate the file before releasing the space.latch. File extension
  (and any allocation of pages beyond the current intended end of the file)
  is covered by exclusive space.latch, which we are still holding here. */
  os_file_truncate(space.chain.start->name, space.chain.start->handle,
                   os_offset_t{space.size} << srv_page_size_shift, true);

  m_memo.for_each_block_in_reverse(CIterate<ReleaseLatches>());
  srv_stats.log_write_requests.inc();

  release_resources();
}

/** Commit a mini-transaction that did not modify any pages,
but generated some redo log on a higher level, such as
FILE_MODIFY records and an optional FILE_CHECKPOINT marker.
The caller must hold log_sys.mutex.
This is to be used at log_checkpoint().
@param[in]	checkpoint_lsn		log checkpoint LSN, or 0 */
void mtr_t::commit_files(lsn_t checkpoint_lsn)
{
	mysql_mutex_assert_owner(&log_sys.mutex);
	ut_ad(is_active());
	ut_ad(!is_inside_ibuf());
	ut_ad(m_log_mode == MTR_LOG_ALL);
	ut_ad(!m_made_dirty);
	ut_ad(m_memo.size() == 0);
	ut_ad(!srv_read_only_mode);
	ut_ad(!m_freed_space);
	ut_ad(!m_freed_pages);

	if (checkpoint_lsn) {
		byte*	ptr = m_log.push<byte*>(SIZE_OF_FILE_CHECKPOINT);
		compile_time_assert(SIZE_OF_FILE_CHECKPOINT == 3 + 8 + 1);
		*ptr = FILE_CHECKPOINT | (SIZE_OF_FILE_CHECKPOINT - 2);
		::memset(ptr + 1, 0, 2);
		mach_write_to_8(ptr + 3, checkpoint_lsn);
		ptr[3 + 8] = 0;
	} else {
		*m_log.push<byte*>(1) = 0;
	}

	finish_write(m_log.size());
	srv_stats.log_write_requests.inc();
	release_resources();

	if (checkpoint_lsn) {
		DBUG_PRINT("ib_log",
			   ("FILE_CHECKPOINT(" LSN_PF ") written at " LSN_PF,
			    checkpoint_lsn, log_sys.get_lsn()));
	}
}

#ifdef UNIV_DEBUG
/** Check if a tablespace is associated with the mini-transaction
(needed for generating a FILE_MODIFY record)
@param[in]	space	tablespace
@return whether the mini-transaction is associated with the space */
bool
mtr_t::is_named_space(ulint space) const
{
	ut_ad(!m_user_space || m_user_space->id != TRX_SYS_SPACE);

	switch (m_log_mode) {
	case MTR_LOG_NONE:
	case MTR_LOG_NO_REDO:
		return(true);
	case MTR_LOG_ALL:
		return(m_user_space_id == space
		       || is_predefined_tablespace(space));
	}

	ut_error;
	return(false);
}
/** Check if a tablespace is associated with the mini-transaction
(needed for generating a FILE_MODIFY record)
@param[in]	space	tablespace
@return whether the mini-transaction is associated with the space */
bool mtr_t::is_named_space(const fil_space_t* space) const
{
  ut_ad(!m_user_space || m_user_space->id != TRX_SYS_SPACE);

  switch (m_log_mode) {
  case MTR_LOG_NONE:
  case MTR_LOG_NO_REDO:
    return true;
  case MTR_LOG_ALL:
    return m_user_space == space || is_predefined_tablespace(space->id);
  }

  ut_error;
  return false;
}
#endif /* UNIV_DEBUG */

/** Acquire a tablespace X-latch.
NOTE: use mtr_x_lock_space().
@param[in]	space_id	tablespace ID
@param[in]	file		file name from where called
@param[in]	line		line number in file
@return the tablespace object (never NULL) */
fil_space_t*
mtr_t::x_lock_space(ulint space_id, const char* file, unsigned line)
{
	fil_space_t*	space;

	ut_ad(is_active());

	if (space_id == TRX_SYS_SPACE) {
		space = fil_system.sys_space;
	} else if ((space = m_user_space) && space_id == space->id) {
	} else {
		space = fil_space_get(space_id);
		ut_ad(m_log_mode != MTR_LOG_NO_REDO
		      || space->purpose == FIL_TYPE_TEMPORARY
		      || space->purpose == FIL_TYPE_IMPORT);
	}

	ut_ad(space);
	ut_ad(space->id == space_id);
	x_lock_space(space, file, line);
	return(space);
}

/** Release an object in the memo stack.
@return true if released */
bool
mtr_t::memo_release(const void* object, ulint type)
{
	ut_ad(is_active());

	/* We cannot release a page that has been written to in the
	middle of a mini-transaction. */
	ut_ad(!m_modifications || type != MTR_MEMO_PAGE_X_FIX);

	Iterate<Find> iteration(Find(object, type));

	if (!m_memo.for_each_block_in_reverse(iteration)) {
		memo_slot_release(iteration.functor.m_slot);
		return(true);
	}

	return(false);
}

/** Release a page latch.
@param[in]	ptr	pointer to within a page frame
@param[in]	type	object type: MTR_MEMO_PAGE_X_FIX, ... */
void
mtr_t::release_page(const void* ptr, mtr_memo_type_t type)
{
	ut_ad(is_active());

	/* We cannot release a page that has been written to in the
	middle of a mini-transaction. */
	ut_ad(!m_modifications || type != MTR_MEMO_PAGE_X_FIX);

	Iterate<FindPage> iteration(FindPage(ptr, type));

	if (!m_memo.for_each_block_in_reverse(iteration)) {
		memo_slot_release(iteration.functor.get_slot());
		return;
	}

	/* The page was not found! */
	ut_ad(0);
}

static bool log_margin_warned;
static time_t log_margin_warn_time;
static bool log_close_warned;
static time_t log_close_warn_time;

/** Check that the redo log written since the latest checkpoint will not
be overwritten. If the estimated write of len bytes would make the
checkpoint age exceed log_capacity, warn and request that a flush or
checkpoint be initiated.
@param len   length of the data to be written */
static void log_margin_checkpoint_age(ulint len)
{
  const ulint framing_size= log_sys.framing_size();
  /* actual length stored per block */
  const ulint len_per_blk= OS_FILE_LOG_BLOCK_SIZE - framing_size;

  /* actual data length in last block already written */
  ulint extra_len= log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE;

  ut_ad(extra_len >= LOG_BLOCK_HDR_SIZE);
  extra_len-= LOG_BLOCK_HDR_SIZE;

  /* total extra length for block header and trailer */
  extra_len= ((len + extra_len) / len_per_blk) * framing_size;

  const ulint margin= len + extra_len;

  mysql_mutex_assert_owner(&log_sys.mutex);

  const lsn_t lsn= log_sys.get_lsn();

  if (UNIV_UNLIKELY(margin > log_sys.log_capacity))
  {
    time_t t= time(nullptr);

    /* return with warning output to avoid deadlock */
    if (!log_margin_warned || difftime(t, log_margin_warn_time) > 15)
    {
      log_margin_warned= true;
      log_margin_warn_time= t;

      ib::error() << "innodb_log_file_size is too small "
                     "for mini-transaction size " << len;
    }
  }
  else if (UNIV_LIKELY(lsn + margin <= log_sys.last_checkpoint_lsn +
                       log_sys.log_capacity))
    return;

  log_sys.set_check_flush_or_checkpoint();
}
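
/* Worked example (illustrative only, added for exposition; the concrete
numbers assume an unencrypted redo log with 512-byte blocks where
framing_size(), i.e. block header plus checksum trailer, is 16 bytes and
len_per_blk is therefore 496): writing len = 1000 bytes while
extra_len = 100 payload bytes already occupy the current block spills
into (1000 + 100) / 496 = 2 additional framed blocks, so
margin = 1000 + 2 * 16 = 1032 bytes. That margin, added to the current
LSN, is what is compared against last_checkpoint_lsn + log_capacity. */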


/** Open the log for log_write_low(). The log must be closed with log_close().
@param len length of the data to be written
@return start lsn of the log record */
static lsn_t log_reserve_and_open(size_t len)
{
  for (ut_d(ulint count= 0);;)
  {
    mysql_mutex_assert_owner(&log_sys.mutex);

    /* Calculate an upper limit for the space the string may take in
    the log buffer */

    size_t len_upper_limit= (4 * OS_FILE_LOG_BLOCK_SIZE) +
      srv_log_write_ahead_size + (5 * len) / 4;

    if (log_sys.buf_free + len_upper_limit <= srv_log_buffer_size)
      break;

    mysql_mutex_unlock(&log_sys.mutex);
    DEBUG_SYNC_C("log_buf_size_exceeded");

    /* Not enough free space, do a write of the log buffer */
    log_write_up_to(log_sys.get_lsn(), false);

    srv_stats.log_waits.inc();

    ut_ad(++count < 50);

    mysql_mutex_lock(&log_sys.mutex);
  }

  return log_sys.get_lsn();
}

/** Append data to the log buffer. */
static void log_write_low(const void *str, size_t size)
{
  mysql_mutex_assert_owner(&log_sys.mutex);
  const ulint trailer_offset= log_sys.trailer_offset();

  do
  {
    /* Calculate a part length */
    size_t len= size;
    size_t data_len= (log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE) + size;

    if (data_len > trailer_offset)
    {
      data_len= trailer_offset;
      len= trailer_offset - log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE;
    }

    memcpy(log_sys.buf + log_sys.buf_free, str, len);

    size-= len;
    str= static_cast<const char*>(str) + len;

    byte *log_block= static_cast<byte*>(ut_align_down(log_sys.buf +
                                                      log_sys.buf_free,
                                                      OS_FILE_LOG_BLOCK_SIZE));

    log_block_set_data_len(log_block, data_len);
    lsn_t lsn= log_sys.get_lsn();

    if (data_len == trailer_offset)
    {
      /* This block became full */
      log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE);
      log_block_set_checkpoint_no(log_block, log_sys.next_checkpoint_no);
      len+= log_sys.framing_size();
      lsn+= len;
      /* Initialize the next block header */
      log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, lsn);
    }
    else
      lsn+= len;

    log_sys.set_lsn(lsn);
    log_sys.buf_free+= len;

    ut_ad(log_sys.buf_free <= size_t{srv_log_buffer_size});
  }
  while (size);
}
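
/* Worked example (illustrative only, added for exposition; the numbers
assume an unencrypted redo log with 512-byte blocks and
trailer_offset() = 508): appending a 500-byte record when buf_free points
408 bytes into the current block first copies 508 - 408 = 100 bytes,
marks that block full, advances the LSN past the block framing, and
initializes the header of the next block; the second loop iteration then
copies the remaining 400 bytes just after that new block's header. */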

/** Close the log at mini-transaction commit.
@return whether buffer pool flushing is needed */
static mtr_t::page_flush_ahead log_close(lsn_t lsn)
{
  mysql_mutex_assert_owner(&log_sys.mutex);
  ut_ad(lsn == log_sys.get_lsn());

  byte *log_block= static_cast<byte*>(ut_align_down(log_sys.buf +
                                                    log_sys.buf_free,
                                                    OS_FILE_LOG_BLOCK_SIZE));

  if (!log_block_get_first_rec_group(log_block))
  {
    /* We initialized a new log block which was not written
    full by the current mtr: the next mtr log record group
    will start within this block at the offset data_len */
    log_block_set_first_rec_group(log_block,
                                  log_block_get_data_len(log_block));
  }

  if (log_sys.buf_free > log_sys.max_buf_free)
    log_sys.set_check_flush_or_checkpoint();

  const lsn_t checkpoint_age= lsn - log_sys.last_checkpoint_lsn;

  if (UNIV_UNLIKELY(checkpoint_age >= log_sys.log_capacity) &&
      /* silence message on create_log_file() after the log had been deleted */
      checkpoint_age != lsn)
  {
    time_t t= time(nullptr);
    if (!log_close_warned || difftime(t, log_close_warn_time) > 15)
    {
      log_close_warned= true;
      log_close_warn_time= t;

      ib::error() << "The age of the last checkpoint is " << checkpoint_age
                  << ", which exceeds the log capacity "
                  << log_sys.log_capacity << ".";
    }
  }
  else if (UNIV_LIKELY(checkpoint_age <= log_sys.max_modified_age_async))
    return mtr_t::PAGE_FLUSH_NO;
  else if (UNIV_LIKELY(checkpoint_age <= log_sys.max_checkpoint_age))
    return mtr_t::PAGE_FLUSH_ASYNC;

  log_sys.set_check_flush_or_checkpoint();
  return mtr_t::PAGE_FLUSH_SYNC;
}

/** Write the block contents to the REDO log */
struct mtr_write_log
{
  /** Append a block to the redo log buffer.
  @return whether the appending should continue */
  bool operator()(const mtr_buf_t::block_t *block) const
  {
    log_write_low(block->begin(), block->used());
    return true;
  }
};

std::pair<lsn_t,mtr_t::page_flush_ahead> mtr_t::do_write()
{
	ut_ad(!recv_no_log_write);
	ut_ad(m_log_mode == MTR_LOG_ALL);

	ulint	len	= m_log.size();
	ut_ad(len > 0);

	if (len > srv_log_buffer_size / 2) {
		log_buffer_extend(ulong((len + 1) * 2));
	}

	fil_space_t*	space = m_user_space;

	if (space != NULL && is_predefined_tablespace(space->id)) {
		/* Omit FILE_MODIFY for predefined tablespaces. */
		space = NULL;
	}

	mysql_mutex_lock(&log_sys.mutex);

	if (fil_names_write_if_was_clean(space)) {
		len = m_log.size();
	} else {
		/* This was not the first time of dirtying a
		tablespace since the latest checkpoint. */
		ut_ad(len == m_log.size());
	}

	*m_log.push<byte*>(1) = 0;
	len++;

	/* check and attempt a checkpoint if exceeding capacity */
	log_margin_checkpoint_age(len);

	return finish_write(len);
}

/** Append the redo log records to the redo log buffer.
@param len   number of bytes to write
@return {start_lsn,flush_ahead} */
inline std::pair<lsn_t,mtr_t::page_flush_ahead> mtr_t::finish_write(ulint len)
{
	ut_ad(m_log_mode == MTR_LOG_ALL);
	mysql_mutex_assert_owner(&log_sys.mutex);
	ut_ad(m_log.size() == len);
	ut_ad(len > 0);

	lsn_t start_lsn;

	if (m_log.is_small()) {
		const mtr_buf_t::block_t* front = m_log.front();
		ut_ad(len <= front->used());

		m_commit_lsn = log_reserve_and_write_fast(front->begin(), len,
							  &start_lsn);

		if (!m_commit_lsn) {
			goto piecewise;
		}
	} else {
piecewise:
		/* Open the database log for log_write_low */
		start_lsn = log_reserve_and_open(len);
		mtr_write_log write_log;
		m_log.for_each_block(write_log);
		m_commit_lsn = log_sys.get_lsn();
	}
	page_flush_ahead flush= log_close(m_commit_lsn);
	DBUG_EXECUTE_IF("ib_log_flush_ahead", flush = PAGE_FLUSH_SYNC;);

	return std::make_pair(start_lsn, flush);
}

/** Find out whether a block was not X-latched by the mini-transaction */
struct FindBlockX
{
  const buf_block_t &block;

  FindBlockX(const buf_block_t &block): block(block) {}

  /** @return whether the block was not found x-latched */
  bool operator()(const mtr_memo_slot_t *slot) const
  {
    return slot->object != &block || slot->type != MTR_MEMO_PAGE_X_FIX;
  }
};

#ifdef UNIV_DEBUG
/** Assert that the block is not present in the mini-transaction */
struct FindNoBlock
{
  const buf_block_t &block;

  FindNoBlock(const buf_block_t &block): block(block) {}

  /** @return whether the block was not found */
  bool operator()(const mtr_memo_slot_t *slot) const
  {
    return slot->object != &block;
  }
};
#endif /* UNIV_DEBUG */

bool mtr_t::have_x_latch(const buf_block_t &block) const
{
  if (m_memo.for_each_block(CIterate<FindBlockX>(FindBlockX(block))))
  {
    ut_ad(m_memo.for_each_block(CIterate<FindNoBlock>(FindNoBlock(block))));
    ut_ad(!memo_contains_flagged(&block,
                                 MTR_MEMO_PAGE_S_FIX | MTR_MEMO_PAGE_SX_FIX |
                                 MTR_MEMO_BUF_FIX | MTR_MEMO_MODIFY));
    return false;
  }
  ut_ad(rw_lock_own(&block.lock, RW_LOCK_X));
  return true;
}

#ifdef UNIV_DEBUG
/** Check if we are holding an rw-latch in this mini-transaction
@param lock   latch to search for
@param type   held latch type
@return whether (lock,type) is contained */
bool mtr_t::memo_contains(const rw_lock_t &lock, mtr_memo_type_t type)
{
  Iterate<Find> iteration(Find(&lock, type));
  if (m_memo.for_each_block_in_reverse(iteration))
    return false;

  switch (type) {
  case MTR_MEMO_X_LOCK:
    ut_ad(rw_lock_own(&lock, RW_LOCK_X));
    break;
  case MTR_MEMO_SX_LOCK:
    ut_ad(rw_lock_own(&lock, RW_LOCK_SX));
    break;
  case MTR_MEMO_S_LOCK:
    ut_ad(rw_lock_own(&lock, RW_LOCK_S));
    break;
  default:
    break;
  }

  return true;
}

/** Check if we are holding exclusive tablespace latch
@param space  tablespace to search for
@return whether space.latch is being held */
bool mtr_t::memo_contains(const fil_space_t& space)
{
  Iterate<Find> iteration(Find(&space, MTR_MEMO_SPACE_X_LOCK));
  if (m_memo.for_each_block_in_reverse(iteration))
    return false;
  ut_ad(rw_lock_own(const_cast<rw_lock_t*>(&space.latch), RW_LOCK_X));
  return true;
}

/** Debug check for flags */
struct FlaggedCheck {
	FlaggedCheck(const void* ptr, ulint flags)
		:
		m_ptr(ptr),
		m_flags(flags)
	{
		/* There must be some flags to look for. */
		ut_ad(flags);
		/* Look for rw-lock-related and page-related flags. */
		ut_ad(!(flags & ulint(~(MTR_MEMO_PAGE_S_FIX
					| MTR_MEMO_PAGE_X_FIX
					| MTR_MEMO_PAGE_SX_FIX
					| MTR_MEMO_BUF_FIX
					| MTR_MEMO_MODIFY
					| MTR_MEMO_X_LOCK
					| MTR_MEMO_SX_LOCK
					| MTR_MEMO_S_LOCK))));
		/* Either some rw-lock-related or page-related flags
		must be specified, but not both at the same time. */
		ut_ad(!(flags & (MTR_MEMO_PAGE_S_FIX
				 | MTR_MEMO_PAGE_X_FIX
				 | MTR_MEMO_PAGE_SX_FIX
				 | MTR_MEMO_BUF_FIX
				 | MTR_MEMO_MODIFY))
		      == !!(flags & (MTR_MEMO_X_LOCK
				     | MTR_MEMO_SX_LOCK
				     | MTR_MEMO_S_LOCK)));
	}

	/** Visit a memo entry.
	@param[in]	slot	memo entry to visit
	@retval	false	if m_ptr was found
	@retval	true	if the iteration should continue */
	bool operator()(const mtr_memo_slot_t* slot) const
	{
		if (m_ptr != slot->object || !(m_flags & slot->type)) {
			return(true);
		}

		if (ulint flags = m_flags & (MTR_MEMO_PAGE_S_FIX
					     | MTR_MEMO_PAGE_SX_FIX
					     | MTR_MEMO_PAGE_X_FIX)) {
			rw_lock_t* lock = &static_cast<buf_block_t*>(
				const_cast<void*>(m_ptr))->lock;
			ut_ad(rw_lock_own_flagged(lock, flags));
		} else {
			rw_lock_t* lock = static_cast<rw_lock_t*>(
				const_cast<void*>(m_ptr));
			ut_ad(rw_lock_own_flagged(lock, m_flags >> 5));
		}

		return(false);
	}

	const void*const	m_ptr;
	const ulint		m_flags;
};

/** Check if memo contains the given item.
@param object		object to search
@param flags		specify types of object (can be ORred) of
			MTR_MEMO_PAGE_S_FIX ... values
@return true if contains */
bool
mtr_t::memo_contains_flagged(const void* ptr, ulint flags) const
{
	ut_ad(is_active());

	return !m_memo.for_each_block_in_reverse(
		CIterate<FlaggedCheck>(FlaggedCheck(ptr, flags)));
}

/** Check if memo contains the given page.
@param[in]	ptr	pointer to within buffer frame
@param[in]	flags	specify types of object with OR of
			MTR_MEMO_PAGE_S_FIX... values
@return	the block
@retval	NULL	if not found */
buf_block_t*
mtr_t::memo_contains_page_flagged(
	const byte*	ptr,
	ulint		flags) const
{
	Iterate<FindPage> iteration(FindPage(ptr, flags));
	return m_memo.for_each_block_in_reverse(iteration)
		? NULL : iteration.functor.get_block();
}

/** Print info of an mtr handle. */
void
mtr_t::print() const
{
	ib::info() << "Mini-transaction handle: memo size "
		<< m_memo.size() << " bytes log size "
		<< get_log()->size() << " bytes";
}

#endif /* UNIV_DEBUG */


/** Find a block, preferably in MTR_MEMO_MODIFY state */
struct FindModified
{
  mtr_memo_slot_t *found= nullptr;
  const buf_block_t& block;

  FindModified(const buf_block_t &block) : block(block) {}
  bool operator()(mtr_memo_slot_t *slot)
  {
    if (slot->object != &block)
      return true;
    found= slot;
    return !(slot->type & (MTR_MEMO_MODIFY |
                           MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX));
  }
};

/** Mark the given latched page as modified.
@param block   page that will be modified */
void mtr_t::modify(const buf_block_t &block)
{
  if (UNIV_UNLIKELY(m_memo.empty()))
  {
    /* This must be PageConverter::update_page() in IMPORT TABLESPACE. */
    ut_ad(!block.page.in_LRU_list);
    return;
  }

  Iterate<FindModified> iteration((FindModified(block)));
  if (UNIV_UNLIKELY(m_memo.for_each_block(iteration)))
  {
    ut_ad("modifying an unlatched page" == 0);
    return;
  }
  iteration.functor.found->type= static_cast<mtr_memo_type_t>
    (iteration.functor.found->type | MTR_MEMO_MODIFY);
}