1 /*****************************************************************************
2 
3 Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file buf/buf0buddy.cc
29 Binary buddy allocator for compressed pages
30 
31 Created December 2006 by Marko Makela
32 *******************************************************/
33 
34 #define THIS_MODULE
35 #include "buf0buddy.h"
36 #ifdef UNIV_NONINL
37 # include "buf0buddy.ic"
38 #endif
39 #undef THIS_MODULE
40 #include "buf0buf.h"
41 #include "buf0lru.h"
42 #include "buf0flu.h"
43 #include "page0zip.h"
44 #include "srv0start.h"
45 
46 /** When freeing a buf we attempt to coalesce by looking at its buddy
47 and deciding whether it is free or not. To ascertain if the buddy is
48 free we look for BUF_BUDDY_STAMP_FREE at BUF_BUDDY_STAMP_OFFSET
49 within the buddy. The question is how we can be sure that it is
50 safe to look at BUF_BUDDY_STAMP_OFFSET.
51 The answer lies in following invariants:
* All blocks allocated by the buddy allocator are used for compressed
page frames.
* A compressed table always has space_id < SRV_LOG_SPACE_FIRST_ID
55 * BUF_BUDDY_STAMP_OFFSET always points to the space_id field in
56 a frame.
57   -- The above is true because we look at these fields when the
58      corresponding buddy block is free which implies that:
59      * The block we are looking at must have an address aligned at
60        the same size that its free buddy has. For example, if we have
61        a free block of 8K then its buddy's address must be aligned at
62        8K as well.
63      * It is possible that the block we are looking at may have been
64        further divided into smaller sized blocks but its starting
65        address must still remain the start of a page frame i.e.: it
66        cannot be middle of a block. For example, if we have a free
67        block of size 8K then its buddy may be divided into blocks
68        of, say, 1K, 1K, 2K, 4K but the buddy's address will still be
69        the starting address of first 1K compressed page.
70      * What is important to note is that for any given block, the
71        buddy's address cannot be in the middle of a larger block i.e.:
72        in above example, our 8K block cannot have a buddy whose address
73        is aligned on 8K but it is part of a larger 16K block.
74 */
75 
/** Offset within buf_buddy_free_t where the free or non-free stamp
is written. */
#define BUF_BUDDY_STAMP_OFFSET	FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID

/** Value that we stamp on all buffers that are currently on the zip_free
list. This value is stamped at offset BUF_BUDDY_STAMP_OFFSET. */
#define BUF_BUDDY_STAMP_FREE	(SRV_LOG_SPACE_FIRST_ID)

/** Stamp value for non-free buffers. Will be overwritten by a non-zero
value by the consumer of the block */
#define BUF_BUDDY_STAMP_NONFREE	(0XFFFFFFFF)

/* Compile-time check: the free stamp must be smaller than the
non-free stamp. */
#if BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE
# error "BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE"
#endif
91 
/** Return type of buf_buddy_is_free() */
enum buf_buddy_state_t {
	BUF_BUDDY_STATE_FREE,	/*!< The buddy is completely free */
	BUF_BUDDY_STATE_USED,	/*!< The buddy is currently in use */
	BUF_BUDDY_STATE_PARTIALLY_USED/*!< Some sub-blocks in the buddy
				are in use */
};
99 
#ifdef UNIV_DEBUG_VALGRIND
/**********************************************************************//**
Invalidate the memory area of a block that we won't access while the
block is free.
@param[in]	buf	block to invalidate
@param[in]	i	index of zip_free[]; the block is
			(BUF_BUDDY_LOW << i) bytes long */
UNIV_INLINE
void
buf_buddy_mem_invalid(
/*==================*/
	buf_buddy_free_t*	buf,
	ulint			i)
{
	const size_t	n_bytes	= BUF_BUDDY_LOW << i;

	ut_ad(i <= BUF_BUDDY_SIZES);

	UNIV_MEM_ASSERT_W(buf, n_bytes);
	UNIV_MEM_INVALID(buf, n_bytes);
}
#else /* UNIV_DEBUG_VALGRIND */
/* Without UNIV_DEBUG_VALGRIND only the slot index is checked. */
# define buf_buddy_mem_invalid(buf, i) ut_ad((i) <= BUF_BUDDY_SIZES)
#endif /* UNIV_DEBUG_VALGRIND */
119 
120 /**********************************************************************//**
121 Check if a buddy is stamped free.
122 @return	whether the buddy is free */
UNIV_INLINE(warn_unused_result)123 UNIV_INLINE MY_ATTRIBUTE((warn_unused_result))
124 bool
125 buf_buddy_stamp_is_free(
126 /*====================*/
127 	const buf_buddy_free_t*	buf)	/*!< in: block to check */
128 {
129 	return(mach_read_from_4(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET)
130 	       == BUF_BUDDY_STAMP_FREE);
131 }
132 
133 /**********************************************************************//**
134 Stamps a buddy free. */
135 UNIV_INLINE
136 void
buf_buddy_stamp_free(buf_buddy_free_t * buf,ulint i)137 buf_buddy_stamp_free(
138 /*=================*/
139 	buf_buddy_free_t*	buf,	/*!< in/out: block to stamp */
140 	ulint			i)	/*!< in: block size */
141 {
142 	ut_d(memset(buf, static_cast<int>(i), BUF_BUDDY_LOW << i));
143 	buf_buddy_mem_invalid(buf, i);
144 	mach_write_to_4(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET,
145 			BUF_BUDDY_STAMP_FREE);
146 	buf->stamp.size = i;
147 }
148 
149 /**********************************************************************//**
150 Stamps a buddy nonfree.
151 @param[in/out]	buf	block to stamp
152 @param[in]	i	block size */
153 #define buf_buddy_stamp_nonfree(buf, i) do {				\
154 	buf_buddy_mem_invalid(buf, i);					\
155 	memset(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET, 0xff, 4);	\
156 } while (0)
157 #if BUF_BUDDY_STAMP_NONFREE != 0xffffffff
158 # error "BUF_BUDDY_STAMP_NONFREE != 0xffffffff"
159 #endif
160 
161 /**********************************************************************//**
162 Get the offset of the buddy of a compressed page frame.
163 @return	the buddy relative of page */
164 UNIV_INLINE
165 void*
buf_buddy_get(byte * page,ulint size)166 buf_buddy_get(
167 /*==========*/
168 	byte*	page,	/*!< in: compressed page */
169 	ulint	size)	/*!< in: page size in bytes */
170 {
171 	ut_ad(ut_is_2pow(size));
172 	ut_ad(size >= BUF_BUDDY_LOW);
173 	ut_ad(BUF_BUDDY_LOW <= UNIV_ZIP_SIZE_MIN);
174 	ut_ad(size < BUF_BUDDY_HIGH);
175 	ut_ad(BUF_BUDDY_HIGH == UNIV_PAGE_SIZE);
176 	ut_ad(!ut_align_offset(page, size));
177 
178 	if (((ulint) page) & size) {
179 		return(page - size);
180 	} else {
181 		return(page + size);
182 	}
183 }
184 
185 /** Validate a given zip_free list. */
186 struct	CheckZipFree {
187 	ulint	i;
CheckZipFreeCheckZipFree188 	CheckZipFree(ulint i) : i (i) {}
189 
operator ()CheckZipFree190 	void	operator()(const buf_buddy_free_t* elem) const
191 	{
192 		ut_a(buf_buddy_stamp_is_free(elem));
193 		ut_a(elem->stamp.size <= i);
194 	}
195 };
196 
/** Validate the free list bp->zip_free[i] using CheckZipFree.
The bp argument is parenthesized in the expansion so that any pointer
expression can be passed safely.
@param[in]	bp	buffer pool instance
@param[in]	i	index of bp->zip_free[] */
#define BUF_BUDDY_LIST_VALIDATE(bp, i)				\
	UT_LIST_VALIDATE(list, buf_buddy_free_t,		\
			 (bp)->zip_free[i], CheckZipFree(i))
200 
#ifdef UNIV_DEBUG
/**********************************************************************//**
Debug function to validate that a buffer is indeed free, i.e. that it
is an element of buf_pool->zip_free[i]. The list is searched linearly.
@param[in]	buf_pool	buffer pool instance
@param[in]	buf		block to look for
@param[in]	i		index of buf_pool->zip_free[]
@return true if free */
UNIV_INLINE
bool
buf_buddy_check_free(
/*=================*/
	buf_pool_t*		buf_pool,
	const buf_buddy_free_t*	buf,
	ulint			i)
{
	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(!ut_align_offset(buf, BUF_BUDDY_LOW << i));
	ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));

	buf_buddy_free_t*	node
		= UT_LIST_GET_FIRST(buf_pool->zip_free[i]);

	/* Walk the list until buf is found or the list ends. */
	while (node != NULL && node != buf) {
		node = UT_LIST_GET_NEXT(list, node);
	}

	return(node == buf);
}
#endif /* UNIV_DEBUG */
230 
231 /**********************************************************************//**
232 Checks if a buf is free i.e.: in the zip_free[].
233 @retval BUF_BUDDY_STATE_FREE if fully free
234 @retval BUF_BUDDY_STATE_USED if currently in use
235 @retval BUF_BUDDY_STATE_PARTIALLY_USED if partially in use. */
236 static  MY_ATTRIBUTE((warn_unused_result))
237 buf_buddy_state_t
buf_buddy_is_free(buf_buddy_free_t * buf,ulint i)238 buf_buddy_is_free(
239 /*==============*/
240 	buf_buddy_free_t*	buf,	/*!< in: block to check */
241 	ulint			i)	/*!< in: index of
242 					buf_pool->zip_free[] */
243 {
244 #ifdef UNIV_DEBUG
245 	const ulint	size	= BUF_BUDDY_LOW << i;
246 	ut_ad(!ut_align_offset(buf, size));
247 	ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
248 #endif /* UNIV_DEBUG */
249 
250 	/* We assume that all memory from buf_buddy_alloc()
251 	is used for compressed page frames. */
252 
253 	/* We look inside the allocated objects returned by
254 	buf_buddy_alloc() and assume that each block is a compressed
255 	page that contains one of the following in space_id.
256 	* BUF_BUDDY_STAMP_FREE if the block is in a zip_free list or
257 	* BUF_BUDDY_STAMP_NONFREE if the block has been allocated but
258 	not initialized yet or
259 	* A valid space_id of a compressed tablespace
260 
261 	The call below attempts to read from free memory.  The memory
262 	is "owned" by the buddy allocator (and it has been allocated
263 	from the buffer pool), so there is nothing wrong about this. */
264 	if (!buf_buddy_stamp_is_free(buf)) {
265 		return(BUF_BUDDY_STATE_USED);
266 	}
267 
268 	/* A block may be free but a fragment of it may still be in use.
269 	To guard against that we write the free block size in terms of
270 	zip_free index at start of stamped block. Note that we can
271 	safely rely on this value only if the buf is free. */
272 	ut_ad(buf->stamp.size <= i);
273 	return(buf->stamp.size == i
274 	       ? BUF_BUDDY_STATE_FREE
275 	       : BUF_BUDDY_STATE_PARTIALLY_USED);
276 }
277 
278 /**********************************************************************//**
279 Add a block to the head of the appropriate buddy free list. */
280 UNIV_INLINE
281 void
buf_buddy_add_to_free(buf_pool_t * buf_pool,buf_buddy_free_t * buf,ulint i)282 buf_buddy_add_to_free(
283 /*==================*/
284 	buf_pool_t*		buf_pool,	/*!< in: buffer pool instance */
285 	buf_buddy_free_t*	buf,		/*!< in,own: block to be freed */
286 	ulint			i)		/*!< in: index of
287 						buf_pool->zip_free[] */
288 {
289 	ut_ad(buf_pool_mutex_own(buf_pool));
290 	ut_ad(buf_pool->zip_free[i].start != buf);
291 
292 	buf_buddy_stamp_free(buf, i);
293 	UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], buf);
294 	ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
295 }
296 
297 /**********************************************************************//**
298 Remove a block from the appropriate buddy free list. */
299 UNIV_INLINE
300 void
buf_buddy_remove_from_free(buf_pool_t * buf_pool,buf_buddy_free_t * buf,ulint i)301 buf_buddy_remove_from_free(
302 /*=======================*/
303 	buf_pool_t*		buf_pool,	/*!< in: buffer pool instance */
304 	buf_buddy_free_t*	buf,		/*!< in,own: block to be freed */
305 	ulint			i)		/*!< in: index of
306 						buf_pool->zip_free[] */
307 {
308 	ut_ad(buf_pool_mutex_own(buf_pool));
309 	ut_ad(buf_buddy_check_free(buf_pool, buf, i));
310 
311 	UT_LIST_REMOVE(list, buf_pool->zip_free[i], buf);
312 	buf_buddy_stamp_nonfree(buf, i);
313 }
314 
315 /**********************************************************************//**
316 Try to allocate a block from buf_pool->zip_free[].
317 @return	allocated block, or NULL if buf_pool->zip_free[] was empty */
318 static
319 buf_buddy_free_t*
buf_buddy_alloc_zip(buf_pool_t * buf_pool,ulint i)320 buf_buddy_alloc_zip(
321 /*================*/
322 	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
323 	ulint		i)		/*!< in: index of buf_pool->zip_free[] */
324 {
325 	buf_buddy_free_t*	buf;
326 
327 	ut_ad(buf_pool_mutex_own(buf_pool));
328 	ut_a(i < BUF_BUDDY_SIZES);
329 	ut_a(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
330 
331 	ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
332 
333 	buf = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
334 
335 	if (buf) {
336 		buf_buddy_remove_from_free(buf_pool, buf, i);
337 	} else if (i + 1 < BUF_BUDDY_SIZES) {
338 		/* Attempt to split. */
339 		buf = buf_buddy_alloc_zip(buf_pool, i + 1);
340 
341 		if (buf) {
342 			buf_buddy_free_t* buddy =
343 				reinterpret_cast<buf_buddy_free_t*>(
344 					buf->stamp.bytes
345 					+ (BUF_BUDDY_LOW << i));
346 
347 			ut_ad(!buf_pool_contains_zip(buf_pool, buddy));
348 			buf_buddy_add_to_free(buf_pool, buddy, i);
349 		}
350 	}
351 
352 	if (buf) {
353 		/* Trash the page other than the BUF_BUDDY_STAMP_NONFREE. */
354 		UNIV_MEM_TRASH(buf, ~i, BUF_BUDDY_STAMP_OFFSET);
355 		UNIV_MEM_TRASH(BUF_BUDDY_STAMP_OFFSET + 4
356 			       + buf->stamp.bytes, ~i,
357 			       (BUF_BUDDY_LOW << i)
358 			       - (BUF_BUDDY_STAMP_OFFSET + 4));
359 		ut_ad(mach_read_from_4(buf->stamp.bytes
360 				       + BUF_BUDDY_STAMP_OFFSET)
361 		      == BUF_BUDDY_STAMP_NONFREE);
362 	}
363 
364 	return(buf);
365 }
366 
367 /**********************************************************************//**
368 Deallocate a buffer frame of UNIV_PAGE_SIZE. */
369 static
370 void
buf_buddy_block_free(buf_pool_t * buf_pool,void * buf)371 buf_buddy_block_free(
372 /*=================*/
373 	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
374 	void*		buf)		/*!< in: buffer frame to deallocate */
375 {
376 	const ulint	fold	= BUF_POOL_ZIP_FOLD_PTR(buf);
377 	buf_page_t*	bpage;
378 	buf_block_t*	block;
379 
380 	ut_ad(buf_pool_mutex_own(buf_pool));
381 	ut_ad(!mutex_own(&buf_pool->zip_mutex));
382 	ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
383 
384 	HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
385 		    ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
386 			  && bpage->in_zip_hash && !bpage->in_page_hash),
387 		    ((buf_block_t*) bpage)->frame == buf);
388 	ut_a(bpage);
389 	ut_a(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY);
390 	ut_ad(!bpage->in_page_hash);
391 	ut_ad(bpage->in_zip_hash);
392 	ut_d(bpage->in_zip_hash = FALSE);
393 	HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
394 
395 	ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
396 	UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
397 
398 	block = (buf_block_t*) bpage;
399 	mutex_enter(&block->mutex);
400 	buf_LRU_block_free_non_file_page(block);
401 	mutex_exit(&block->mutex);
402 
403 	ut_ad(buf_pool->buddy_n_frames > 0);
404 	ut_d(buf_pool->buddy_n_frames--);
405 }
406 
407 /**********************************************************************//**
408 Allocate a buffer block to the buddy allocator. */
409 static
410 void
buf_buddy_block_register(buf_block_t * block)411 buf_buddy_block_register(
412 /*=====================*/
413 	buf_block_t*	block)	/*!< in: buffer frame to allocate */
414 {
415 	buf_pool_t*	buf_pool = buf_pool_from_block(block);
416 	const ulint	fold = BUF_POOL_ZIP_FOLD(block);
417 	ut_ad(buf_pool_mutex_own(buf_pool));
418 	ut_ad(!mutex_own(&buf_pool->zip_mutex));
419 	ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
420 
421 	buf_block_set_state(block, BUF_BLOCK_MEMORY);
422 
423 	ut_a(block->frame);
424 	ut_a(!ut_align_offset(block->frame, UNIV_PAGE_SIZE));
425 
426 	ut_ad(!block->page.in_page_hash);
427 	ut_ad(!block->page.in_zip_hash);
428 	ut_d(block->page.in_zip_hash = TRUE);
429 	HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
430 
431 	ut_d(buf_pool->buddy_n_frames++);
432 }
433 
434 /**********************************************************************//**
435 Allocate a block from a bigger object.
436 @return	allocated block */
437 static
438 void*
buf_buddy_alloc_from(buf_pool_t * buf_pool,void * buf,ulint i,ulint j)439 buf_buddy_alloc_from(
440 /*=================*/
441 	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
442 	void*		buf,		/*!< in: a block that is free to use */
443 	ulint		i,		/*!< in: index of
444 					buf_pool->zip_free[] */
445 	ulint		j)		/*!< in: size of buf as an index
446 					of buf_pool->zip_free[] */
447 {
448 	ulint	offs	= BUF_BUDDY_LOW << j;
449 	ut_ad(j <= BUF_BUDDY_SIZES);
450 	ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
451 	ut_ad(j >= i);
452 	ut_ad(!ut_align_offset(buf, offs));
453 
454 	/* Add the unused parts of the block to the free lists. */
455 	while (j > i) {
456 		buf_buddy_free_t*	zip_buf;
457 
458 		offs >>= 1;
459 		j--;
460 
461 		zip_buf = reinterpret_cast<buf_buddy_free_t*>(
462 			reinterpret_cast<byte*>(buf) + offs);
463 		buf_buddy_add_to_free(buf_pool, zip_buf, j);
464 	}
465 
466 	buf_buddy_stamp_nonfree(reinterpret_cast<buf_buddy_free_t*>(buf), i);
467 	return(buf);
468 }
469 
470 /**********************************************************************//**
471 Allocate a block.  The thread calling this function must hold
472 buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex.
473 The buf_pool_mutex may be released and reacquired.
474 @return	allocated block, never NULL */
475 UNIV_INTERN
476 void*
buf_buddy_alloc_low(buf_pool_t * buf_pool,ulint i,ibool * lru)477 buf_buddy_alloc_low(
478 /*================*/
479 	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
480 	ulint		i,		/*!< in: index of buf_pool->zip_free[],
481 					or BUF_BUDDY_SIZES */
482 	ibool*		lru)		/*!< in: pointer to a variable that
483 					will be assigned TRUE if storage was
484 					allocated from the LRU list and
485 					buf_pool->mutex was temporarily
486 					released */
487 {
488 	buf_block_t*	block;
489 
490 	ut_ad(lru);
491 	ut_ad(buf_pool_mutex_own(buf_pool));
492 	ut_ad(!mutex_own(&buf_pool->zip_mutex));
493 	ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
494 
495 	if (i < BUF_BUDDY_SIZES) {
496 		/* Try to allocate from the buddy system. */
497 		block = (buf_block_t*) buf_buddy_alloc_zip(buf_pool, i);
498 
499 		if (block) {
500 			goto func_exit;
501 		}
502 	}
503 
504 	/* Try allocating from the buf_pool->free list. */
505 	block = buf_LRU_get_free_only(buf_pool);
506 
507 	if (block) {
508 
509 		goto alloc_big;
510 	}
511 
512 	/* Try replacing an uncompressed page in the buffer pool. */
513 	buf_pool_mutex_exit(buf_pool);
514 	block = buf_LRU_get_free_block(buf_pool);
515 	*lru = TRUE;
516 	buf_pool_mutex_enter(buf_pool);
517 
518 alloc_big:
519 	buf_buddy_block_register(block);
520 
521 	block = (buf_block_t*) buf_buddy_alloc_from(
522 		buf_pool, block->frame, i, BUF_BUDDY_SIZES);
523 
524 func_exit:
525 	buf_pool->buddy_stat[i].used++;
526 	return(block);
527 }
528 
529 /**********************************************************************//**
530 Try to relocate a block.
531 @return	true if relocated */
532 static
533 bool
buf_buddy_relocate(buf_pool_t * buf_pool,void * src,void * dst,ulint i)534 buf_buddy_relocate(
535 /*===============*/
536 	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
537 	void*		src,		/*!< in: block to relocate */
538 	void*		dst,		/*!< in: free block to relocate to */
539 	ulint		i)		/*!< in: index of
540 					buf_pool->zip_free[] */
541 {
542 	buf_page_t*	bpage;
543 	const ulint	size	= BUF_BUDDY_LOW << i;
544 	ulint		space;
545 	ulint		offset;
546 
547 	ut_ad(buf_pool_mutex_own(buf_pool));
548 	ut_ad(!mutex_own(&buf_pool->zip_mutex));
549 	ut_ad(!ut_align_offset(src, size));
550 	ut_ad(!ut_align_offset(dst, size));
551 	ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
552 	UNIV_MEM_ASSERT_W(dst, size);
553 
554 	space	= mach_read_from_4((const byte*) src
555 				   + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
556 	offset	= mach_read_from_4((const byte*) src
557 				   + FIL_PAGE_OFFSET);
558 
559 	/* Suppress Valgrind warnings about conditional jump
560 	on uninitialized value. */
561 	UNIV_MEM_VALID(&space, sizeof space);
562 	UNIV_MEM_VALID(&offset, sizeof offset);
563 
564 	ut_ad(space != BUF_BUDDY_STAMP_FREE);
565 
566 	ulint		fold = buf_page_address_fold(space, offset);
567 	rw_lock_t*	hash_lock = buf_page_hash_lock_get(buf_pool, fold);
568 
569 	rw_lock_x_lock(hash_lock);
570 
571 	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
572 
573 	if (!bpage || bpage->zip.data != src) {
574 		/* The block has probably been freshly
575 		allocated by buf_LRU_get_free_block() but not
576 		added to buf_pool->page_hash yet.  Obviously,
577 		it cannot be relocated. */
578 
579 		rw_lock_x_unlock(hash_lock);
580 
581 		return(false);
582 	}
583 
584 	if (page_zip_get_size(&bpage->zip) != size) {
585 		/* The block is of different size.  We would
586 		have to relocate all blocks covered by src.
587 		For the sake of simplicity, give up. */
588 		ut_ad(page_zip_get_size(&bpage->zip) < size);
589 
590 		rw_lock_x_unlock(hash_lock);
591 
592 		return(false);
593 	}
594 
595 	/* The block must have been allocated, but it may
596 	contain uninitialized data. */
597 	UNIV_MEM_ASSERT_W(src, size);
598 
599 	ib_mutex_t*	block_mutex = buf_page_get_mutex(bpage);
600 
601 	mutex_enter(block_mutex);
602 
603 	if (buf_page_can_relocate(bpage)) {
604 		/* Relocate the compressed page. */
605 		ullint	usec = ut_time_us(NULL);
606 
607 		ut_a(bpage->zip.data == src);
608 
609 		/* Note: This is potentially expensive, we need a better
610 		solution here. We go with correctness for now. */
611 		::memcpy(dst, src, size);
612 
613 		bpage->zip.data = reinterpret_cast<page_zip_t*>(dst);
614 
615 		rw_lock_x_unlock(hash_lock);
616 
617 		mutex_exit(block_mutex);
618 
619 		buf_buddy_mem_invalid(
620 			reinterpret_cast<buf_buddy_free_t*>(src), i);
621 
622 		buf_buddy_stat_t*	buddy_stat = &buf_pool->buddy_stat[i];
623 
624 		++buddy_stat->relocated;
625 
626 		buddy_stat->relocated_usec += ut_time_us(NULL) - usec;
627 
628 		return(true);
629 	}
630 
631 	rw_lock_x_unlock(hash_lock);
632 
633 	mutex_exit(block_mutex);
634 
635 	return(false);
636 }
637 
638 /**********************************************************************//**
639 Deallocate a block. */
640 UNIV_INTERN
641 void
buf_buddy_free_low(buf_pool_t * buf_pool,void * buf,ulint i)642 buf_buddy_free_low(
643 /*===============*/
644 	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
645 	void*		buf,		/*!< in: block to be freed, must not be
646 					pointed to by the buffer pool */
647 	ulint		i)		/*!< in: index of buf_pool->zip_free[],
648 					or BUF_BUDDY_SIZES */
649 {
650 	buf_buddy_free_t*	buddy;
651 
652 	ut_ad(buf_pool_mutex_own(buf_pool));
653 	ut_ad(!mutex_own(&buf_pool->zip_mutex));
654 	ut_ad(i <= BUF_BUDDY_SIZES);
655 	ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
656 	ut_ad(buf_pool->buddy_stat[i].used > 0);
657 
658 	buf_pool->buddy_stat[i].used--;
659 recombine:
660 	UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
661 
662 	if (i == BUF_BUDDY_SIZES) {
663 		buf_buddy_block_free(buf_pool, buf);
664 		return;
665 	}
666 
667 	ut_ad(i < BUF_BUDDY_SIZES);
668 	ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
669 	ut_ad(!buf_pool_contains_zip(buf_pool, buf));
670 
671 	/* Do not recombine blocks if there are few free blocks.
672 	We may waste up to 15360*max_len bytes to free blocks
673 	(1024 + 2048 + 4096 + 8192 = 15360) */
674 	if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16) {
675 		goto func_exit;
676 	}
677 
678 	/* Try to combine adjacent blocks. */
679 	buddy = reinterpret_cast<buf_buddy_free_t*>(
680 		buf_buddy_get(reinterpret_cast<byte*>(buf),
681 			      BUF_BUDDY_LOW << i));
682 
683 	switch (buf_buddy_is_free(buddy, i)) {
684 	case BUF_BUDDY_STATE_FREE:
685 		/* The buddy is free: recombine */
686 		buf_buddy_remove_from_free(buf_pool, buddy, i);
687 buddy_is_free:
688 		ut_ad(!buf_pool_contains_zip(buf_pool, buddy));
689 		i++;
690 		buf = ut_align_down(buf, BUF_BUDDY_LOW << i);
691 
692 		goto recombine;
693 
694 	case BUF_BUDDY_STATE_USED:
695 		ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
696 
697 		/* The buddy is not free. Is there a free block of
698 		this size? */
699 		if (buf_buddy_free_t* zip_buf =
700 			UT_LIST_GET_FIRST(buf_pool->zip_free[i])) {
701 
702 			/* Remove the block from the free list, because
703 			a successful buf_buddy_relocate() will overwrite
704 			zip_free->list. */
705 			buf_buddy_remove_from_free(buf_pool, zip_buf, i);
706 
707 			/* Try to relocate the buddy of buf to the free
708 			block. */
709 			if (buf_buddy_relocate(buf_pool, buddy, zip_buf, i)) {
710 
711 				goto buddy_is_free;
712 			}
713 
714 			buf_buddy_add_to_free(buf_pool, zip_buf, i);
715 		}
716 
717 		break;
718 	case BUF_BUDDY_STATE_PARTIALLY_USED:
719 		/* Some sub-blocks in the buddy are still in use.
720 		Relocation will fail. No need to try. */
721 		break;
722 	}
723 
724 func_exit:
725 	/* Free the block to the buddy list. */
726 	buf_buddy_add_to_free(buf_pool,
727 			      reinterpret_cast<buf_buddy_free_t*>(buf),
728 			      i);
729 }
730