mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-30 18:36:12 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			761 lines
		
	
	
	
		
			23 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			761 lines
		
	
	
	
		
			23 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*****************************************************************************
 | |
| 
 | |
| Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
 | |
| Copyright (c) 2018, 2022, MariaDB Corporation.
 | |
| 
 | |
| This program is free software; you can redistribute it and/or modify it under
 | |
| the terms of the GNU General Public License as published by the Free Software
 | |
| Foundation; version 2 of the License.
 | |
| 
 | |
| This program is distributed in the hope that it will be useful, but WITHOUT
 | |
| ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 | |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 | |
| 
 | |
| You should have received a copy of the GNU General Public License along with
 | |
| this program; if not, write to the Free Software Foundation, Inc.,
 | |
| 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
 | |
| 
 | |
| *****************************************************************************/
 | |
| 
 | |
| /**************************************************//**
 | |
| @file buf/buf0buddy.cc
 | |
| Binary buddy allocator for compressed pages
 | |
| 
 | |
| Created December 2006 by Marko Makela
 | |
| *******************************************************/
 | |
| 
 | |
| #include "buf0buddy.h"
 | |
| #include "buf0buf.h"
 | |
| #include "buf0lru.h"
 | |
| #include "buf0flu.h"
 | |
| #include "page0zip.h"
 | |
| #include "srv0start.h"
 | |
| 
 | |
| /** When freeing a buf we attempt to coalesce by looking at its buddy
 | |
| and deciding whether it is free or not. To ascertain if the buddy is
 | |
| free we look for BUF_BUDDY_STAMP_FREE at BUF_BUDDY_STAMP_OFFSET
 | |
| within the buddy. The question is how we can be sure that it is
 | |
| safe to look at BUF_BUDDY_STAMP_OFFSET.
 | |
| The answer lies in following invariants:
 | |
| * All blocks allocated by buddy allocator are used for compressed
 | |
| page frame.
 | |
| * A compressed table always has space_id < SRV_SPACE_ID_UPPER_BOUND
 | |
| * BUF_BUDDY_STAMP_OFFSET always points to the space_id field in
 | |
| a frame.
 | |
|   -- The above is true because we look at these fields when the
 | |
|      corresponding buddy block is free which implies that:
 | |
|      * The block we are looking at must have an address aligned at
 | |
|        the same size that its free buddy has. For example, if we have
 | |
|        a free block of 8K then its buddy's address must be aligned at
 | |
|        8K as well.
 | |
|      * It is possible that the block we are looking at may have been
 | |
|        further divided into smaller sized blocks but its starting
 | |
|        address must still remain the start of a page frame i.e.: it
 | |
|        cannot be middle of a block. For example, if we have a free
 | |
|        block of size 8K then its buddy may be divided into blocks
 | |
|        of, say, 1K, 1K, 2K, 4K but the buddy's address will still be
 | |
|        the starting address of first 1K compressed page.
 | |
|      * What is important to note is that for any given block, the
 | |
|        buddy's address cannot be in the middle of a larger block i.e.:
 | |
|        in above example, our 8K block cannot have a buddy whose address
 | |
|        is aligned on 8K but it is part of a larger 16K block.
 | |
| */
 | |
| 
 | |
/** Byte offset within a buf_buddy_free_t at which the free or
non-free stamp is written. */
#define BUF_BUDDY_STAMP_OFFSET	FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID

/** Stamp written at BUF_BUDDY_STAMP_OFFSET of every buffer that is
currently on a zip_free list. */
#define BUF_BUDDY_STAMP_FREE	SRV_SPACE_ID_UPPER_BOUND

/** Stamp written at BUF_BUDDY_STAMP_OFFSET of a buffer that is not
free. The consumer of the block will overwrite it with a non-zero
value. */
#define BUF_BUDDY_STAMP_NONFREE	0xFFFFFFFFUL
 | |
| 
 | |
/** Result of buf_buddy_is_free() */
enum buf_buddy_state_t {
	/** The buddy is completely free */
	BUF_BUDDY_STATE_FREE,
	/** The buddy is currently in use */
	BUF_BUDDY_STATE_USED,
	/** Some sub-blocks of the buddy are in use */
	BUF_BUDDY_STATE_PARTIALLY_USED
};
 | |
| 
 | |
| /**********************************************************************//**
 | |
| Invalidate memory area that we won't access while page is free */
 | |
| UNIV_INLINE
 | |
| void
 | |
| buf_buddy_mem_invalid(
 | |
| /*==================*/
 | |
| 	buf_buddy_free_t*	buf,	/*!< in: block to check */
 | |
| 	ulint			i)	/*!< in: index of zip_free[] */
 | |
| {
 | |
|   ut_ad(i <= BUF_BUDDY_SIZES);
 | |
| 
 | |
|   MEM_CHECK_ADDRESSABLE(buf, BUF_BUDDY_LOW << i);
 | |
|   MEM_UNDEFINED(buf, BUF_BUDDY_LOW << i);
 | |
| }
 | |
| 
 | |
| /**********************************************************************//**
 | |
| Check if a buddy is stamped free.
 | |
| @return whether the buddy is free */
 | |
| UNIV_INLINE MY_ATTRIBUTE((warn_unused_result))
 | |
| bool
 | |
| buf_buddy_stamp_is_free(
 | |
| /*====================*/
 | |
| 	const buf_buddy_free_t*	buf)	/*!< in: block to check */
 | |
| {
 | |
| 	compile_time_assert(BUF_BUDDY_STAMP_FREE < BUF_BUDDY_STAMP_NONFREE);
 | |
| 	return(mach_read_from_4(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET)
 | |
| 	       == BUF_BUDDY_STAMP_FREE);
 | |
| }
 | |
| 
 | |
| /**********************************************************************//**
 | |
| Stamps a buddy free. */
 | |
| UNIV_INLINE
 | |
| void
 | |
| buf_buddy_stamp_free(
 | |
| /*=================*/
 | |
| 	buf_buddy_free_t*	buf,	/*!< in/out: block to stamp */
 | |
| 	ulint			i)	/*!< in: block size */
 | |
| {
 | |
| 	ut_d(memset(&buf->stamp.bytes, int(i), BUF_BUDDY_LOW << i));
 | |
| 	buf_buddy_mem_invalid(buf, i);
 | |
| 	mach_write_to_4(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET,
 | |
| 			BUF_BUDDY_STAMP_FREE);
 | |
| 	buf->stamp.size = i;
 | |
| }
 | |
| 
 | |
| /**********************************************************************//**
 | |
| Stamps a buddy nonfree.
 | |
| @param[in,out]	buf	block to stamp
 | |
| @param[in]	i	block size */
 | |
| static inline void buf_buddy_stamp_nonfree(buf_buddy_free_t* buf, ulint i)
 | |
| {
 | |
| 	buf_buddy_mem_invalid(buf, i);
 | |
| 	compile_time_assert(BUF_BUDDY_STAMP_NONFREE == 0xffffffffU);
 | |
| 	memset(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET, 0xff, 4);
 | |
| }
 | |
| 
 | |
| /**********************************************************************//**
 | |
| Get the offset of the buddy of a compressed page frame.
 | |
| @return the buddy relative of page */
 | |
| UNIV_INLINE
 | |
| void*
 | |
| buf_buddy_get(
 | |
| /*==========*/
 | |
| 	byte*	page,	/*!< in: compressed page */
 | |
| 	ulint	size)	/*!< in: page size in bytes */
 | |
| {
 | |
| 	ut_ad(ut_is_2pow(size));
 | |
| 	ut_ad(size >= BUF_BUDDY_LOW);
 | |
| 	ut_ad(BUF_BUDDY_LOW <= UNIV_ZIP_SIZE_MIN);
 | |
| 	ut_ad(size < BUF_BUDDY_HIGH);
 | |
| 	ut_ad(BUF_BUDDY_HIGH == srv_page_size);
 | |
| 	ut_ad(!ut_align_offset(page, size));
 | |
| 
 | |
| 	if (((ulint) page) & size) {
 | |
| 		return(page - size);
 | |
| 	} else {
 | |
| 		return(page + size);
 | |
| 	}
 | |
| }
 | |
| 
 | |
| #ifdef UNIV_DEBUG
 | |
| const buf_block_t *buf_pool_t::contains_zip(const void *data, size_t shift)
 | |
|   const noexcept
 | |
| {
 | |
|   const size_t d= size_t(data) >> shift;
 | |
| 
 | |
|   for (size_t i= 0; i < n_blocks; i++)
 | |
|   {
 | |
|     const buf_block_t *block= get_nth_page(i);
 | |
|     if (size_t(block->page.zip.data) >> shift == d)
 | |
|       return block;
 | |
|   }
 | |
|   return nullptr;
 | |
| }
 | |
| 
 | |
| /** Validate a given zip_free list. */
 | |
| struct	CheckZipFree {
 | |
| 	CheckZipFree(ulint i) : m_i(i) {}
 | |
| 
 | |
| 	void operator()(const buf_buddy_free_t* elem) const
 | |
| 	{
 | |
| 		ut_ad(buf_buddy_stamp_is_free(elem));
 | |
| 		ut_ad(elem->stamp.size <= m_i);
 | |
| 	}
 | |
| 
 | |
| 	const ulint m_i;
 | |
| };
 | |
| 
 | |
| /** Validate a buddy list.
 | |
| @param[in]	i		buddy size to validate */
 | |
| static void buf_buddy_list_validate(ulint i)
 | |
| {
 | |
| 	ut_list_validate(buf_pool.zip_free[i], CheckZipFree(i));
 | |
| }
 | |
| 
 | |
| /**********************************************************************//**
 | |
| Debug function to validate that a buffer is indeed free i.e.: in the
 | |
| zip_free[].
 | |
| @param[in]	buf		block to check
 | |
| @param[in]	i		index of buf_pool.zip_free[]
 | |
| @return true if free */
 | |
| static bool buf_buddy_check_free(const buf_buddy_free_t* buf, ulint i)
 | |
| {
 | |
| 	const ulint	size	= BUF_BUDDY_LOW << i;
 | |
| 
 | |
| 	mysql_mutex_assert_owner(&buf_pool.mutex);
 | |
| 	ut_ad(!ut_align_offset(buf, size));
 | |
| 	ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
 | |
| 
 | |
| 	buf_buddy_free_t* itr;
 | |
| 
 | |
| 	for (itr = UT_LIST_GET_FIRST(buf_pool.zip_free[i]);
 | |
| 	     itr && itr != buf;
 | |
| 	     itr = UT_LIST_GET_NEXT(list, itr)) {
 | |
| 	}
 | |
| 
 | |
| 	return(itr == buf);
 | |
| }
 | |
| #endif /* UNIV_DEBUG */
 | |
| 
 | |
| /**********************************************************************//**
 | |
| Checks if a buf is free i.e.: in the zip_free[].
 | |
| @retval BUF_BUDDY_STATE_FREE if fully free
 | |
| @retval BUF_BUDDY_STATE_USED if currently in use
 | |
| @retval BUF_BUDDY_STATE_PARTIALLY_USED if partially in use. */
 | |
| static  MY_ATTRIBUTE((warn_unused_result))
 | |
| buf_buddy_state_t
 | |
| buf_buddy_is_free(
 | |
| /*==============*/
 | |
| 	buf_buddy_free_t*	buf,	/*!< in: block to check */
 | |
| 	ulint			i)	/*!< in: index of
 | |
| 					buf_pool.zip_free[] */
 | |
| {
 | |
| #ifdef UNIV_DEBUG
 | |
| 	const ulint	size	= BUF_BUDDY_LOW << i;
 | |
| 	ut_ad(!ut_align_offset(buf, size));
 | |
| 	ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
 | |
| #endif /* UNIV_DEBUG */
 | |
| 
 | |
| 	/* We assume that all memory from buf_buddy_alloc()
 | |
| 	is used for compressed page frames. */
 | |
| 
 | |
| 	/* We look inside the allocated objects returned by
 | |
| 	buf_buddy_alloc() and assume that each block is a compressed
 | |
| 	page that contains one of the following in space_id.
 | |
| 	* BUF_BUDDY_STAMP_FREE if the block is in a zip_free list or
 | |
| 	* BUF_BUDDY_STAMP_NONFREE if the block has been allocated but
 | |
| 	not initialized yet or
 | |
| 	* A valid space_id of a compressed tablespace
 | |
| 
 | |
| 	The call below attempts to read from free memory.  The memory
 | |
| 	is "owned" by the buddy allocator (and it has been allocated
 | |
| 	from the buffer pool), so there is nothing wrong about this. */
 | |
| 	if (!buf_buddy_stamp_is_free(buf)) {
 | |
| 		return(BUF_BUDDY_STATE_USED);
 | |
| 	}
 | |
| 
 | |
| 	/* A block may be free but a fragment of it may still be in use.
 | |
| 	To guard against that we write the free block size in terms of
 | |
| 	zip_free index at start of stamped block. Note that we can
 | |
| 	safely rely on this value only if the buf is free. */
 | |
| 	ut_ad(buf->stamp.size <= i);
 | |
| 	return(buf->stamp.size == i
 | |
| 	       ? BUF_BUDDY_STATE_FREE
 | |
| 	       : BUF_BUDDY_STATE_PARTIALLY_USED);
 | |
| }
 | |
| 
 | |
| /** Add a block to the head of the appropriate buddy free list.
 | |
| @param[in,out]	buf		block to be freed
 | |
| @param[in]	i		index of buf_pool.zip_free[] */
 | |
| static void buf_buddy_add_to_free(buf_buddy_free_t *buf, ulint i)
 | |
| {
 | |
| 	mysql_mutex_assert_owner(&buf_pool.mutex);
 | |
| 	ut_ad(buf_pool.zip_free[i].start != buf);
 | |
| 	buf_buddy_stamp_free(buf, i);
 | |
| 	UT_LIST_ADD_FIRST(buf_pool.zip_free[i], buf);
 | |
| 	ut_d(buf_buddy_list_validate(i));
 | |
| }
 | |
| 
 | |
| /** Remove a block from the appropriate buddy free list.
 | |
| @param[in,out]	buf		block to be freed
 | |
| @param[in]	i		index of buf_pool.zip_free[] */
 | |
| static void buf_buddy_remove_from_free(buf_buddy_free_t *buf, ulint i)
 | |
| {
 | |
| 	mysql_mutex_assert_owner(&buf_pool.mutex);
 | |
| 	ut_ad(buf_buddy_check_free(buf, i));
 | |
| 
 | |
| 	UT_LIST_REMOVE(buf_pool.zip_free[i], buf);
 | |
| 	buf_buddy_stamp_nonfree(buf, i);
 | |
| }
 | |
| 
 | |
| /** Try to allocate a block from buf_pool.zip_free[].
 | |
| @param[in]	i		index of buf_pool.zip_free[]
 | |
| @return allocated block, or NULL if buf_pool.zip_free[] was empty */
 | |
| static buf_buddy_free_t* buf_buddy_alloc_zip(ulint i)
 | |
| {
 | |
| 	buf_buddy_free_t*	buf;
 | |
| 
 | |
| 	mysql_mutex_assert_owner(&buf_pool.mutex);
 | |
| 	ut_a(i < BUF_BUDDY_SIZES);
 | |
| 	ut_a(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
 | |
| 
 | |
| 	ut_d(buf_buddy_list_validate(i));
 | |
| 
 | |
| 	buf = UT_LIST_GET_FIRST(buf_pool.zip_free[i]);
 | |
| 
 | |
| 	if (size_t size = buf_pool.shrinking_size()) {
 | |
| 		while (buf != NULL
 | |
| 		       && buf_pool.will_be_withdrawn(
 | |
| 			       reinterpret_cast<byte*>(buf), size)) {
 | |
| 			/* This should be withdrawn, not to be allocated */
 | |
| 			buf = UT_LIST_GET_NEXT(list, buf);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if (buf) {
 | |
| 		buf_buddy_remove_from_free(buf, i);
 | |
| 		ut_ad(!buf_pool.contains_zip(buf, BUF_BUDDY_LOW_SHIFT + i));
 | |
| 	} else if (i + 1 < BUF_BUDDY_SIZES) {
 | |
| 		/* Attempt to split. */
 | |
| 		buf = buf_buddy_alloc_zip(i + 1);
 | |
| 
 | |
| 		if (buf) {
 | |
| 			buf_buddy_free_t* buddy =
 | |
| 				reinterpret_cast<buf_buddy_free_t*>(
 | |
| 					reinterpret_cast<byte*>(buf)
 | |
| 					+ (BUF_BUDDY_LOW << i));
 | |
| 			buf_buddy_add_to_free(buddy, i);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if (buf) {
 | |
| 		/* Trash the page other than the BUF_BUDDY_STAMP_NONFREE. */
 | |
| 		MEM_UNDEFINED(buf, BUF_BUDDY_STAMP_OFFSET);
 | |
| 		MEM_UNDEFINED(BUF_BUDDY_STAMP_OFFSET + 4 + buf->stamp.bytes,
 | |
| 			      (BUF_BUDDY_LOW << i)
 | |
| 			      - (BUF_BUDDY_STAMP_OFFSET + 4));
 | |
| 		ut_ad(mach_read_from_4(buf->stamp.bytes
 | |
| 				       + BUF_BUDDY_STAMP_OFFSET)
 | |
| 		      == BUF_BUDDY_STAMP_NONFREE);
 | |
| 	}
 | |
| 
 | |
| 	return(buf);
 | |
| }
 | |
| 
 | |
#ifdef UNIV_DEBUG
/** Number of buffer frames currently registered with the buddy
allocator (maintained in debug builds only) */
static size_t buf_buddy_n_frames;
#endif /* UNIV_DEBUG */
 | |
| 
 | |
| /** Deallocate a buffer frame of srv_page_size.
 | |
| @param buf    buffer frame to deallocate */
 | |
| static void buf_buddy_block_free(void *buf) noexcept
 | |
| {
 | |
|   mysql_mutex_assert_owner(&buf_pool.mutex);
 | |
|   buf_block_t *block= buf_pool.block_from(buf);
 | |
|   ut_ad(block->page.state() == buf_page_t::MEMORY);
 | |
|   ut_ad(block->page.frame == buf);
 | |
|   ut_ad(!buf_pool.contains_zip(buf, srv_page_size_shift));
 | |
|   ut_d(memset(buf, 0, srv_page_size));
 | |
|   MEM_UNDEFINED(buf, srv_page_size);
 | |
|   buf_LRU_block_free_non_file_page(block);
 | |
|   ut_ad(buf_buddy_n_frames > 0);
 | |
|   ut_d(buf_buddy_n_frames--);
 | |
| }
 | |
| 
 | |
| /** Allocate a buffer block to the buddy allocator.
 | |
| @param block   buffer block to register */
 | |
| static void buf_buddy_block_register(buf_block_t *block) noexcept
 | |
| {
 | |
|   ut_ad(buf_pool.is_uncompressed_current(block));
 | |
|   ut_ad(block->page.state() == buf_page_t::MEMORY);
 | |
|   ut_d(buf_buddy_n_frames++);
 | |
| }
 | |
| 
 | |
| /** Allocate a block from a bigger object.
 | |
| @param[in]	buf		a block that is free to use
 | |
| @param[in]	i		index of buf_pool.zip_free[]
 | |
| @return allocated block */
 | |
| static void *buf_buddy_alloc_from(void *buf, ulint i)
 | |
| {
 | |
| 	ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
 | |
| 	ut_ad(i <= BUF_BUDDY_SIZES);
 | |
| 	ut_ad(!ut_align_offset(buf, srv_page_size));
 | |
| 	ut_ad(!buf_pool.contains_zip(buf, srv_page_size));
 | |
| 
 | |
| 	/* Add the unused parts of the block to the free lists. */
 | |
| 	for (ulint j = BUF_BUDDY_SIZES, offs = srv_page_size; j-- > i; ) {
 | |
| 		buf_buddy_free_t*	zip_buf;
 | |
| 
 | |
| 		offs >>= 1;
 | |
| 
 | |
| 		zip_buf = reinterpret_cast<buf_buddy_free_t*>(
 | |
| 			reinterpret_cast<byte*>(buf) + offs);
 | |
| 		buf_buddy_add_to_free(zip_buf, j);
 | |
| 	}
 | |
| 
 | |
| 	buf_buddy_stamp_nonfree(reinterpret_cast<buf_buddy_free_t*>(buf), i);
 | |
| 	return(buf);
 | |
| }
 | |
| 
 | |
| /** Allocate a ROW_FORMAT=COMPRESSED block.
 | |
| @param i      index of buf_pool.zip_free[] or BUF_BUDDY_SIZES
 | |
| @param lru    assigned to true if buf_pool.mutex was temporarily released
 | |
| @return allocated block, never NULL */
 | |
| byte *buf_buddy_alloc_low(ulint i, bool *lru) noexcept
 | |
| {
 | |
| 	buf_block_t*	block;
 | |
| 
 | |
| 	mysql_mutex_assert_owner(&buf_pool.mutex);
 | |
| 	ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
 | |
| 
 | |
| 	if (i < BUF_BUDDY_SIZES) {
 | |
| 		/* Try to allocate from the buddy system. */
 | |
| 		block = (buf_block_t*) buf_buddy_alloc_zip(i);
 | |
| 
 | |
| 		if (block) {
 | |
| 			goto func_exit;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/* Try allocating from the buf_pool.free list. */
 | |
| 	block = buf_pool.allocate();
 | |
| 
 | |
| 	if (block) {
 | |
| 		goto alloc_big;
 | |
| 	}
 | |
| 
 | |
| 	/* Try replacing an uncompressed page in the buffer pool. */
 | |
| 	block = buf_LRU_get_free_block(have_mutex);
 | |
| 	if (lru) {
 | |
| 		*lru = true;
 | |
| 	}
 | |
| 
 | |
| alloc_big:
 | |
| 	buf_buddy_block_register(block);
 | |
| 
 | |
| 	block = reinterpret_cast<buf_block_t*>(
 | |
| 		buf_buddy_alloc_from(block->page.frame, i));
 | |
| 
 | |
| func_exit:
 | |
| 	buf_pool.buddy_stat[i].used++;
 | |
| 	return reinterpret_cast<byte*>(block);
 | |
| }
 | |
| 
 | |
| /** Try to relocate a block.
 | |
| @param[in]	src		block to relocate
 | |
| @param[in]	dst		free block to relocated to
 | |
| @param[in]	i		index of buf_pool.zip_free[]
 | |
| @param[in]	force		true if we must relocated always
 | |
| @return true if relocated */
 | |
| static bool buf_buddy_relocate(void *src, void *dst, ulint i, bool force)
 | |
|   noexcept
 | |
| {
 | |
| 	buf_page_t*	bpage;
 | |
| 	const ulint	size = BUF_BUDDY_LOW << i;
 | |
| 
 | |
| 	mysql_mutex_assert_owner(&buf_pool.mutex);
 | |
| 	ut_ad(!ut_align_offset(src, size));
 | |
| 	ut_ad(!ut_align_offset(dst, size));
 | |
| 	ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
 | |
| 	MEM_CHECK_ADDRESSABLE(dst, size);
 | |
| 
 | |
| 	uint32_t space = mach_read_from_4(static_cast<const byte*>(src)
 | |
| 					  + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
 | |
| 	uint32_t offset = mach_read_from_4(static_cast<const byte*>(src)
 | |
| 					   + FIL_PAGE_OFFSET);
 | |
| 
 | |
| 	/* Suppress Valgrind or MSAN warnings. */
 | |
| 	MEM_MAKE_DEFINED(&space, sizeof space);
 | |
| 	MEM_MAKE_DEFINED(&offset, sizeof offset);
 | |
| 
 | |
| 	ut_ad(space != BUF_BUDDY_STAMP_FREE);
 | |
| 
 | |
| 	const page_id_t	page_id(space, offset);
 | |
| 	/* FIXME: we are computing this while holding buf_pool.mutex */
 | |
| 	auto &cell= buf_pool.page_hash.cell_get(page_id.fold());
 | |
| 
 | |
| 	bpage = buf_pool.page_hash.get(page_id, cell);
 | |
| 
 | |
| 	if (!bpage || bpage->zip.data != src) {
 | |
| 		/* The block has probably been freshly
 | |
| 		allocated by buf_LRU_get_free_block() but not
 | |
| 		added to buf_pool.page_hash yet.  Obviously,
 | |
| 		it cannot be relocated. */
 | |
| 
 | |
| 		if (!force || space != 0 || offset != 0) {
 | |
| 			return(false);
 | |
| 		}
 | |
| 
 | |
| 		/* It might be just uninitialized page.
 | |
| 		We should search from LRU list also. */
 | |
| 
 | |
| 		bpage = UT_LIST_GET_FIRST(buf_pool.LRU);
 | |
| 		while (bpage != NULL) {
 | |
| 			if (bpage->zip.data == src) {
 | |
| 				ut_ad(bpage->id() == page_id);
 | |
| 				break;
 | |
| 			}
 | |
| 			bpage = UT_LIST_GET_NEXT(LRU, bpage);
 | |
| 		}
 | |
| 
 | |
| 		if (bpage == NULL) {
 | |
| 			return(false);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if (page_zip_get_size(&bpage->zip) != size) {
 | |
| 		/* The block is of different size.  We would
 | |
| 		have to relocate all blocks covered by src.
 | |
| 		For the sake of simplicity, give up. */
 | |
| 		ut_ad(page_zip_get_size(&bpage->zip) < size);
 | |
| 		return(false);
 | |
| 	}
 | |
| 
 | |
| 	/* The block must have been allocated, but it may
 | |
| 	contain uninitialized data. */
 | |
| 	MEM_CHECK_ADDRESSABLE(src, size);
 | |
| 
 | |
| 	if (!bpage->can_relocate()) {
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	page_hash_latch &hash_lock = buf_pool.page_hash.lock_get(cell);
 | |
| 	/* It does not make sense to use transactional_lock_guard here,
 | |
| 	because the memcpy() of 1024 to 16384 bytes would likely make the
 | |
| 	memory transaction too large. */
 | |
| 	hash_lock.lock();
 | |
| 
 | |
| 	if (bpage->can_relocate()) {
 | |
| 		/* Relocate the compressed page. */
 | |
| 		const ulonglong ns = my_interval_timer();
 | |
| 
 | |
| 		ut_a(bpage->zip.data == src);
 | |
| 
 | |
| 		memcpy(dst, src, size);
 | |
| 		bpage->zip.data = reinterpret_cast<page_zip_t*>(dst);
 | |
| 
 | |
| 		hash_lock.unlock();
 | |
| 
 | |
| 		buf_buddy_mem_invalid(
 | |
| 			reinterpret_cast<buf_buddy_free_t*>(src), i);
 | |
| 
 | |
| 		buf_buddy_stat_t*	buddy_stat = &buf_pool.buddy_stat[i];
 | |
| 		buddy_stat->relocated++;
 | |
| 		buddy_stat->relocated_usec+= (my_interval_timer() - ns) / 1000;
 | |
| 		return(true);
 | |
| 	}
 | |
| 
 | |
| 	hash_lock.unlock();
 | |
| 
 | |
| 	return(false);
 | |
| }
 | |
| 
 | |
| /** Deallocate a block.
 | |
| @param[in]	buf	block to be freed, must not be pointed to
 | |
| 			by the buffer pool
 | |
| @param[in]	i	index of buf_pool.zip_free[], or BUF_BUDDY_SIZES */
 | |
| void buf_buddy_free_low(void* buf, ulint i) noexcept
 | |
| {
 | |
| 	buf_buddy_free_t*	buddy;
 | |
| 
 | |
| 	mysql_mutex_assert_owner(&buf_pool.mutex);
 | |
| 	ut_ad(i <= BUF_BUDDY_SIZES);
 | |
| 	ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
 | |
| 	ut_ad(buf_pool.buddy_stat[i].used > 0);
 | |
| 
 | |
| 	buf_pool.buddy_stat[i].used--;
 | |
| recombine:
 | |
| 	MEM_UNDEFINED(buf, BUF_BUDDY_LOW << i);
 | |
| 
 | |
| 	if (i == BUF_BUDDY_SIZES) {
 | |
| 		buf_buddy_block_free(buf);
 | |
| 		return;
 | |
| 	}
 | |
| 
 | |
| 	ut_ad(i < BUF_BUDDY_SIZES);
 | |
| 	ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
 | |
| 	ut_ad(!buf_pool.contains_zip(buf, BUF_BUDDY_LOW_SHIFT + i));
 | |
| 
 | |
| 	/* Do not recombine blocks if there are few free blocks.
 | |
| 	We may waste up to 15360*max_len bytes to free blocks
 | |
| 	(1024 + 2048 + 4096 + 8192 = 15360) */
 | |
| 	if (UT_LIST_GET_LEN(buf_pool.zip_free[i]) < 16) {
 | |
| 		goto func_exit;
 | |
| 	}
 | |
| 
 | |
| 	/* Try to combine adjacent blocks. */
 | |
| 	buddy = reinterpret_cast<buf_buddy_free_t*>(
 | |
| 		buf_buddy_get(reinterpret_cast<byte*>(buf),
 | |
| 			      BUF_BUDDY_LOW << i));
 | |
| 
 | |
| 	switch (buf_buddy_is_free(buddy, i)) {
 | |
| 	case BUF_BUDDY_STATE_FREE:
 | |
| 		/* The buddy is free: recombine */
 | |
| 		buf_buddy_remove_from_free(buddy, i);
 | |
| buddy_is_free:
 | |
| 		i++;
 | |
| 		buf = ut_align_down(buf, BUF_BUDDY_LOW << i);
 | |
| 		ut_ad(!buf_pool.contains_zip(buf, BUF_BUDDY_LOW_SHIFT + i));
 | |
| 		goto recombine;
 | |
| 
 | |
| 	case BUF_BUDDY_STATE_USED:
 | |
| 		ut_d(buf_buddy_list_validate(i));
 | |
| 
 | |
| 		/* The buddy is not free. Is there a free block of
 | |
| 		this size? */
 | |
| 		if (buf_buddy_free_t* zip_buf =
 | |
| 			UT_LIST_GET_FIRST(buf_pool.zip_free[i])) {
 | |
| 
 | |
| 			/* Remove the block from the free list, because
 | |
| 			a successful buf_buddy_relocate() will overwrite
 | |
| 			zip_free->list. */
 | |
| 			buf_buddy_remove_from_free(zip_buf, i);
 | |
| 
 | |
| 			/* Try to relocate the buddy of buf to the free
 | |
| 			block. */
 | |
| 			if (buf_buddy_relocate(buddy, zip_buf, i, false)) {
 | |
| 				goto buddy_is_free;
 | |
| 			}
 | |
| 
 | |
| 			buf_buddy_add_to_free(zip_buf, i);
 | |
| 		}
 | |
| 
 | |
| 		break;
 | |
| 	case BUF_BUDDY_STATE_PARTIALLY_USED:
 | |
| 		/* Some sub-blocks in the buddy are still in use.
 | |
| 		Relocation will fail. No need to try. */
 | |
| 		break;
 | |
| 	}
 | |
| 
 | |
| func_exit:
 | |
| 	/* Free the block to the buddy list. */
 | |
| 	buf_buddy_add_to_free(reinterpret_cast<buf_buddy_free_t*>(buf), i);
 | |
| }
 | |
| 
 | |
| /** Reallocate a ROW_FORMAT=COMPRESSED page frame during buf_pool_t::resize().
 | |
| @param bpage page descriptor covering a ROW_FORMAT=COMPRESSED page
 | |
| @param block uncompressed block for storage
 | |
| @return block
 | |
| @retval nullptr if the block was consumed */
 | |
| ATTRIBUTE_COLD
 | |
| buf_block_t *buf_buddy_shrink(buf_page_t *bpage, buf_block_t *block) noexcept
 | |
| {
 | |
|   ut_ad(bpage->zip.data);
 | |
| 
 | |
|   void *dst= nullptr;
 | |
|   ulint size= page_zip_get_size(&bpage->zip);
 | |
|   ulint i= buf_buddy_get_slot(size);
 | |
| 
 | |
|   ut_ad(buf_pool.will_be_withdrawn(bpage->zip.data, size));
 | |
|   ut_ad(bpage->can_relocate());
 | |
|   ut_ad(i <= BUF_BUDDY_SIZES);
 | |
|   ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
 | |
| 
 | |
|   if (UNIV_LIKELY(i < BUF_BUDDY_SIZES))
 | |
|     dst= buf_buddy_alloc_zip(i);
 | |
| 
 | |
|   if (!dst)
 | |
|   {
 | |
|     buf_buddy_block_register(block);
 | |
|     dst= buf_buddy_alloc_from(block->page.frame, i);
 | |
|     ut_ad(dst);
 | |
|     block= nullptr;
 | |
|   }
 | |
| 
 | |
|   void *src= bpage->zip.data;
 | |
|   memcpy_aligned<UNIV_ZIP_SIZE_MIN>(dst, src, size);
 | |
|   bpage->zip.data= static_cast<page_zip_t*>(dst);
 | |
|   buf_pool.buddy_stat[i].relocated++;
 | |
| 
 | |
|   for (;;)
 | |
|   {
 | |
|     MEM_UNDEFINED(src, BUF_BUDDY_LOW << i);
 | |
|     ut_ad(i < BUF_BUDDY_SIZES);
 | |
|     /* Try to combine adjacent blocks. */
 | |
|     buf_buddy_free_t *buddy= reinterpret_cast<buf_buddy_free_t*>
 | |
|       (buf_buddy_get(static_cast<byte*>(src), BUF_BUDDY_LOW << i));
 | |
| 
 | |
|     if (buf_buddy_is_free(buddy, i) != BUF_BUDDY_STATE_FREE)
 | |
|     {
 | |
|       ut_ad(!buf_pool.contains_zip(src, BUF_BUDDY_LOW_SHIFT + i));
 | |
|       buf_buddy_add_to_free(static_cast<buf_buddy_free_t*>(src), i);
 | |
|       break;
 | |
|     }
 | |
| 
 | |
|     /* The buddy is free: recombine */
 | |
|     buf_buddy_remove_from_free(buddy, i);
 | |
|     i++;
 | |
|     src= ut_align_down(src, BUF_BUDDY_LOW << i);
 | |
|     if (i == BUF_BUDDY_SIZES)
 | |
|     {
 | |
|       buf_buddy_block_free(src);
 | |
|       break;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return block;
 | |
| }
 | |
| 
 | |
| /** Combine all pairs of free buddies.
 | |
| @param size  the target innodb_buffer_pool_size */
 | |
| ATTRIBUTE_COLD void buf_buddy_condense_free(size_t size) noexcept
 | |
| {
 | |
|   ut_ad(size);
 | |
|   ut_ad(size == buf_pool.shrinking_size());
 | |
| 
 | |
|   for (ulint i= 0; i < array_elements(buf_pool.zip_free); i++)
 | |
|   {
 | |
|     buf_buddy_free_t *buf= UT_LIST_GET_FIRST(buf_pool.zip_free[i]);
 | |
| 
 | |
|     /* seek to withdraw target */
 | |
|     while (buf &&
 | |
|            !buf_pool.will_be_withdrawn(reinterpret_cast<byte*>(buf), size))
 | |
|       buf= UT_LIST_GET_NEXT(list, buf);
 | |
| 
 | |
|     for (buf_buddy_free_t *next= buf; buf; buf= next)
 | |
|     {
 | |
|       buf_buddy_free_t *buddy= reinterpret_cast<buf_buddy_free_t*>
 | |
|         (buf_buddy_get(reinterpret_cast<byte*>(buf), BUF_BUDDY_LOW << i));
 | |
| 
 | |
|       /* seek to the next withdraw target */
 | |
|       do
 | |
|       {
 | |
|         while ((next= UT_LIST_GET_NEXT(list, next)) &&
 | |
|                !buf_pool.will_be_withdrawn(reinterpret_cast<byte*>(next),
 | |
|                                            size)) {}
 | |
|       }
 | |
|       while (buddy == next);
 | |
| 
 | |
|       if (buf_buddy_is_free(buddy, i) != BUF_BUDDY_STATE_FREE)
 | |
|         continue;
 | |
| 
 | |
|       buf_buddy_remove_from_free(buf, i);
 | |
|       ulint j= i;
 | |
|     recombine:
 | |
|       buf_buddy_remove_from_free(buddy, j);
 | |
|       j++;
 | |
|       buf= static_cast<buf_buddy_free_t*>
 | |
|         (ut_align_down(buf, BUF_BUDDY_LOW << j));
 | |
|       MEM_UNDEFINED(buf, BUF_BUDDY_LOW << j);
 | |
| 
 | |
|       if (j == BUF_BUDDY_SIZES)
 | |
|       {
 | |
|         buf_buddy_block_free(buf);
 | |
|         continue;
 | |
|       }
 | |
| 
 | |
|       buddy= reinterpret_cast<buf_buddy_free_t*>
 | |
|         (buf_buddy_get(reinterpret_cast<byte*>(buf), BUF_BUDDY_LOW << j));
 | |
|       if (buf_buddy_is_free(buddy, j) == BUF_BUDDY_STATE_FREE)
 | |
|         goto recombine;
 | |
| 
 | |
|       ut_ad(!buf_pool.contains_zip(buf, BUF_BUDDY_LOW_SHIFT + j));
 | |
|       buf_buddy_add_to_free(buf, j);
 | |
|     }
 | |
|   }
 | |
| }
 | 
