/****************************************************** Binary buddy allocator for compressed pages (c) 2006 Innobase Oy Created December 2006 by Marko Makela *******************************************************/ #define THIS_MODULE #include "buf0buddy.h" #ifdef UNIV_NONINL # include "buf0buddy.ic" #endif #undef THIS_MODULE #include "buf0buf.h" #include "buf0lru.h" #include "buf0flu.h" #include "page0zip.h" /************************************************************************** Get the offset of the buddy of a compressed page frame. */ UNIV_INLINE byte* buf_buddy_get( /*==========*/ /* out: the buddy relative of page */ byte* page, /* in: compressed page */ ulint size) /* in: page size in bytes */ { ut_ad(ut_is_2pow(size)); ut_ad(size >= BUF_BUDDY_LOW); ut_ad(size < BUF_BUDDY_HIGH); ut_ad(!ut_align_offset(page, size)); if (((ulint) page) & size) { return(page - size); } else { return(page + size); } } /************************************************************************** Try to allocate a block from buf_pool->zip_free[]. */ static void* buf_buddy_alloc_zip( /*================*/ /* out: allocated block, or NULL if buf_pool->zip_free[] was empty */ ulint i) /* in: index of buf_pool->zip_free[] */ { buf_page_t* bpage; #ifdef UNIV_SYNC_DEBUG ut_a(mutex_own(&buf_pool->mutex)); #endif /* UNIV_SYNC_DEBUG */ ut_a(i < BUF_BUDDY_SIZES); ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i])); bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); if (bpage) { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE); UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage); } else if (i + 1 < BUF_BUDDY_SIZES) { /* Attempt to split. */ bpage = buf_buddy_alloc_zip(i + 1); if (bpage) { buf_page_t* buddy = (buf_page_t*) (((char*) bpage) + (BUF_BUDDY_LOW << i)); ut_ad(!buf_pool_contains_zip(buddy)); ut_d(memset(buddy, i, BUF_BUDDY_LOW << i)); buddy->state = BUF_BLOCK_ZIP_FREE; ut_ad(buf_pool->zip_free[i].start != buddy); UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], buddy); } } #ifdef UNIV_DEBUG if (bpage) { memset(bpage, ~i, BUF_BUDDY_LOW << i); } #endif /* UNIV_DEBUG */ return(bpage); } /************************************************************************** Deallocate a buffer frame of UNIV_PAGE_SIZE. */ static void buf_buddy_block_free( /*=================*/ void* buf) /* in: buffer frame to deallocate */ { ulint fold = (ulint) buf / UNIV_PAGE_SIZE; buf_page_t* bpage; buf_block_t* block; #ifdef UNIV_SYNC_DEBUG ut_a(mutex_own(&buf_pool->mutex)); #endif /* UNIV_SYNC_DEBUG */ ut_a(buf == ut_align_down(buf, UNIV_PAGE_SIZE)); HASH_SEARCH(hash, buf_pool->zip_hash, fold, bpage, ((buf_block_t*) bpage)->frame == buf); ut_a(bpage); ut_a(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY); HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage); ut_d(memset(buf, 0, UNIV_PAGE_SIZE)); block = (buf_block_t*) bpage; mutex_enter(&block->mutex); buf_LRU_block_free_non_file_page(block); mutex_exit(&block->mutex); } /************************************************************************** Allocate a buffer block to the buddy allocator. */ static void buf_buddy_block_register( /*=====================*/ buf_block_t* block) /* in: buffer frame to allocate */ { ulint fold; #ifdef UNIV_SYNC_DEBUG ut_a(mutex_own(&buf_pool->mutex)); #endif /* UNIV_SYNC_DEBUG */ buf_block_set_state(block, BUF_BLOCK_MEMORY); ut_a(block->frame); ut_a(block->frame == ut_align_down(block->frame, UNIV_PAGE_SIZE)); fold = (ulint) block->frame / UNIV_PAGE_SIZE; HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page); } /************************************************************************** Allocate a block from a bigger object. */ static void* buf_buddy_alloc_from( /*=================*/ /* out: allocated block */ void* buf, /* in: a block that is free to use */ ulint i, /* in: index of buf_pool->zip_free[] */ ulint j) /* in: size of buf as an index of buf_pool->zip_free[] */ { ulint offs = BUF_BUDDY_LOW << j; /* Add the unused parts of the block to the free lists. */ while (j > i) { buf_page_t* bpage; offs >>= 1; j--; bpage = (buf_page_t*) ((byte*) buf + offs); ut_d(memset(bpage, j, BUF_BUDDY_LOW << j)); bpage->state = BUF_BLOCK_ZIP_FREE; ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[j])); ut_ad(buf_pool->zip_free[j].start != bpage); UT_LIST_ADD_FIRST(list, buf_pool->zip_free[j], bpage); } return(buf); } /************************************************************************** Try to allocate a block by freeing an unmodified page. */ static void* buf_buddy_alloc_clean( /*==================*/ /* out: allocated block, or NULL */ ulint i) /* in: index of buf_pool->zip_free[] */ { buf_page_t* bpage; #ifdef UNIV_SYNC_DEBUG ut_a(mutex_own(&buf_pool->mutex)); #endif /* UNIV_SYNC_DEBUG */ if (BUF_BUDDY_LOW << i >= PAGE_ZIP_MIN_SIZE && i < BUF_BUDDY_SIZES) { /* Try to find a clean compressed-only page of the same size. */ page_zip_des_t dummy_zip; ulint j; page_zip_set_size(&dummy_zip, BUF_BUDDY_LOW << i); j = ut_min(UT_LIST_GET_LEN(buf_pool->zip_clean), 100); bpage = UT_LIST_GET_FIRST(buf_pool->zip_clean); mutex_enter(&buf_pool->zip_mutex); for (; j--; bpage = UT_LIST_GET_NEXT(list, bpage)) { if (bpage->zip.ssize != dummy_zip.ssize || !buf_LRU_free_block(bpage, FALSE)) { continue; } /* Reuse the block. In case the block was recombined by buf_buddy_free(), we invoke the buddy allocator instead of using the block directly. Yes, bpage points to freed memory here, but it cannot be used by other threads, because when invoked on compressed-only pages, buf_LRU_free_block() does not release buf_pool->mutex. */ mutex_exit(&buf_pool->zip_mutex); bpage = buf_buddy_alloc_zip(i); ut_a(bpage); return(bpage); } mutex_exit(&buf_pool->zip_mutex); } /* Free blocks from the end of the LRU list until enough space is available. */ for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); bpage; bpage = UT_LIST_GET_PREV(LRU, bpage)) { void* ret; mutex_t* block_mutex = buf_page_get_mutex(bpage); if (!buf_page_in_file(bpage)) { /* This is most likely BUF_BLOCK_REMOVE_HASH, that is, the block is already being freed. */ continue; } mutex_enter(block_mutex); /* Keep the compressed pages of uncompressed blocks. */ if (!buf_LRU_free_block(bpage, FALSE)) { mutex_exit(block_mutex); continue; } mutex_exit(block_mutex); if (i < BUF_BUDDY_SIZES) { ret = buf_buddy_alloc_zip(i); if (ret) { return(ret); } } else { buf_block_t* block = buf_LRU_get_free_only(); if (block) { buf_buddy_block_register(block); return(block->frame); } } } return(NULL); } /************************************************************************** Allocate a block. */ void* buf_buddy_alloc_low( /*================*/ /* out: allocated block, or NULL if buf_pool->zip_free[] was empty */ ulint i, /* in: index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */ ibool lru) /* in: TRUE=allocate from the LRU list if needed */ { buf_block_t* block; #ifdef UNIV_SYNC_DEBUG ut_a(mutex_own(&buf_pool->mutex)); #endif /* UNIV_SYNC_DEBUG */ if (i < BUF_BUDDY_SIZES) { /* Try to allocate from the buddy system. */ block = buf_buddy_alloc_zip(i); if (block) { return(block); } } /* Try allocating from the buf_pool->free list. */ block = buf_LRU_get_free_only(); if (block) { goto alloc_big; } if (!lru) { return(NULL); } /* Try replacing a clean page in the buffer pool. */ block = buf_buddy_alloc_clean(i); if (block) { return(block); } /* Try replacing an uncompressed page in the buffer pool. */ mutex_exit(&buf_pool->mutex); block = buf_LRU_get_free_block(0); mutex_enter(&buf_pool->mutex); alloc_big: buf_buddy_block_register(block); return(buf_buddy_alloc_from(block->frame, i, BUF_BUDDY_SIZES)); } /************************************************************************** Try to relocate a block. */ static ibool buf_buddy_relocate( /*===============*/ /* out: TRUE if relocated */ const void* src, /* in: block to relocate */ void* dst, /* in: free block to relocate to */ ulint i) /* in: index of buf_pool->zip_free[] */ { buf_page_t* bpage; const ulint size = BUF_BUDDY_LOW << i; #ifdef UNIV_SYNC_DEBUG ut_a(mutex_own(&buf_pool->mutex)); #endif /* UNIV_SYNC_DEBUG */ ut_ad(!ut_align_offset(src, size)); ut_ad(!ut_align_offset(dst, size)); /* We assume that all memory from buf_buddy_alloc() is used for either compressed pages or buf_page_t objects covering compressed pages. */ if (size >= PAGE_ZIP_MIN_SIZE) { /* This is a compressed page. */ mutex_t* mutex; bpage = buf_page_hash_get( mach_read_from_4(src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID), mach_read_from_4(src + FIL_PAGE_OFFSET)); if (!bpage || bpage->zip.data != src) { /* The block has probably been freshly allocated by buf_LRU_get_free_block() but not added to buf_pool->page_hash yet. Obviously, it cannot be relocated. */ return(FALSE); } if (page_zip_get_size(&bpage->zip) != size) { /* The block is of different size. We would have to relocate all blocks covered by src. For the sake of simplicity, give up. */ ut_ad(page_zip_get_size(&bpage->zip) < size); return(FALSE); } mutex = buf_page_get_mutex(bpage); mutex_enter(mutex); if (buf_flush_ready_for_replace(bpage)) { switch (buf_page_get_state(bpage)) { case BUF_BLOCK_ZIP_FREE: case BUF_BLOCK_NOT_USED: case BUF_BLOCK_READY_FOR_USE: case BUF_BLOCK_MEMORY: case BUF_BLOCK_REMOVE_HASH: ut_error; break; case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_ZIP_DIRTY: case BUF_BLOCK_FILE_PAGE: /* Relocate the compressed page. */ ut_a(bpage->zip.data == src); memcpy(dst, src, size); bpage->zip.data = dst; mutex_exit(mutex); return(TRUE); } } mutex_exit(mutex); } else { /* This must be a buf_page_t object. */ bpage = (buf_page_t*) src; switch (buf_page_get_state(bpage)) { case BUF_BLOCK_ZIP_FREE: case BUF_BLOCK_NOT_USED: case BUF_BLOCK_READY_FOR_USE: case BUF_BLOCK_FILE_PAGE: case BUF_BLOCK_MEMORY: case BUF_BLOCK_REMOVE_HASH: ut_error; break; case BUF_BLOCK_ZIP_DIRTY: /* Cannot relocate dirty pages. */ break; case BUF_BLOCK_ZIP_PAGE: mutex_enter(&buf_pool->zip_mutex); if (buf_flush_ready_for_replace(bpage)) { buf_page_t* dpage = (buf_page_t*) dst; buf_page_t* b; memcpy(dpage, bpage, size); buf_relocate(bpage, dpage); /* relocate buf_pool->zip_clean */ b = UT_LIST_GET_PREV(list, bpage); UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage); if (b) { UT_LIST_INSERT_AFTER( list, buf_pool->zip_clean, b, dpage); } else { UT_LIST_ADD_FIRST( list, buf_pool->zip_clean, dpage); } } mutex_exit(&buf_pool->zip_mutex); return(TRUE); } } return(FALSE); } /************************************************************************** Deallocate a block. */ void buf_buddy_free_low( /*===============*/ void* buf, /* in: block to be freed, must not be pointed to by the buffer pool */ ulint i) /* in: index of buf_pool->zip_free[] */ { buf_page_t* bpage; buf_page_t* buddy; #ifdef UNIV_SYNC_DEBUG ut_a(mutex_own(&buf_pool->mutex)); #endif /* UNIV_SYNC_DEBUG */ recombine: if (i == BUF_BUDDY_SIZES) { buf_buddy_block_free(buf); return; } ut_ad(i < BUF_BUDDY_SIZES); ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i)); ut_ad(!buf_pool_contains_zip(buf)); /* Try to combine adjacent blocks. */ buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i); if (buddy->state != BUF_BLOCK_ZIP_FREE) { goto buddy_nonfree; } /* The field buddy->state can only be trusted for free blocks. If buddy->state == BUF_BLOCK_ZIP_FREE, the block is free if it is in the free list. */ for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage; bpage = UT_LIST_GET_NEXT(list, bpage)) { ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE); if (bpage == buddy) { buddy_free: /* The buddy is free: recombine */ UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage); buddy_free2: ut_ad(!buf_pool_contains_zip(buddy)); i++; buf = ut_align_down(buf, BUF_BUDDY_LOW << i); goto recombine; } ut_a(bpage != buf); } buddy_nonfree: ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i])); /* The buddy is not free. Is there a free block of this size? */ bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); if (bpage) { /* Remove the block from the free list, because a successful buf_buddy_relocate() will overwrite bpage->list. */ UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage); /* Try to relocate the buddy of buf to the free block. */ if (buf_buddy_relocate(buddy, bpage, i)) { goto buddy_free2; } UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage); /* Try to relocate the buddy of the free block to buf. */ buddy = (buf_page_t*) buf_buddy_get(((byte*) bpage), BUF_BUDDY_LOW << i); #ifdef UNIV_DEBUG { const buf_page_t* b; /* The buddy must not be free, because we always recombine adjacent free blocks. */ for (b = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); b; b = UT_LIST_GET_NEXT(list, b)) { ut_a(b != buddy); } } #endif /* UNIV_DEBUG */ if (buf_buddy_relocate(buddy, buf, i)) { buf = bpage; goto buddy_free; } } /* Free the block to the buddy list. */ bpage = buf; ut_d(memset(bpage, i, BUF_BUDDY_LOW << i)); bpage->state = BUF_BLOCK_ZIP_FREE; ut_ad(buf_pool->zip_free[i].start != bpage); UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage); }