branches/zip: In r988, the function buf_block_align() was enclosed

within UNIV_DEBUG. The two remaining callers in non-debug builds,
btr_search_guess_on_hash() and btr_search_validate(), were rewritten
to call buf_page_hash_get().

To implement support for a resizeable buffer pool, the function
buf_block_align() had been rewritten to perform a page hash lookup in
the buffer pool. The caller was also made responsible for holding the
buffer pool mutex.

Because the page hash lookup is expensive and has to be done while
holding the buffer pool mutex, implement buf_block_align() by pointer
arithmetic again, and make btr_search_guess_on_hash() call it. Note
that this will have to be adjusted if the interface to the resizeable
buffer pool is actually implemented.

rb://83 approved by Heikki Tuuri, to address Issue #161.

As a deviation from the approved patch, this patch also makes
btr_search_validate() (invoked by CHECK TABLE) check that
buf_pool->page_hash is consistent with buf_block_align().
This commit is contained in:
marko 2009-01-26 20:33:20 +00:00
parent 4e96a6c8d4
commit 60cf6c98ac
8 changed files with 91 additions and 90 deletions

View file

@ -1,3 +1,15 @@
2009-01-26 The InnoDB Team
* include/buf0buf.h, include/buf0buf.ic, buf/buf0buf.c,
include/mtr0log.ic, include/row0upd.ic, mtr/mtr0mtr.c,
btr/btr0sea.c:
Implement buf_block_align() with pointer arithmetic, as it is in
the built-in InnoDB distributed with MySQL. Do not acquire the
buffer pool mutex before buf_block_align(). This removes a
scalability bottleneck in the adaptive hash index lookup. In
CHECK TABLE, check that buf_pool->page_hash is consistent with
buf_block_align().
2009-01-23 The InnoDB Team
* btr/btr0sea.c:

View file

@ -758,7 +758,6 @@ btr_search_guess_on_hash(
{
buf_block_t* block;
rec_t* rec;
const page_t* page;
ulint fold;
dulint index_id;
#ifdef notdefined
@ -809,29 +808,7 @@ btr_search_guess_on_hash(
goto failure_unlock;
}
page = page_align(rec);
{
ulint page_no = page_get_page_no(page);
ulint space_id = page_get_space_id(page);
buf_pool_mutex_enter();
block = (buf_block_t*) buf_page_hash_get(space_id, page_no);
buf_pool_mutex_exit();
}
if (UNIV_UNLIKELY(!block)
|| UNIV_UNLIKELY(buf_block_get_state(block)
!= BUF_BLOCK_FILE_PAGE)) {
/* The block is most probably being freed.
The function buf_LRU_search_and_free_block()
first removes the block from buf_pool->page_hash
by calling buf_LRU_block_remove_hashed_page().
After that, it invokes btr_search_drop_page_hash_index().
Let us pretend that the block was also removed from
the adaptive hash index. */
goto failure_unlock;
}
block = buf_block_align(rec);
if (UNIV_LIKELY(!has_search_latch)) {
@ -848,8 +825,9 @@ btr_search_guess_on_hash(
buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
}
if (UNIV_UNLIKELY(buf_block_get_state(block)
== BUF_BLOCK_REMOVE_HASH)) {
if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
ut_ad(buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH);
if (UNIV_LIKELY(!has_search_latch)) {
btr_leaf_page_release(block, latch_mode, mtr);
@ -858,7 +836,6 @@ btr_search_guess_on_hash(
goto failure;
}
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(page_rec_is_user_rec(rec));
btr_cur_position(index, rec, block, cursor);
@ -870,8 +847,8 @@ btr_search_guess_on_hash(
is positioned on. We cannot look at the next of the previous
record to determine if our guess for the cursor position is
right. */
if (UNIV_EXPECT(
ut_dulint_cmp(index_id, btr_page_get_index_id(page)), 0)
if (UNIV_EXPECT
(ut_dulint_cmp(index_id, btr_page_get_index_id(block->frame)), 0)
|| !btr_search_check_guess(cursor,
has_search_latch,
tuple, mode, mtr)) {
@ -1690,7 +1667,6 @@ btr_search_validate(void)
/*=====================*/
/* out: TRUE if ok */
{
page_t* page;
ha_node_t* node;
ulint n_page_dumps = 0;
ibool ok = TRUE;
@ -1725,28 +1701,30 @@ btr_search_validate(void)
node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node;
for (; node != NULL; node = node->next) {
const buf_block_t* block;
const buf_block_t* block
= buf_block_align(node->data);
const buf_block_t* hash_block
= buf_block_hash_get(
buf_block_get_space(block),
buf_block_get_page_no(block));
const page_t* page
= buf_block_get_frame(block);
page = page_align(node->data);
{
ulint page_no = page_get_page_no(page);
ulint space_id= page_get_space_id(page);
block = buf_block_hash_get(space_id, page_no);
}
if (UNIV_UNLIKELY(!block)) {
/* The block is most probably being freed.
The function buf_LRU_search_and_free_block()
first removes the block from
if (hash_block) {
ut_a(hash_block == block);
} else {
/* When a block is being freed,
buf_LRU_search_and_free_block() first
removes the block from
buf_pool->page_hash by calling
buf_LRU_block_remove_hashed_page().
After that, it invokes
btr_search_drop_page_hash_index().
Let us pretend that the block was also removed
from the adaptive hash index. */
continue;
btr_search_drop_page_hash_index() to
remove the block from
btr_search_sys->hash_index. */
ut_a(buf_block_get_state(block)
== BUF_BLOCK_REMOVE_HASH);
}
ut_a(!dict_index_is_ibuf(block->index));

View file

@ -1750,6 +1750,53 @@ buf_zip_decompress(
return(FALSE);
}
/***********************************************************************
Finds the control block that owns the buffer frame containing ptr.
The lookup walks the buffer pool chunks and uses pointer arithmetic
only; it does not consult buf_pool->page_hash. */
UNIV_INTERN
buf_block_t*
buf_block_align(
/*============*/
				/* out: pointer to block, never NULL */
	const byte*	ptr)	/* in: pointer to a frame */
{
	/* TODO: protect buf_pool->chunks with a mutex (it will
	currently remain constant after buf_pool_init()) */
	buf_chunk_t*	cur	= buf_pool->chunks;
	ulint		n_left	= buf_pool->n_chunks;

	for (; n_left--; cur++) {
		/* Byte distance from the first frame of this chunk;
		negative when ptr lies below the chunk. */
		lint	byte_offs = ptr - cur->blocks->frame;

		if (UNIV_LIKELY(byte_offs >= 0)) {
			/* Convert the byte distance to a page index
			within the chunk. */
			ulint	page_idx = ((ulint) byte_offs)
				>> UNIV_PAGE_SIZE_SHIFT;

			if (UNIV_LIKELY(page_idx < cur->size)) {
				buf_block_t*	block
					= &cur->blocks[page_idx];

				/* The function buf_chunk_init() invokes
				buf_block_init() so that block[n].frame ==
				block->frame + n * UNIV_PAGE_SIZE.  Check it. */
				ut_ad(block->frame == page_align(ptr));

				/* The space id and page number should be
				stamped on the page. */
				ut_ad(block->page.space
				      == page_get_space_id(page_align(ptr)));
				ut_ad(block->page.offset
				      == page_get_page_no(page_align(ptr)));

				return(block);
			}
		}
	}

	/* The block should always be found. */
	ut_error;
	return(NULL);
}
/************************************************************************
Find out if a buffer block was created by buf_chunk_init(). */
static

View file

@ -858,15 +858,15 @@ Gets the compressed page descriptor corresponding to an uncompressed page
if applicable. */
#define buf_block_get_page_zip(block) \
(UNIV_LIKELY_NULL((block)->page.zip.data) ? &(block)->page.zip : NULL)
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
/***********************************************************************
Gets the block to whose frame the pointer is pointing to. */
UNIV_INLINE
const buf_block_t*
UNIV_INTERN
buf_block_t*
buf_block_align(
/*============*/
/* out: pointer to block */
/* out: pointer to block, never NULL */
const byte* ptr); /* in: pointer to a frame */
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
/*************************************************************************
Gets the compressed page descriptor corresponding to an uncompressed page
if applicable. */

View file

@ -631,29 +631,6 @@ buf_block_get_zip_size(
}
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
/***********************************************************************
Looks up the block whose frame contains ptr.  The space id and page
number are read from the start of the page that ptr points into, and
are then passed to buf_page_hash_get(). */
UNIV_INLINE
const buf_block_t*
buf_block_align(
/*============*/
				/* out: pointer to block */
	const byte*	ptr)	/* in: pointer to a frame */
{
	const byte*		frame;
	ulint			space;
	ulint			offset;
	const buf_block_t*	found;

	/* Round the pointer down to the start of its page. */
	frame = (const byte*) ut_align_down(ptr, UNIV_PAGE_SIZE);

	offset = mach_read_from_4(frame + FIL_PAGE_OFFSET);
	space = mach_read_from_4(frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

	found = (const buf_block_t*) buf_page_hash_get(space, offset);

	/* The lookup is expected to return a file page whose frame
	is the page that ptr points into. */
	ut_ad(found);
	ut_ad(buf_block_get_state(found) == BUF_BLOCK_FILE_PAGE);
	ut_ad(found->frame == frame);

	return(found);
}
/*************************************************************************
Gets the compressed page descriptor corresponding to an uncompressed page
if applicable. */
@ -664,11 +641,7 @@ buf_frame_get_page_zip(
/* out: compressed page descriptor, or NULL */
const byte* ptr) /* in: pointer to the page */
{
const page_zip_des_t* page_zip;
buf_pool_mutex_enter();
page_zip = buf_block_get_page_zip(buf_block_align(ptr));
buf_pool_mutex_exit();
return(page_zip);
return(buf_block_get_page_zip(buf_block_align(ptr)));
}
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */

View file

@ -191,10 +191,8 @@ mlog_write_initial_log_record_fast(
#endif
#ifdef UNIV_DEBUG
buf_pool_mutex_enter();
/* We now assume that all x-latched pages have been modified! */
block = (buf_block_t*) buf_block_align(ptr);
buf_pool_mutex_exit();
if (!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)) {

View file

@ -139,9 +139,7 @@ row_upd_rec_sys_fields(
ut_ad(rec_offs_validate(rec, index, offsets));
#ifdef UNIV_SYNC_DEBUG
if (!rw_lock_own(&btr_search_latch, RW_LOCK_EX)) {
buf_pool_mutex_enter();
ut_ad(!buf_block_align(rec)->is_hashed);
buf_pool_mutex_exit();
}
#endif /* UNIV_SYNC_DEBUG */

View file

@ -309,12 +309,7 @@ mtr_memo_contains_page(
const byte* ptr, /* in: pointer to buffer frame */
ulint type) /* in: type of object */
{
ibool ret;
buf_pool_mutex_enter();
ret = mtr_memo_contains(mtr, buf_block_align(ptr), type);
buf_pool_mutex_exit();
return(ret);
return(mtr_memo_contains(mtr, buf_block_align(ptr), type));
}
/*************************************************************