mirror of
https://github.com/MariaDB/server.git
synced 2025-01-21 22:34:18 +01:00
branches/zip: Do not decompress blocks in buf_page_init_for_read(),
but in buf_page_get_gen(). Adjust comments. This prevents unnecessary decompression in read-ahead when the compressed block exists in the buffer pool. If the block does not exist in the buffer pool, read-ahead will still allocate an uncompressed page and decompress the block. Move buf_block_init_low() and buf_zip_decompress() earlier in the file, because some compilers are unable to inline functions that are defined after the invocation. buf_page_get_gen(): Decompress the block if needed. buf_page_init_for_read(): Do not decompress.
This commit is contained in:
parent
8edd94a767
commit
329c469e70
3 changed files with 231 additions and 209 deletions
436
buf/buf0buf.c
436
buf/buf0buf.c
|
@ -1595,6 +1595,89 @@ lookup:
|
|||
return(bpage);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
Initialize some fields of a control block. */
|
||||
UNIV_INLINE
|
||||
void
|
||||
buf_block_init_low(
|
||||
/*===============*/
|
||||
buf_block_t* block) /* in: block to init */
|
||||
{
|
||||
block->check_index_page_at_flush = FALSE;
|
||||
block->index = NULL;
|
||||
|
||||
block->n_hash_helps = 0;
|
||||
block->is_hashed = FALSE;
|
||||
block->n_fields = 1;
|
||||
block->n_bytes = 0;
|
||||
block->left_side = TRUE;
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
Decompress a block. */
|
||||
static
|
||||
ibool
|
||||
buf_zip_decompress(
|
||||
/*===============*/
|
||||
/* out: TRUE if successful */
|
||||
buf_block_t* block, /* in/out: block */
|
||||
ibool check) /* in: TRUE=verify the page checksum */
|
||||
{
|
||||
const byte* frame = block->page.zip.data;
|
||||
|
||||
ut_ad(buf_block_get_zip_size(block));
|
||||
ut_a(buf_block_get_space(block) != 0);
|
||||
|
||||
if (UNIV_LIKELY(check)) {
|
||||
ulint stamp_checksum = mach_read_from_4(
|
||||
frame + FIL_PAGE_SPACE_OR_CHKSUM);
|
||||
ulint calc_checksum = page_zip_calc_checksum(
|
||||
frame, page_zip_get_size(&block->page.zip));
|
||||
|
||||
if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
|
||||
ut_print_timestamp(stderr);
|
||||
fprintf(stderr,
|
||||
" InnoDB: compressed page checksum mismatch"
|
||||
" (space %u page %u): %lu != %lu\n",
|
||||
block->page.space, block->page.offset,
|
||||
stamp_checksum, calc_checksum);
|
||||
return(FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
switch (fil_page_get_type(frame)) {
|
||||
case FIL_PAGE_INDEX:
|
||||
if (page_zip_decompress(&block->page.zip,
|
||||
block->frame)) {
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
fprintf(stderr,
|
||||
"InnoDB: unable to decompress space %lu page %lu\n",
|
||||
(ulong) block->page.space,
|
||||
(ulong) block->page.offset);
|
||||
return(FALSE);
|
||||
|
||||
case FIL_PAGE_TYPE_ALLOCATED:
|
||||
case FIL_PAGE_INODE:
|
||||
case FIL_PAGE_IBUF_BITMAP:
|
||||
case FIL_PAGE_TYPE_FSP_HDR:
|
||||
case FIL_PAGE_TYPE_XDES:
|
||||
case FIL_PAGE_TYPE_ZBLOB:
|
||||
/* Copy to uncompressed storage. */
|
||||
memcpy(block->frame, frame,
|
||||
buf_block_get_zip_size(block));
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
ut_print_timestamp(stderr);
|
||||
fprintf(stderr,
|
||||
" InnoDB: unknown compressed page"
|
||||
" type %lu\n",
|
||||
fil_page_get_type(frame));
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
This is the general function used to get access to a database page. */
|
||||
|
||||
|
@ -1633,12 +1716,10 @@ buf_page_get_gen(
|
|||
#endif
|
||||
buf_pool->n_page_gets++;
|
||||
loop:
|
||||
block = NULL;
|
||||
block = guess;
|
||||
mutex_enter_fast(&(buf_pool->mutex));
|
||||
|
||||
if (guess) {
|
||||
block = guess;
|
||||
|
||||
if (block) {
|
||||
if (offset != block->page.offset
|
||||
|| space != block->page.space
|
||||
|| !buf_page_in_file(&block->page)) {
|
||||
|
@ -1651,15 +1732,13 @@ loop:
|
|||
block = (buf_block_t*) buf_page_hash_get(space, offset);
|
||||
}
|
||||
|
||||
if (block == NULL
|
||||
|| UNIV_UNLIKELY(buf_block_get_state(block)
|
||||
!= BUF_BLOCK_FILE_PAGE)) {
|
||||
/* Uncompressed page not in buf_pool: needs to be
|
||||
decompressed or read from file */
|
||||
loop2:
|
||||
if (block == NULL) {
|
||||
/* Page not in buf_pool: needs to be read from file */
|
||||
|
||||
mutex_exit(&(buf_pool->mutex));
|
||||
|
||||
if (!block && mode == BUF_GET_IF_IN_POOL) {
|
||||
if (mode == BUF_GET_IF_IN_POOL) {
|
||||
|
||||
return(NULL);
|
||||
}
|
||||
|
@ -1672,20 +1751,141 @@ loop:
|
|||
goto loop;
|
||||
}
|
||||
|
||||
mutex_enter(&block->mutex);
|
||||
|
||||
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
|
||||
ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
|
||||
|
||||
must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
|
||||
|
||||
if (must_read && mode == BUF_GET_IF_IN_POOL) {
|
||||
/* The page is only being read to buffer */
|
||||
mutex_exit(&buf_pool->mutex);
|
||||
mutex_exit(&block->mutex);
|
||||
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
switch (buf_block_get_state(block)) {
|
||||
buf_page_t* bpage;
|
||||
|
||||
case BUF_BLOCK_FILE_PAGE:
|
||||
break;
|
||||
|
||||
case BUF_BLOCK_ZIP_PAGE:
|
||||
case BUF_BLOCK_ZIP_DIRTY:
|
||||
bpage = &block->page;
|
||||
|
||||
if (bpage->buf_fix_count
|
||||
|| buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
|
||||
|
||||
/* The block is buffer-fixed or I/O-fixed.
|
||||
Try again later. */
|
||||
mutex_exit(&buf_pool->mutex);
|
||||
os_thread_sleep(WAIT_FOR_READ);
|
||||
|
||||
goto loop;
|
||||
}
|
||||
|
||||
/* Allocate an uncompressed page. */
|
||||
mutex_exit(&buf_pool->mutex);
|
||||
|
||||
block = buf_LRU_get_free_block(0);
|
||||
ut_a(block);
|
||||
|
||||
mutex_enter(&buf_pool->mutex);
|
||||
mutex_enter(&block->mutex);
|
||||
|
||||
{
|
||||
buf_page_t* hash_bpage
|
||||
= buf_page_hash_get(space, offset);
|
||||
|
||||
if (UNIV_UNLIKELY(bpage != hash_bpage)) {
|
||||
/* The buf_pool->page_hash was modified
|
||||
while buf_pool->mutex was released.
|
||||
Free the block that was allocated. */
|
||||
|
||||
buf_LRU_block_free_non_file_page(block);
|
||||
mutex_exit(&block->mutex);
|
||||
|
||||
block = (buf_block_t*) hash_bpage;
|
||||
goto loop2;
|
||||
}
|
||||
}
|
||||
|
||||
/* Move the compressed page from bpage to block,
|
||||
and uncompress it. */
|
||||
|
||||
mutex_enter(&buf_pool->zip_mutex);
|
||||
|
||||
buf_relocate(bpage, &block->page);
|
||||
buf_block_init_low(block);
|
||||
block->lock_hash_val = lock_rec_hash(space, offset);
|
||||
|
||||
UNIV_MEM_DESC(bpage->zip.data,
|
||||
page_zip_get_size(&bpage->zip), block);
|
||||
|
||||
if (buf_page_get_state(&block->page)
|
||||
== BUF_BLOCK_ZIP_PAGE) {
|
||||
UT_LIST_REMOVE(list, buf_pool->zip_clean,
|
||||
&block->page);
|
||||
ut_ad(!block->page.in_flush_list);
|
||||
} else {
|
||||
/* Relocate buf_pool->flush_list. */
|
||||
buf_page_t* b;
|
||||
|
||||
b = UT_LIST_GET_PREV(list, &block->page);
|
||||
ut_ad(block->page.in_flush_list);
|
||||
UT_LIST_REMOVE(list, buf_pool->flush_list,
|
||||
&block->page);
|
||||
|
||||
if (b) {
|
||||
UT_LIST_INSERT_AFTER(
|
||||
list, buf_pool->flush_list, b,
|
||||
&block->page);
|
||||
} else {
|
||||
UT_LIST_ADD_FIRST(
|
||||
list, buf_pool->flush_list,
|
||||
&block->page);
|
||||
}
|
||||
}
|
||||
|
||||
/* Buffer-fix, I/O-fix, and X-latch the block
|
||||
for the duration of the decompression. */
|
||||
block->page.state = BUF_BLOCK_FILE_PAGE;
|
||||
block->page.buf_fix_count = 1;
|
||||
buf_block_set_io_fix(block, BUF_IO_READ);
|
||||
rw_lock_x_lock(&block->lock);
|
||||
mutex_exit(&block->mutex);
|
||||
mutex_exit(&buf_pool->zip_mutex);
|
||||
|
||||
buf_buddy_free(bpage, sizeof *bpage);
|
||||
|
||||
mutex_exit(&buf_pool->mutex);
|
||||
|
||||
/* Decompress the page while not holding
|
||||
buf_pool->mutex or block->mutex. */
|
||||
buf_zip_decompress(block, srv_use_checksums);
|
||||
|
||||
/* Unfix and unlatch the block. */
|
||||
mutex_enter(&buf_pool->mutex);
|
||||
mutex_enter(&block->mutex);
|
||||
block->page.buf_fix_count--;
|
||||
buf_block_set_io_fix(block, BUF_IO_NONE);
|
||||
mutex_exit(&block->mutex);
|
||||
rw_lock_x_unlock(&block->lock);
|
||||
|
||||
break;
|
||||
|
||||
case BUF_BLOCK_ZIP_FREE:
|
||||
case BUF_BLOCK_NOT_USED:
|
||||
case BUF_BLOCK_READY_FOR_USE:
|
||||
case BUF_BLOCK_MEMORY:
|
||||
case BUF_BLOCK_REMOVE_HASH:
|
||||
ut_error;
|
||||
break;
|
||||
}
|
||||
|
||||
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
|
||||
|
||||
mutex_enter(&block->mutex);
|
||||
|
||||
buf_block_buf_fix_inc(block, file, line);
|
||||
mutex_exit(&buf_pool->mutex);
|
||||
|
||||
|
@ -1998,24 +2198,6 @@ buf_page_get_known_nowait(
|
|||
return(TRUE);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
Initialize some fields of a control block. */
|
||||
UNIV_INLINE
|
||||
void
|
||||
buf_block_init_low(
|
||||
/*===============*/
|
||||
buf_block_t* block) /* in: block to init */
|
||||
{
|
||||
block->check_index_page_at_flush = FALSE;
|
||||
block->index = NULL;
|
||||
|
||||
block->n_hash_helps = 0;
|
||||
block->is_hashed = FALSE;
|
||||
block->n_fields = 1;
|
||||
block->n_bytes = 0;
|
||||
block->left_side = TRUE;
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
Initialize some fields of a control block. */
|
||||
UNIV_INLINE
|
||||
|
@ -2133,71 +2315,6 @@ buf_page_init(
|
|||
buf_page_address_fold(space, offset), &block->page);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
Decompress a block. */
|
||||
static
|
||||
ibool
|
||||
buf_zip_decompress(
|
||||
/*===============*/
|
||||
/* out: TRUE if successful */
|
||||
buf_block_t* block, /* in/out: block */
|
||||
ibool check) /* in: TRUE=verify the page checksum */
|
||||
{
|
||||
const byte* frame = block->page.zip.data;
|
||||
|
||||
ut_ad(buf_block_get_zip_size(block));
|
||||
ut_a(buf_block_get_space(block) != 0);
|
||||
|
||||
if (UNIV_LIKELY(check)) {
|
||||
ulint stamp_checksum = mach_read_from_4(
|
||||
frame + FIL_PAGE_SPACE_OR_CHKSUM);
|
||||
ulint calc_checksum = page_zip_calc_checksum(
|
||||
frame, page_zip_get_size(&block->page.zip));
|
||||
|
||||
if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
|
||||
ut_print_timestamp(stderr);
|
||||
fprintf(stderr,
|
||||
" InnoDB: compressed page checksum mismatch"
|
||||
" (space %u page %u): %lu != %lu\n",
|
||||
block->page.space, block->page.offset,
|
||||
stamp_checksum, calc_checksum);
|
||||
return(FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
switch (fil_page_get_type(frame)) {
|
||||
case FIL_PAGE_INDEX:
|
||||
if (page_zip_decompress(&block->page.zip,
|
||||
block->frame)) {
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
fprintf(stderr,
|
||||
"InnoDB: unable to decompress space %lu page %lu\n",
|
||||
(ulong) block->page.space,
|
||||
(ulong) block->page.offset);
|
||||
return(FALSE);
|
||||
|
||||
case FIL_PAGE_TYPE_ALLOCATED:
|
||||
case FIL_PAGE_INODE:
|
||||
case FIL_PAGE_IBUF_BITMAP:
|
||||
case FIL_PAGE_TYPE_FSP_HDR:
|
||||
case FIL_PAGE_TYPE_XDES:
|
||||
case FIL_PAGE_TYPE_ZBLOB:
|
||||
/* Copy to uncompressed storage. */
|
||||
memcpy(block->frame, frame,
|
||||
buf_block_get_zip_size(block));
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
ut_print_timestamp(stderr);
|
||||
fprintf(stderr,
|
||||
" InnoDB: unknown compressed page"
|
||||
" type %lu\n",
|
||||
fil_page_get_type(frame));
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
Function which inits a page for read to the buffer buf_pool. If the page is
|
||||
(1) already in buf_pool, or
|
||||
|
@ -2224,7 +2341,6 @@ buf_page_init_for_read(
|
|||
ulint offset) /* in: page number */
|
||||
{
|
||||
buf_block_t* block;
|
||||
buf_page_t* bpage;
|
||||
mtr_t mtr;
|
||||
|
||||
ut_ad(buf_pool);
|
||||
|
@ -2256,115 +2372,12 @@ buf_page_init_for_read(
|
|||
mutex_enter(&(buf_pool->mutex));
|
||||
mutex_enter(&block->mutex);
|
||||
|
||||
if (fil_tablespace_deleted_or_being_deleted_in_mem(
|
||||
space, tablespace_version)) {
|
||||
*err = DB_TABLESPACE_DELETED;
|
||||
|
||||
goto err_exit;
|
||||
}
|
||||
|
||||
bpage = buf_page_hash_get(space, offset);
|
||||
|
||||
if (UNIV_LIKELY_NULL(bpage)) {
|
||||
|
||||
switch (buf_page_get_state(bpage)) {
|
||||
case BUF_BLOCK_ZIP_PAGE:
|
||||
case BUF_BLOCK_ZIP_DIRTY:
|
||||
ut_a(page_zip_get_size(&bpage->zip) == zip_size);
|
||||
|
||||
if (bpage->buf_fix_count
|
||||
|| buf_page_get_io_fix(bpage)
|
||||
!= BUF_IO_NONE) {
|
||||
|
||||
goto err_exit;
|
||||
}
|
||||
|
||||
/* Move the compressed page from bpage to block,
|
||||
and uncompress it. */
|
||||
|
||||
mutex_enter(&buf_pool->zip_mutex);
|
||||
|
||||
buf_relocate(bpage, &block->page);
|
||||
buf_block_init_low(block);
|
||||
block->lock_hash_val = lock_rec_hash(space, offset);
|
||||
|
||||
UNIV_MEM_DESC(bpage->zip.data, zip_size, block);
|
||||
|
||||
if (buf_page_get_state(&block->page)
|
||||
== BUF_BLOCK_ZIP_PAGE) {
|
||||
UT_LIST_REMOVE(list, buf_pool->zip_clean,
|
||||
&block->page);
|
||||
ut_ad(!block->page.in_flush_list);
|
||||
} else {
|
||||
/* Relocate buf_pool->flush_list. */
|
||||
buf_page_t* b;
|
||||
|
||||
b = UT_LIST_GET_PREV(list, &block->page);
|
||||
ut_ad(block->page.in_flush_list);
|
||||
UT_LIST_REMOVE(list, buf_pool->flush_list,
|
||||
&block->page);
|
||||
|
||||
if (b) {
|
||||
UT_LIST_INSERT_AFTER(
|
||||
list, buf_pool->flush_list, b,
|
||||
&block->page);
|
||||
} else {
|
||||
UT_LIST_ADD_FIRST(
|
||||
list, buf_pool->flush_list,
|
||||
&block->page);
|
||||
}
|
||||
}
|
||||
|
||||
/* Buffer-fix, I/O-fix, and X-latch the block
|
||||
for the duration of the decompression. */
|
||||
block->page.state = BUF_BLOCK_FILE_PAGE;
|
||||
block->page.buf_fix_count = 1;
|
||||
buf_block_set_io_fix(block, BUF_IO_READ);
|
||||
rw_lock_x_lock(&block->lock);
|
||||
mutex_exit(&block->mutex);
|
||||
mutex_exit(&buf_pool->zip_mutex);
|
||||
|
||||
buf_buddy_free(bpage, sizeof *bpage);
|
||||
|
||||
mutex_exit(&buf_pool->mutex);
|
||||
|
||||
if (mode == BUF_READ_IBUF_PAGES_ONLY) {
|
||||
|
||||
mtr_commit(&mtr);
|
||||
}
|
||||
|
||||
/* Decompress the page while not holding
|
||||
buf_pool->mutex or block->mutex. */
|
||||
buf_zip_decompress(block, srv_use_checksums);
|
||||
|
||||
/* Unfix and unlatch the block. */
|
||||
mutex_enter(&buf_pool->mutex);
|
||||
mutex_enter(&block->mutex);
|
||||
block->page.buf_fix_count--;
|
||||
buf_block_set_io_fix(block, BUF_IO_NONE);
|
||||
mutex_exit(&buf_pool->mutex);
|
||||
mutex_exit(&block->mutex);
|
||||
rw_lock_x_unlock(&block->lock);
|
||||
|
||||
return(NULL);
|
||||
case BUF_BLOCK_FILE_PAGE:
|
||||
break;
|
||||
case BUF_BLOCK_ZIP_FREE:
|
||||
case BUF_BLOCK_NOT_USED:
|
||||
case BUF_BLOCK_READY_FOR_USE:
|
||||
case BUF_BLOCK_MEMORY:
|
||||
case BUF_BLOCK_REMOVE_HASH:
|
||||
ut_error;
|
||||
break;
|
||||
}
|
||||
if (buf_page_hash_get(space, offset)) {
|
||||
/* The page is already in the buffer pool. */
|
||||
|
||||
err_exit:
|
||||
/* The page belongs to a space which has been
|
||||
deleted or is being deleted, or the page is
|
||||
already in buf_pool, return */
|
||||
|
||||
mutex_exit(&block->mutex);
|
||||
mutex_exit(&(buf_pool->mutex));
|
||||
mutex_exit(&buf_pool->mutex);
|
||||
|
||||
buf_block_free(block);
|
||||
|
||||
|
@ -2376,6 +2389,15 @@ err_exit:
|
|||
return(NULL);
|
||||
}
|
||||
|
||||
if (fil_tablespace_deleted_or_being_deleted_in_mem(
|
||||
space, tablespace_version)) {
|
||||
/* The page belongs to a space which has been
|
||||
deleted or is being deleted. */
|
||||
*err = DB_TABLESPACE_DELETED;
|
||||
|
||||
goto err_exit;
|
||||
}
|
||||
|
||||
ut_ad(block);
|
||||
|
||||
buf_page_init(space, offset, block);
|
||||
|
@ -2427,7 +2449,7 @@ err_exit:
|
|||
Initializes a page to the buffer buf_pool. The page is usually not read
|
||||
from a file even if it cannot be found in the buffer buf_pool. This is one
|
||||
of the functions which perform to a block a state transition NOT_USED =>
|
||||
FILE_PAGE (the other is buf_page_init_for_read above). */
|
||||
FILE_PAGE (others are buf_page_init_for_read and buf_page_get_gen). */
|
||||
|
||||
buf_block_t*
|
||||
buf_page_create(
|
||||
|
@ -2915,8 +2937,8 @@ buf_validate(void)
|
|||
case BUF_IO_READ:
|
||||
/* In buf_LRU_free_block(), we temporarily set
|
||||
b->io_fix = BUF_IO_READ for a newly allocated
|
||||
control block in order to lock out
|
||||
buf_page_init_for_read(). */
|
||||
control block in order to prevent
|
||||
buf_page_get_gen() from decompressing the block. */
|
||||
break;
|
||||
default:
|
||||
ut_error;
|
||||
|
|
|
@ -992,7 +992,7 @@ alloc:
|
|||
bpage->zip.data = NULL;
|
||||
page_zip_set_size(&bpage->zip, 0);
|
||||
|
||||
/* Prevent buf_page_init_for_read() from
|
||||
/* Prevent buf_page_get_gen() from
|
||||
decompressing the block while we release
|
||||
buf_pool->mutex and block_mutex. */
|
||||
b->buf_fix_count++;
|
||||
|
|
|
@ -256,7 +256,7 @@ buf_page_get_gen(
|
|||
Initializes a page to the buffer buf_pool. The page is usually not read
|
||||
from a file even if it cannot be found in the buffer buf_pool. This is one
|
||||
of the functions which perform to a block a state transition NOT_USED =>
|
||||
FILE_PAGE (the other is buf_page_init_for_read above). */
|
||||
FILE_PAGE (others are buf_page_init_for_read and buf_page_get_gen). */
|
||||
|
||||
buf_block_t*
|
||||
buf_page_create(
|
||||
|
|
Loading…
Add table
Reference in a new issue