branches/zip: Introduce buf_page_t, a common structure for compressed-only and uncompressed buffer pool pages.

buf_block_t: Replace page_zip, space, and offset with buf_page_t page.
Replace some integers with bit-fields.

enum buf_block_state: Rename to buf_page_state.  Add BUF_BLOCK_ZIP_PAGE.

page_zip_des_t: Add the field "state".  Make the integer fields bit-fields.

page_zip_copy(): Document which fields are copied.
This commit is contained in:
marko 2006-11-24 08:32:18 +00:00
parent 7070b7af75
commit dccaa03753
11 changed files with 140 additions and 110 deletions

View file

@ -603,7 +603,7 @@ buf_block_init(
buf_block_t* block, /* in: pointer to control block */
byte* frame) /* in: pointer to buffer frame */
{
block->state = BUF_BLOCK_NOT_USED;
block->page.zip.state = BUF_BLOCK_NOT_USED;
block->frame = frame;
@ -625,7 +625,7 @@ buf_block_init(
#ifdef UNIV_DEBUG
block->n_pointers = 0;
#endif /* UNIV_DEBUG */
page_zip_des_init(&block->page_zip);
page_zip_des_init(&block->page.zip);
mutex_create(&block->mutex, SYNC_BUF_BLOCK);
@ -798,7 +798,7 @@ buf_chunk_free(
for (block = chunk->blocks; block < block_end; block++) {
ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
ut_a(!block->page_zip.data);
ut_a(!block->page.zip.data);
ut_a(!block->in_LRU_list);
/* Remove the block from the free list. */
@ -1081,8 +1081,8 @@ buf_pool_page_hash_rebuild(void)
== BUF_BLOCK_FILE_PAGE) {
HASH_INSERT(buf_block_t, hash, page_hash,
buf_page_address_fold(
block->space,
block->offset),
block->page.space,
block->page.offset),
block);
}
}
@ -1397,7 +1397,8 @@ loop:
if (guess) {
block = guess;
if ((offset != block->offset) || (space != block->space)
if (offset != block->page.offset
|| space != block->page.space
|| buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
block = NULL;
@ -1799,12 +1800,12 @@ buf_page_init_for_backup_restore(
block->n_fields = 1;
block->n_bytes = 0;
block->left_side = TRUE;
page_zip_des_init(&block->page_zip);
/* We assume that block->page_zip.data has been allocated
page_zip_des_init(&block->page);
/* We assume that block->page.zip.data has been allocated
with zip_size == UNIV_PAGE_SIZE. */
ut_ad(zip_size <= UNIV_PAGE_SIZE);
ut_ad(ut_is_2pow(zip_size));
block->page_zip.size = zip_size;
block->page.size = zip_size;
#ifdef UNIV_DEBUG_FILE_ACCESSES
block->file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES */
@ -2127,16 +2128,16 @@ buf_page_io_complete(
ulint read_space_id;
byte* frame;
if (block->page_zip.size) {
if (block->page.zip.size) {
ut_a(buf_block_get_space(block) != 0);
frame = block->page_zip.data;
frame = block->page.zip.data;
switch (fil_page_get_type(frame)) {
case FIL_PAGE_INDEX:
if (block->frame) {
if (!page_zip_decompress(
&block->page_zip,
&block->page.zip,
block->frame)) {
goto corrupt;
}
@ -2150,7 +2151,7 @@ buf_page_io_complete(
case FIL_PAGE_TYPE_ZBLOB:
/* Copy to uncompressed storage. */
memcpy(block->frame, frame,
block->page_zip.size);
block->page.zip.size);
break;
default:
ut_print_timestamp(stderr);
@ -2183,8 +2184,9 @@ buf_page_io_complete(
(ulong) buf_block_get_page_no(block));
} else if (!read_space_id && !read_page_no) {
/* This is likely an uninitialized page. */
} else if ((block->space && block->space != read_space_id)
|| block->offset != read_page_no) {
} else if ((block->page.space
&& block->page.space != read_space_id)
|| block->page.offset != read_page_no) {
/* We did not compare space_id to read_space_id
if block->page.space == 0, because the field on the
page may contain garbage in MySQL < 4.1.1,
@ -2197,13 +2199,14 @@ buf_page_io_complete(
"InnoDB: read in are %lu:%lu,"
" should be %lu:%lu!\n",
(ulong) read_space_id, (ulong) read_page_no,
(ulong) block->space, (ulong) block->offset);
(ulong) block->page.space,
(ulong) block->page.offset);
}
/* From version 3.23.38 up we store the page checksum
to the 4 first bytes of the page end lsn field */
if (buf_page_is_corrupted(frame, block->page_zip.size)) {
if (buf_page_is_corrupted(frame, block->page.zip.size)) {
corrupt:
fprintf(stderr,
"InnoDB: Database page corruption on disk"
@ -2211,15 +2214,15 @@ corrupt:
"InnoDB: file read of page %lu.\n"
"InnoDB: You may have to recover"
" from a backup.\n",
(ulong) block->offset);
buf_page_print(frame, block->page_zip.size);
(ulong) block->page.offset);
buf_page_print(frame, block->page.zip.size);
fprintf(stderr,
"InnoDB: Database page corruption on disk"
" or a failed\n"
"InnoDB: file read of page %lu.\n"
"InnoDB: You may have to recover"
" from a backup.\n",
(ulong) block->offset);
(ulong) block->page.offset);
fputs("InnoDB: It is also possible that"
" your operating\n"
"InnoDB: system has corrupted its"
@ -2255,7 +2258,7 @@ corrupt:
if (!recv_no_ibuf_operations) {
ibuf_merge_or_delete_for_page(
block, block->space, block->offset,
block, block->page.space, block->page.offset,
buf_block_get_zip_size(block), TRUE);
}
}
@ -2767,8 +2770,8 @@ buf_all_freed(void)
if (UNIV_LIKELY_NULL(block)) {
fprintf(stderr,
"Page %lu %lu still fixed or dirty\n",
(ulong) block->space,
(ulong) block->offset);
(ulong) block->page.space,
(ulong) block->page.offset);
ut_error;
}
}

View file

@ -263,7 +263,7 @@ buf_flush_buffered_writes(void)
block = trx_doublewrite->buf_block_arr[i];
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
if (UNIV_LIKELY_NULL(block->page_zip.data)) {
if (UNIV_LIKELY_NULL(block->page.zip.data)) {
/* No simple validate for compressed pages exists. */
continue;
}
@ -328,7 +328,7 @@ corrupted_page:
for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
len2 += UNIV_PAGE_SIZE, i++) {
block = trx_doublewrite->buf_block_arr[i];
if (UNIV_LIKELY(!block->page_zip.data)
if (UNIV_LIKELY(!block->page.zip.data)
&& UNIV_UNLIKELY
(memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
write_buf + len2
@ -361,7 +361,7 @@ corrupted_page:
for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len;
len2 += UNIV_PAGE_SIZE, i++) {
block = trx_doublewrite->buf_block_arr[i];
if (UNIV_LIKELY(!block->page_zip.data)
if (UNIV_LIKELY(!block->page.zip.data)
&& UNIV_UNLIKELY
(memcmp(write_buf + len2 + (FIL_PAGE_LSN + 4),
write_buf + len2
@ -389,13 +389,13 @@ flush:
for (i = 0; i < trx_doublewrite->first_free; i++) {
block = trx_doublewrite->buf_block_arr[i];
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
if (UNIV_UNLIKELY(block->page_zip.size)) {
if (UNIV_UNLIKELY(block->page.zip.size)) {
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
FALSE, buf_block_get_space(block),
block->page_zip.size,
block->page.zip.size,
buf_block_get_page_no(block), 0,
block->page_zip.size,
(void*)block->page_zip.data,
block->page.zip.size,
(void*)block->page.zip.data,
(void*)block);
continue;
} else if (UNIV_UNLIKELY
@ -471,13 +471,13 @@ try_again:
goto try_again;
}
zip_size = block->page_zip.size;
zip_size = block->page.zip.size;
if (UNIV_UNLIKELY(zip_size)) {
/* Copy the compressed page and clear the rest. */
memcpy(trx_doublewrite->write_buf
+ UNIV_PAGE_SIZE * trx_doublewrite->first_free,
block->page_zip.data, zip_size);
block->page.zip.data, zip_size);
memset(trx_doublewrite->write_buf
+ UNIV_PAGE_SIZE * trx_doublewrite->first_free
+ zip_size, 0, UNIV_PAGE_SIZE - zip_size);
@ -607,9 +607,9 @@ buf_flush_write_block_low(
block->newest_modification);
if (!srv_use_doublewrite_buf || !trx_doublewrite) {
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
FALSE, buf_block_get_space(block), block->page_zip.size,
buf_block_get_page_no(block), 0, block->page_zip.size
? block->page_zip.size : UNIV_PAGE_SIZE,
FALSE, buf_block_get_space(block), block->page.zip.size,
buf_block_get_page_no(block), 0, block->page.zip.size
? block->page.zip.size : UNIV_PAGE_SIZE,
(void*)block->frame, (void*)block);
} else {
buf_flush_post_to_doublewrite_buf(block);

View file

@ -444,20 +444,20 @@ loop:
ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
ut_a(!block->in_LRU_list);
if (block->page_zip.size != zip_size) {
block->page_zip.size = zip_size;
block->page_zip.n_blobs = 0;
block->page_zip.m_start = 0;
block->page_zip.m_end = 0;
if (block->page_zip.data) {
ut_free(block->page_zip.data);
if (block->page.zip.size != zip_size) {
block->page.zip.size = zip_size;
block->page.zip.n_blobs = 0;
block->page.zip.m_start = 0;
block->page.zip.m_end = 0;
if (block->page.zip.data) {
ut_free(block->page.zip.data);
}
if (zip_size) {
/* TODO: allocate zip from an aligned pool */
block->page_zip.data = ut_malloc(zip_size);
block->page.zip.data = ut_malloc(zip_size);
} else {
block->page_zip.data = NULL;
block->page.zip.data = NULL;
}
}
@ -878,11 +878,11 @@ buf_LRU_block_free_non_file_page(
memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4);
memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4);
#endif
if (block->page_zip.data) {
if (block->page.zip.data) {
/* TODO: return zip to an aligned pool */
ut_free(block->page_zip.data);
block->page_zip.data = NULL;
block->page_zip.size = 0;
ut_free(block->page.zip.data);
block->page.zip.data = NULL;
block->page.zip.size = 0;
}
UT_LIST_ADD_FIRST(free, buf_pool->free, block);
@ -920,21 +920,22 @@ buf_LRU_block_remove_hashed_page(
buf_block_modify_clock_inc(block);
hashed_block = buf_page_hash_get(block->space, block->offset);
hashed_block = buf_page_hash_get(block->page.space,
block->page.offset);
if (UNIV_UNLIKELY(block != hashed_block)) {
fprintf(stderr,
"InnoDB: Error: page %lu %lu not found"
" in the hash table\n",
(ulong) block->space,
(ulong) block->offset);
(ulong) block->page.space,
(ulong) block->page.offset);
if (hashed_block) {
fprintf(stderr,
"InnoDB: In hash table we find block"
" %p of %lu %lu which is not %p\n",
(const void*) hashed_block,
(ulong) hashed_block->space,
(ulong) hashed_block->offset,
(ulong) hashed_block->page.space,
(ulong) hashed_block->page.offset,
(void*) block);
}
@ -948,7 +949,8 @@ buf_LRU_block_remove_hashed_page(
}
HASH_DELETE(buf_block_t, hash, buf_pool->page_hash,
buf_page_address_fold(block->space, block->offset),
buf_page_address_fold(block->page.space,
block->page.offset),
block);
memset(block->frame + FIL_PAGE_OFFSET, 0xff, 4);
memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4);

View file

@ -134,7 +134,7 @@ buf_read_page_low(
if (zip_size) {
*err = fil_io(OS_FILE_READ | wake_later,
sync, space, zip_size, offset, 0, zip_size,
(void*) block->page_zip.data, (void*) block);
(void*) block->page.zip.data, (void*) block);
} else {
*err = fil_io(OS_FILE_READ | wake_later,
sync, space, 0, offset, 0, UNIV_PAGE_SIZE,

View file

@ -2660,7 +2660,8 @@ error_exit2:
page_zip_des_t page_zip;
page_zip.size = zip_size;
page_zip.data = page + UNIV_PAGE_SIZE;
page_zip.n_blobs = page_zip.m_start = page_zip.m_end = 0;
page_zip.state = page_zip.n_blobs
= page_zip.m_start = page_zip.m_end = 0;
buf_flush_init_for_writing(page, &page_zip, ut_dulint_zero);
ret = os_file_write(path, file, page_zip.data, 0, 0, zip_size);
}

View file

@ -66,8 +66,12 @@ extern ulint srv_buf_pool_write_requests; /* variable to count write request
issued */
/* States of a control block */
enum buf_block_state {
BUF_BLOCK_NOT_USED = 211, /* is in the free list */
enum buf_page_state {
BUF_BLOCK_ZIP_PAGE = 1, /* contains a compressed page only;
must be smaller than
BUF_BLOCK_NOT_USED;
cf. buf_block_state_valid() */
BUF_BLOCK_NOT_USED, /* is in the free list */
BUF_BLOCK_READY_FOR_USE, /* when buf_get_free_block returns
a block, it is in this state */
BUF_BLOCK_FILE_PAGE, /* contains a buffered file page */
@ -536,7 +540,7 @@ buf_block_dbg_add_level(
/*************************************************************************
Gets the state of a block. */
UNIV_INLINE
enum buf_block_state
enum buf_page_state
buf_block_get_state(
/*================*/
/* out: state */
@ -549,7 +553,7 @@ void
buf_block_set_state(
/*================*/
buf_block_t* block, /* in/out: pointer to control block */
enum buf_block_state state); /* in: state */
enum buf_page_state state); /* in: state */
/*************************************************************************
Map a block to a file page. */
UNIV_INLINE
@ -702,25 +706,30 @@ buf_get_free_list_len(void);
/* The common buffer control block structure
for compressed and uncompressed frames.
This descriptor is embedded as the first field ("page") of
buf_block_struct; the state of the control block is kept in
zip.state rather than in a separate field. */
struct buf_page_struct{
ulint space:32; /* tablespace id; stored as a 32-bit bit-field */
ulint offset:32; /* page number; stored as a 32-bit bit-field */
page_zip_des_t zip; /* compressed page descriptor; zip.state
is relevant for all pages, because
buf_block_get_state() and buf_block_set_state()
read and write the block state there */
};
/* The buffer control block structure */
struct buf_block_struct{
/* 1. General fields */
ulint state; /* state of the control block:
BUF_BLOCK_NOT_USED, ...; changing
this is only allowed when a thread
has BOTH the buffer pool mutex AND
block->mutex locked */
buf_page_t page; /* page information; this must
be the first field, so that
buf_pool->page_hash can point
to buf_page_t or buf_block_t */
byte* frame; /* pointer to buffer frame which
is of size UNIV_PAGE_SIZE, and
aligned to an address divisible by
UNIV_PAGE_SIZE */
ulint space; /* space id of the page */
ulint offset; /* page number within the space */
ulint lock_hash_val; /* hashed value of the page address
in the record lock hash table */
mutex_t mutex; /* mutex protecting this block:
state (also protected by the buffer
pool mutex), io_fix, buf_fix_count,
@ -731,7 +740,9 @@ struct buf_block_struct{
frame */
buf_block_t* hash; /* node used in chaining to the page
hash table */
ibool check_index_page_at_flush;
ulint lock_hash_val:32;/* hashed value of the page address
in the record lock hash table */
ulint check_index_page_at_flush:1;
/* TRUE if we know that this is
an index page, and want the database
to check its consistency before flush;
@ -785,20 +796,20 @@ struct buf_block_struct{
without holding any mutex or latch */
ibool old; /* TRUE if the block is in the old
blocks in the LRU list */
ibool accessed; /* TRUE if the page has been accessed
ulint accessed:1; /* TRUE if the page has been accessed
while in the buffer pool: read-ahead
may read in pages which have not been
accessed yet; this is protected by
block->mutex; a thread is allowed to
read this for heuristic purposes
without holding any mutex or latch */
ulint buf_fix_count; /* count of how manyfold this block
is currently bufferfixed; this is
protected by block->mutex */
ulint io_fix; /* if a read is pending to the frame,
ulint io_fix:2; /* if a read is pending to the frame,
io_fix is BUF_IO_READ, in the case
of a write BUF_IO_WRITE, otherwise 0;
this is protected by block->mutex */
ulint buf_fix_count:29;/* count of how manyfold this block
is currently bufferfixed; this is
protected by block->mutex */
/* 4. Optimistic search field */
dulint modify_clock; /* this clock is incremented every
@ -835,26 +846,23 @@ struct buf_block_struct{
An exception to this is when we init or create a page
in the buffer pool in buf0buf.c. */
ibool is_hashed; /* TRUE if hash index has already been
built on this page; note that it does
not guarantee that the index is
complete, though: there may have been
hash collisions, record deletions,
etc. */
#ifdef UNIV_DEBUG
ulint n_pointers; /* used in debugging: the number of
pointers in the adaptive hash index
pointing to this frame */
#endif /* UNIV_DEBUG */
ulint curr_n_fields; /* prefix length for hash indexing:
ulint is_hashed:1; /* TRUE if hash index has already been
built on this page; note that it does
not guarantee that the index is
complete, though: there may have been
hash collisions, record deletions,
etc. */
ulint curr_n_fields:10;/* prefix length for hash indexing:
number of full fields */
ulint curr_n_bytes; /* number of bytes in hash indexing */
ibool curr_left_side; /* TRUE or FALSE in hash indexing */
ulint curr_n_bytes:15;/* number of bytes in hash indexing */
ibool curr_left_side:1;/* TRUE or FALSE in hash indexing */
dict_index_t* index; /* Index for which the adaptive
hash index has been created. */
/* TODO: how to protect this? */
page_zip_des_t page_zip; /* compressed page info */
/* 6. Debug fields */
#ifdef UNIV_SYNC_DEBUG
rw_lock_t debug_latch; /* in the debug version, each thread
@ -959,9 +967,9 @@ struct buf_pool_struct{
LRU_old == NULL */
};
/* Io_fix states of a control block; these must be != 0 */
#define BUF_IO_READ 561
#define BUF_IO_WRITE 562
/* Io_fix states of a control block; these must be 1..3 */
#define BUF_IO_READ 1
#define BUF_IO_WRITE 2
/************************************************************************
Let us list the consistency conditions for different control block states.

View file

@ -86,16 +86,17 @@ buf_pool_clock_tic(void)
/*************************************************************************
Gets the state of a block. */
UNIV_INLINE
enum buf_block_state
enum buf_page_state
buf_block_get_state(
/*================*/
/* out: state */
const buf_block_t* block) /* in: pointer to the control block */
{
enum buf_block_state state = block->state;
enum buf_page_state state = block->page.zip.state;
#ifdef UNIV_DEBUG
switch (state) {
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_FILE_PAGE:
@ -116,12 +117,15 @@ void
buf_block_set_state(
/*================*/
buf_block_t* block, /* in/out: pointer to control block */
enum buf_block_state state) /* in: state */
enum buf_page_state state) /* in: state */
{
#ifdef UNIV_DEBUG
enum buf_block_state old_state = buf_block_get_state(block);
enum buf_page_state old_state = buf_block_get_state(block);
switch (old_state) {
case BUF_BLOCK_ZIP_PAGE:
ut_error;
break;
case BUF_BLOCK_NOT_USED:
ut_a(state == BUF_BLOCK_READY_FOR_USE);
break;
@ -142,7 +146,7 @@ buf_block_set_state(
break;
}
#endif /* UNIV_DEBUG */
block->state = state;
block->page.zip.state = state;
ut_ad(buf_block_get_state(block) == state);
}
/*************************************************************************
@ -156,8 +160,8 @@ buf_block_set_file_page(
ulint page_no)/* in: page number */
{
buf_block_set_state(block, BUF_BLOCK_FILE_PAGE);
block->space = space;
block->offset = page_no;
block->page.space = space;
block->page.offset = page_no;
}
/*************************************************************************
@ -189,7 +193,7 @@ buf_block_get_space(
ut_ad(block);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
return(block->space);
return(block->page.space);
}
/*************************************************************************
@ -204,7 +208,7 @@ buf_block_get_page_no(
ut_ad(block);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
return(block->offset);
return(block->page.offset);
}
/*************************************************************************
@ -216,7 +220,7 @@ buf_block_get_zip_size(
/* out: compressed page size, or 0 */
const buf_block_t* block) /* in: pointer to the control block */
{
return(block->page_zip.size);
return(block->page.zip.size);
}
/*************************************************************************
@ -229,8 +233,8 @@ buf_block_get_page_zip(
/* out: compressed page descriptor, or NULL */
buf_block_t* block) /* in: pointer to the control block */
{
if (UNIV_LIKELY_NULL(block->page_zip.data)) {
return(&block->page_zip);
if (UNIV_LIKELY_NULL(block->page.zip.data)) {
return(&block->page.zip);
}
return(NULL);
@ -517,7 +521,8 @@ buf_page_hash_get(
fold = buf_page_address_fold(space, offset);
HASH_SEARCH(hash, buf_pool->page_hash, fold, block,
(block->space == space) && (block->offset == offset));
block->page.space == space
&& block->page.offset == offset);
ut_a(!block || buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
return(block);

View file

@ -9,6 +9,7 @@ Created 11/17/1995 Heikki Tuuri
#ifndef buf0types_h
#define buf0types_h
typedef struct buf_page_struct buf_page_t;
typedef struct buf_block_struct buf_block_t;
typedef struct buf_chunk_struct buf_chunk_t;
typedef struct buf_pool_struct buf_pool_t;

View file

@ -31,11 +31,17 @@ page0*.h includes rem0rec.h and may include rem0rec.ic. */
struct page_zip_des_struct
{
page_zip_t* data; /* compressed page data */
ulint size; /* total size of compressed page */
ulint n_blobs; /* number of externally stored
columns */
ulint m_start; /* start offset of modification log */
ulint m_end; /* end offset of modification log */
ulint state:3; /* state of the control block
(cf. enum buf_page_state) */
ulint :1; /* reserved */
ulint n_blobs:12; /* number of externally stored
columns on the page; the maximum
is 744 on a 16 KiB page */
ulint size:16; /* compressed page size in bytes;
must be a power of 2 and
at least PAGE_ZIP_MIN_SIZE */
ulint m_start:16; /* start offset of modification log */
ulint m_end:16; /* end offset of modification log */
};
#define PAGE_ZIP_MIN_SIZE 1024 /* smallest page_zip_des_struct.size */

View file

@ -348,7 +348,9 @@ Copy a page byte for byte, except for the file page header and trailer. */
void
page_zip_copy(
/*==========*/
page_zip_des_t* page_zip, /* out: copy of src_zip */
page_zip_des_t* page_zip, /* out: copy of src_zip
(n_blobs, m_start, m_end,
data[0..size-1]) */
page_t* page, /* out: copy of src */
const page_zip_des_t* src_zip, /* in: compressed page */
const page_t* src, /* in: page */

View file

@ -3571,7 +3571,9 @@ Copy a page byte for byte, except for the file page header and trailer. */
void
page_zip_copy(
/*==========*/
page_zip_des_t* page_zip, /* out: copy of src_zip */
page_zip_des_t* page_zip, /* out: copy of src_zip
(n_blobs, m_start, m_end,
data[0..size-1]) */
page_t* page, /* out: copy of src */
const page_zip_des_t* src_zip, /* in: compressed page */
const page_t* src, /* in: page */