Percona-Server-5.5.30-rel30.1.tar.gz

This commit is contained in:
Sergei Golubchik 2013-03-08 13:13:46 +01:00
commit 086b54c281
35 changed files with 1137 additions and 547 deletions

View file

@ -3141,6 +3141,8 @@ btr_lift_page_up(
buf_block_t* blocks[BTR_MAX_LEVELS];
ulint n_blocks; /*!< last used index in blocks[] */
ulint i;
ibool lift_father_up = FALSE;
buf_block_t* block_orig = block;
ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
@ -3151,11 +3153,13 @@ btr_lift_page_up(
{
btr_cur_t cursor;
mem_heap_t* heap = mem_heap_create(100);
ulint* offsets;
ulint* offsets = NULL;
mem_heap_t* heap = mem_heap_create(
sizeof(*offsets)
* (REC_OFFS_HEADER_SIZE + 1 + 1 + index->n_fields));
buf_block_t* b;
offsets = btr_page_get_father_block(NULL, heap, index,
offsets = btr_page_get_father_block(offsets, heap, index,
block, mtr, &cursor);
father_block = btr_cur_get_block(&cursor);
father_page_zip = buf_block_get_page_zip(father_block);
@ -3179,6 +3183,29 @@ btr_lift_page_up(
blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
}
if (n_blocks && page_level == 0) {
/* The father page should also be the only page on its level (and it
is not the root). Lift up the father page first, because a leaf page
should be lifted up only into the root page. Page freeing chooses the
segment based on page_level (==0 or !=0); if page_level were changed
from !=0 to ==0, the later freeing of the page would not find the
page allocation to be freed. */
lift_father_up = TRUE;
block = father_block;
page = buf_block_get_frame(block);
page_level = btr_page_get_level(page, mtr);
ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
father_block = blocks[0];
father_page_zip = buf_block_get_page_zip(father_block);
father_page = buf_block_get_frame(father_block);
}
mem_heap_free(heap);
}
@ -3186,6 +3213,7 @@ btr_lift_page_up(
/* Make the father empty */
btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
page_level++;
/* Copy the records to the father page one by one. */
if (0
@ -3218,7 +3246,7 @@ btr_lift_page_up(
lock_update_copy_and_discard(father_block, block);
/* Go upward to root page, decrementing levels by one. */
for (i = 0; i < n_blocks; i++, page_level++) {
for (i = lift_father_up ? 1 : 0; i < n_blocks; i++, page_level++) {
page_t* page = buf_block_get_frame(blocks[i]);
page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]);
@ -3240,7 +3268,7 @@ btr_lift_page_up(
ut_ad(page_validate(father_page, index));
ut_ad(btr_check_node_ptr(index, father_block, mtr));
return(father_block);
return(lift_father_up ? block_orig : father_block);
}
/*************************************************************//**

View file

@ -97,6 +97,11 @@ srv_refresh_innodb_monitor_stats(). Referenced by
srv_printf_innodb_monitor(). */
UNIV_INTERN ulint btr_cur_n_sea_old = 0;
#ifdef UNIV_DEBUG
/* Flag to limit optimistic insert records */
UNIV_INTERN uint btr_cur_limit_optimistic_insert_debug = 0;
#endif /* UNIV_DEBUG */
/** In the optimistic insert, if the insert does not fit, but this much space
can be released by page reorganize, then it is reorganized */
#define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32)
@ -1378,6 +1383,9 @@ btr_cur_optimistic_insert(
}
}
LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page),
goto fail);
/* If there have been many consecutive inserts, and we are on the leaf
level, check if we have to split the page to reserve enough free space
for future updates of records. */

View file

@ -66,9 +66,7 @@ _increment_page_get_statistics(buf_block_t* block, trx_t* trx)
byte block_hash_offset;
ut_ad(block);
if (!innobase_get_slow_log() || !trx || !trx->take_stats)
return;
ut_ad(trx && trx->take_stats);
if (!trx->distinct_page_access_hash) {
trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE);
@ -411,6 +409,33 @@ buf_get_total_list_len(
}
}
/********************************************************************//**
Sums the sizes in bytes of the LRU, unzip_LRU and flush lists over all
buffer pool instances. */
UNIV_INTERN
void
buf_get_total_list_size_in_bytes(
/*=============================*/
	buf_pools_list_size_t*	buf_pools_list_size)	/*!< out: list sizes
							in all buffer pools */
{
	ulint	pool_no = 0;

	ut_ad(buf_pools_list_size);

	/* Every total starts from zero before the per-pool values are
	added. */
	memset(buf_pools_list_size, 0, sizeof(*buf_pools_list_size));

	while (pool_no < srv_buf_pool_instances) {
		buf_pool_t*	pool = buf_pool_from_array(pool_no);

		/* No mutex is taken here: the values are only used for
		statistics. */
		buf_pools_list_size->LRU_bytes += pool->stat.LRU_bytes;

		/* unzip_LRU is counted as list length times page size. */
		buf_pools_list_size->unzip_LRU_bytes +=
			UT_LIST_GET_LEN(pool->unzip_LRU) * UNIV_PAGE_SIZE;

		buf_pools_list_size->flush_list_bytes +=
			pool->stat.flush_list_bytes;

		pool_no++;
	}
}
/********************************************************************//**
Get total buffer pool statistics. */
UNIV_INTERN
@ -1807,40 +1832,24 @@ buf_page_make_young(
}
/********************************************************************//**
Sets the time of the first access of a page and moves a page to the
start of the buffer pool LRU list if it is too old. This high-level
function can be used to prevent an important page from slipping
out of the buffer pool. */
Moves a page to the start of the buffer pool LRU list if it is too old.
This high-level function can be used to prevent an important page from
slipping out of the buffer pool. */
static
void
buf_page_set_accessed_make_young(
/*=============================*/
buf_page_t* bpage, /*!< in/out: buffer block of a
buf_page_make_young_if_needed(
/*==========================*/
buf_page_t* bpage) /*!< in/out: buffer block of a
file page */
unsigned access_time) /*!< in: bpage->access_time
read under mutex protection,
or 0 if unknown */
{
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(!buf_pool_mutex_own(buf_pool));
#endif /* UNIV_DEBUG */
ut_a(buf_page_in_file(bpage));
if (buf_page_peek_if_too_old(bpage)) {
//buf_pool_mutex_enter(buf_pool);
mutex_enter(&buf_pool->LRU_list_mutex);
buf_LRU_make_block_young(bpage);
//buf_pool_mutex_exit(buf_pool);
mutex_exit(&buf_pool->LRU_list_mutex);
} else if (!access_time) {
ulint time_ms = ut_time_ms();
mutex_t* block_mutex = buf_page_get_mutex_enter(bpage);
//buf_pool_mutex_enter(buf_pool);
if (block_mutex) {
buf_page_set_accessed(bpage, time_ms);
mutex_exit(block_mutex);
}
//buf_pool_mutex_exit(buf_pool);
buf_page_make_young(bpage);
}
}
@ -1959,7 +1968,6 @@ buf_page_get_zip(
buf_page_t* bpage;
mutex_t* block_mutex;
ibool must_read;
unsigned access_time;
trx_t* trx = NULL;
ulint sec;
ulint ms;
@ -1967,7 +1975,7 @@ buf_page_get_zip(
ib_uint64_t finish_time;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
if (innobase_get_slow_log()) {
if (UNIV_UNLIKELY(innobase_get_slow_log())) {
trx = innobase_get_trx();
}
buf_pool->stat.n_page_gets++;
@ -2089,13 +2097,14 @@ err_exit:
got_block:
must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
access_time = buf_page_is_accessed(bpage);
//buf_pool_mutex_exit(buf_pool);
buf_page_set_accessed(bpage);
mutex_exit(block_mutex);
buf_page_set_accessed_make_young(bpage, access_time);
buf_page_make_young_if_needed(bpage);
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
ut_a(!bpage->file_page_was_freed);
@ -2111,7 +2120,7 @@ got_block:
/* Let us wait until the read operation
completes */
if (innobase_get_slow_log() && trx && trx->take_stats)
if (UNIV_UNLIKELY(trx && trx->take_stats))
{
ut_usectime(&sec, &ms);
start_time = (ib_uint64_t)sec * 1000000 + ms;
@ -2132,7 +2141,7 @@ got_block:
break;
}
}
if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
if (UNIV_UNLIKELY(start_time != 0))
{
ut_usectime(&sec, &ms);
finish_time = (ib_uint64_t)sec * 1000000 + ms;
@ -2487,7 +2496,7 @@ buf_page_get_gen(
|| ibuf_page_low(space, zip_size, offset,
FALSE, file, line, NULL));
#endif
if (innobase_get_slow_log()) {
if (UNIV_UNLIKELY(innobase_get_slow_log())) {
trx = innobase_get_trx();
}
buf_pool->stat.n_page_gets++;
@ -2774,6 +2783,8 @@ wait_until_unfixed:
UNIV_MEM_INVALID(bpage, sizeof *bpage);
access_time = buf_page_is_accessed(&block->page);
mutex_exit(block_mutex);
mutex_exit(&buf_pool->zip_mutex);
@ -2781,18 +2792,22 @@ wait_until_unfixed:
buf_pool->n_pend_unzip++;
buf_pool_mutex_exit(buf_pool);
//buf_pool_mutex_exit(buf_pool);
buf_page_free_descriptor(bpage);
/* Decompress the page and apply buffered operations
while not holding buf_pool->mutex or block->mutex. */
/* Decompress the page while not holding
buf_pool->mutex or block->mutex. */
success = buf_zip_decompress(block, srv_use_checksums);
ut_a(success);
if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
ibuf_merge_or_delete_for_page(block, space, offset,
zip_size, TRUE);
if (access_time) {
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a(ibuf_count_get(space, offset) == 0);
#endif /* UNIV_IBUF_COUNT_DEBUG */
} else {
ibuf_merge_or_delete_for_page(
block, space, offset, zip_size, TRUE);
}
}
/* Unfix and unlatch the block. */
@ -2888,17 +2903,16 @@ wait_until_unfixed:
ut_a(mode == BUF_GET_POSSIBLY_FREED
|| !block->page.file_page_was_freed);
#endif
//mutex_exit(&block->mutex);
/* Check if this is the first access to the page */
access_time = buf_page_is_accessed(&block->page);
//buf_pool_mutex_exit(buf_pool);
mutex_exit(block_mutex);
buf_page_set_accessed(&block->page);
if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) {
buf_page_set_accessed_make_young(&block->page, access_time);
mutex_exit(&block->mutex);
if (mode != BUF_PEEK_IF_IN_POOL) {
buf_page_make_young_if_needed(&block->page);
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
@ -2913,7 +2927,7 @@ wait_until_unfixed:
/* Let us wait until the read operation
completes */
if (innobase_get_slow_log() && trx && trx->take_stats)
if (UNIV_UNLIKELY(trx && trx->take_stats))
{
ut_usectime(&sec, &ms);
start_time = (ib_uint64_t)sec * 1000000 + ms;
@ -2935,7 +2949,7 @@ wait_until_unfixed:
break;
}
}
if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
if (UNIV_UNLIKELY(start_time != 0))
{
ut_usectime(&sec, &ms);
finish_time = (ib_uint64_t)sec * 1000000 + ms;
@ -2962,7 +2976,7 @@ wait_until_unfixed:
mtr_memo_push(mtr, block, fix_type);
if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL) && !access_time) {
if (mode != BUF_PEEK_IF_IN_POOL && !access_time) {
/* In the case of a first access, try to apply linear
read-ahead */
@ -2974,7 +2988,7 @@ wait_until_unfixed:
ut_a(ibuf_count_get(buf_block_get_space(block),
buf_block_get_page_no(block)) == 0);
#endif
if (innobase_get_slow_log()) {
if (UNIV_UNLIKELY(trx && trx->take_stats)) {
_increment_page_get_statistics(block, trx);
}
@ -3019,15 +3033,13 @@ buf_page_optimistic_get(
buf_block_buf_fix_inc(block, file, line);
access_time = buf_page_is_accessed(&block->page);
buf_page_set_accessed(&block->page);
mutex_exit(&block->mutex);
/* Check if this is the first access to the page.
We do a dirty read on purpose, to avoid mutex contention.
This field is only used for heuristic purposes; it does not
affect correctness. */
access_time = buf_page_is_accessed(&block->page);
buf_page_set_accessed_make_young(&block->page, access_time);
buf_page_make_young_if_needed(&block->page);
ut_ad(!ibuf_inside(mtr)
|| ibuf_page(buf_block_get_space(block),
@ -3079,11 +3091,11 @@ buf_page_optimistic_get(
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
ut_a(block->page.file_page_was_freed == FALSE);
#endif
if (innobase_get_slow_log()) {
if (UNIV_UNLIKELY(innobase_get_slow_log())) {
trx = innobase_get_trx();
}
if (UNIV_UNLIKELY(!access_time)) {
if (!access_time) {
/* In the case of a first access, try to apply linear
read-ahead */
@ -3100,7 +3112,7 @@ buf_page_optimistic_get(
buf_pool = buf_pool_from_block(block);
buf_pool->stat.n_page_gets++;
if (innobase_get_slow_log()) {
if (UNIV_UNLIKELY(trx && trx->take_stats)) {
_increment_page_get_statistics(block, trx);
}
return(TRUE);
@ -3150,28 +3162,14 @@ buf_page_get_known_nowait(
buf_block_buf_fix_inc(block, file, line);
buf_page_set_accessed(&block->page);
mutex_exit(&block->mutex);
buf_pool = buf_pool_from_block(block);
if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
//buf_pool_mutex_enter(buf_pool);
mutex_enter(&buf_pool->LRU_list_mutex);
buf_LRU_make_block_young(&block->page);
//buf_pool_mutex_exit(buf_pool);
mutex_exit(&buf_pool->LRU_list_mutex);
} else if (!buf_page_is_accessed(&block->page)) {
/* Above, we do a dirty read on purpose, to avoid
mutex contention. The field buf_page_t::access_time
is only used for heuristic purposes. Writes to the
field must be protected by mutex, however. */
ulint time_ms = ut_time_ms();
//buf_pool_mutex_enter(buf_pool);
mutex_enter(&block->mutex);
buf_page_set_accessed(&block->page, time_ms);
//buf_pool_mutex_exit(buf_pool);
mutex_exit(&block->mutex);
if (mode == BUF_MAKE_YOUNG) {
buf_page_make_young_if_needed(&block->page);
}
ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD);
@ -3212,9 +3210,13 @@ buf_page_get_known_nowait(
#endif
buf_pool->stat.n_page_gets++;
if (innobase_get_slow_log()) {
if (UNIV_UNLIKELY(innobase_get_slow_log())) {
trx = innobase_get_trx();
_increment_page_get_statistics(block, trx);
if (trx != NULL && trx->take_stats) {
_increment_page_get_statistics(block, trx);
}
}
return(TRUE);
@ -3343,6 +3345,7 @@ buf_page_init(
ulint offset, /*!< in: offset of the page within space
in units of a page */
ulint fold, /*!< in: buf_page_address_fold(space,offset) */
ulint zip_size,/*!< in: compressed page size, or 0 */
buf_block_t* block) /*!< in/out: block to init */
{
buf_page_t* hash_page;
@ -3412,6 +3415,9 @@ buf_page_init(
ut_d(block->page.in_page_hash = TRUE);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
fold, &block->page);
if (zip_size) {
page_zip_set_size(&block->page.zip, zip_size);
}
}
/********************************************************************//**
@ -3538,7 +3544,7 @@ err_exit:
ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
buf_page_init(buf_pool, space, offset, fold, block);
buf_page_init(buf_pool, space, offset, fold, zip_size, block);
rw_lock_x_unlock(&buf_pool->page_hash_latch);
@ -3558,8 +3564,6 @@ err_exit:
buf_page_set_io_fix(bpage, BUF_IO_READ);
if (UNIV_UNLIKELY(zip_size)) {
page_zip_set_size(&block->page.zip, zip_size);
/* buf_pool->mutex may be released and
reacquired by buf_buddy_alloc(). Thus, we
must release block->mutex in order not to
@ -3659,7 +3663,8 @@ err_exit:
rw_lock_x_unlock(&buf_pool->page_hash_latch);
/* The block must be put to the LRU list, to the old blocks */
/* The block must be put to the LRU list, to the old blocks.
The zip_size is already set into the page zip. */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
buf_LRU_insert_zip_clean(bpage);
@ -3707,7 +3712,6 @@ buf_page_create(
buf_block_t* block;
ulint fold;
buf_block_t* free_block = NULL;
ulint time_ms = ut_time_ms();
buf_pool_t* buf_pool = buf_pool_get(space, offset);
ut_ad(mtr);
@ -3775,7 +3779,7 @@ retry:
mutex_enter(&block->mutex);
buf_page_init(buf_pool, space, offset, fold, block);
buf_page_init(buf_pool, space, offset, fold, zip_size,block);
rw_lock_x_unlock(&buf_pool->page_hash_latch);
/* The block must be put to the LRU list */
@ -3794,8 +3798,6 @@ retry:
buf_page_set_io_fix(&block->page, BUF_IO_READ);
rw_lock_x_lock(&block->lock);
page_zip_set_size(&block->page.zip, zip_size);
mutex_exit(&block->mutex);
/* buf_pool->mutex may be released and reacquired by
buf_buddy_alloc(). Thus, we must release block->mutex
@ -3819,13 +3821,12 @@ retry:
rw_lock_x_unlock(&block->lock);
}
buf_page_set_accessed(&block->page, time_ms);
//buf_pool_mutex_exit(buf_pool);
mutex_exit(&buf_pool->LRU_list_mutex);
mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
buf_page_set_accessed(&block->page);
mutex_exit(&block->mutex);
/* Delete possible entries for the page from the insert buffer:

View file

@ -79,6 +79,23 @@ static buf_flush_stat_t buf_flush_stat_sum;
/* @} */
/******************************************************************//**
Adds the size of one page to the flush_list byte counter of a buffer pool
instance: the compressed size (zip_size) when it is nonzero, otherwise
UNIV_PAGE_SIZE. */
static inline
void
incr_flush_list_size_in_bytes(
/*==========================*/
	buf_block_t*	block,		/*!< in: control block */
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	ulint	page_bytes;

	ut_ad(buf_flush_list_mutex_own(buf_pool));

	page_bytes = page_zip_get_size(&block->page.zip);

	if (page_bytes == 0) {
		/* A zero zip_size is counted as a full-size page. */
		page_bytes = UNIV_PAGE_SIZE;
	}

	buf_pool->stat.flush_list_bytes += page_bytes;

	ut_ad(buf_pool->stat.flush_list_bytes <= buf_pool->curr_pool_size);
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
Validates the flush list.
@ -308,6 +325,7 @@ buf_flush_insert_into_flush_list(
ut_d(block->page.in_flush_list = TRUE);
block->page.oldest_modification = lsn;
UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, &block->page);
incr_flush_list_size_in_bytes(block, buf_pool);
#ifdef UNIV_DEBUG_VALGRIND
{
@ -412,6 +430,8 @@ buf_flush_insert_sorted_into_flush_list(
prev_b, &block->page);
}
incr_flush_list_size_in_bytes(block, buf_pool);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
@ -514,6 +534,7 @@ buf_flush_remove(
buf_page_t* bpage) /*!< in: pointer to the block in question */
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ulint zip_size;
//ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
@ -552,6 +573,9 @@ buf_flush_remove(
because we assert on in_flush_list in comparison function. */
ut_d(bpage->in_flush_list = FALSE);
zip_size = page_zip_get_size(&bpage->zip);
buf_pool->stat.flush_list_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;
bpage->oldest_modification = 0;
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG

View file

@ -152,6 +152,23 @@ buf_LRU_block_free_hashed_page(
be in a state where it can be freed */
ibool have_page_hash_mutex);
/******************************************************************//**
Adds the size of one page to the LRU byte counter of a buffer pool
instance: the compressed size (zip_size) when it is nonzero, otherwise
UNIV_PAGE_SIZE. */
static inline
void
incr_LRU_size_in_bytes(
/*===================*/
	buf_page_t*	bpage,		/*!< in: control block */
	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
{
	ulint	page_bytes;

	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));

	page_bytes = page_zip_get_size(&bpage->zip);

	if (page_bytes == 0) {
		/* A zero zip_size is counted as a full-size page. */
		page_bytes = UNIV_PAGE_SIZE;
	}

	buf_pool->stat.LRU_bytes += page_bytes;

	ut_ad(buf_pool->stat.LRU_bytes <= buf_pool->curr_pool_size);
}
/******************************************************************//**
Determines if the unzip_LRU list should be used for evicting a victim
instead of the general LRU list.
@ -1551,6 +1568,7 @@ buf_LRU_remove_block(
buf_page_t* bpage) /*!< in: control block */
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ulint zip_size;
ut_ad(buf_pool);
ut_ad(bpage);
@ -1587,6 +1605,9 @@ buf_LRU_remove_block(
UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
bpage->in_LRU_list = FALSE;
zip_size = page_zip_get_size(&bpage->zip);
buf_pool->stat.LRU_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;
buf_unzip_LRU_remove_block_if_needed(bpage);
/* If the LRU list is so short that LRU_old is not defined,
@ -1648,7 +1669,10 @@ buf_unzip_LRU_add_block(
}
/******************************************************************//**
Adds a block to the LRU list end. */
Adds a block to the LRU list end. Please make sure that the zip_size is
already set into the page zip when invoking the function, so that we
can get correct zip_size from the buffer page when adding a block
into LRU */
UNIV_INLINE
void
buf_LRU_add_block_to_end_low(
@ -1668,6 +1692,8 @@ buf_LRU_add_block_to_end_low(
UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
bpage->in_LRU_list = TRUE;
incr_LRU_size_in_bytes(bpage, buf_pool);
if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
ut_ad(buf_pool->LRU_old);
@ -1696,7 +1722,10 @@ buf_LRU_add_block_to_end_low(
}
/******************************************************************//**
Adds a block to the LRU list. */
Adds a block to the LRU list. Please make sure that the zip_size is
already set into the page zip when invoking the function, so that we
can get correct zip_size from the buffer page when adding a block
into LRU */
UNIV_INLINE
void
buf_LRU_add_block_low(
@ -1739,6 +1768,8 @@ buf_LRU_add_block_low(
bpage->in_LRU_list = TRUE;
incr_LRU_size_in_bytes(bpage, buf_pool);
if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
ut_ad(buf_pool->LRU_old);
@ -1766,7 +1797,10 @@ buf_LRU_add_block_low(
}
/******************************************************************//**
Adds a block to the LRU list. */
Adds a block to the LRU list. Please make sure that the zip_size is
already set into the page zip when invoking the function, so that we
can get correct zip_size from the buffer page when adding a block
into LRU */
UNIV_INTERN
void
buf_LRU_add_block(
@ -1995,6 +2029,8 @@ not_freed:
UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU,
prev_b, b);
incr_LRU_size_in_bytes(b, buf_pool);
if (buf_page_is_old(b)) {
buf_pool->LRU_old_len++;
if (UNIV_UNLIKELY

View file

@ -2400,7 +2400,8 @@ dict_load_foreigns(
ibool check_charsets) /*!< in: TRUE=check charset
compatibility */
{
char tuple_buf[DTUPLE_EST_ALLOC(1)];
ulint tuple_buf[(DTUPLE_EST_ALLOC(1) + sizeof(ulint) - 1)
/ sizeof(ulint)];
btr_pcur_t pcur;
dtuple_t* tuple;
dfield_t* dfield;

View file

@ -1479,7 +1479,7 @@ fil_space_get_size(
ut_ad(fil_system);
fil_mutex_enter_and_prepare_for_io(id);
mutex_enter(&fil_system->mutex);
space = fil_space_get_by_id(id);
@ -1494,6 +1494,23 @@ fil_space_get_size(
ut_a(1 == UT_LIST_GET_LEN(space->chain));
mutex_exit(&fil_system->mutex);
/* It is possible that the space gets evicted at this point
before the fil_mutex_enter_and_prepare_for_io() acquires
the fil_system->mutex. Check for this after completing the
call to fil_mutex_enter_and_prepare_for_io(). */
fil_mutex_enter_and_prepare_for_io(id);
/* We are still holding the fil_system->mutex. Check if
the space is still in memory cache. */
space = fil_space_get_by_id(id);
if (space == NULL) {
mutex_exit(&fil_system->mutex);
return(0);
}
node = UT_LIST_GET_FIRST(space->chain);
/* It must be a single-table tablespace and we have not opened
@ -1531,7 +1548,7 @@ fil_space_get_flags(
return(0);
}
fil_mutex_enter_and_prepare_for_io(id);
mutex_enter(&fil_system->mutex);
space = fil_space_get_by_id(id);
@ -1546,6 +1563,23 @@ fil_space_get_flags(
ut_a(1 == UT_LIST_GET_LEN(space->chain));
mutex_exit(&fil_system->mutex);
/* It is possible that the space gets evicted at this point
before the fil_mutex_enter_and_prepare_for_io() acquires
the fil_system->mutex. Check for this after completing the
call to fil_mutex_enter_and_prepare_for_io(). */
fil_mutex_enter_and_prepare_for_io(id);
/* We are still holding the fil_system->mutex. Check if
the space is still in memory cache. */
space = fil_space_get_by_id(id);
if (space == NULL) {
mutex_exit(&fil_system->mutex);
return(0);
}
node = UT_LIST_GET_FIRST(space->chain);
/* It must be a single-table tablespace and we have not opened
@ -2732,7 +2766,7 @@ retry:
mutex_exit(&fil_system->mutex);
#ifndef UNIV_HOTBACKUP
if (success) {
if (success && !recv_recovery_on) {
mtr_t mtr;
mtr_start(&mtr);
@ -6006,3 +6040,26 @@ fil_space_set_corrupt(
mutex_exit(&fil_system->mutex);
}
/****************************************************************//**
Writes the redo log records for swapping two .ibd files: two
MLOG_FILE_RENAME records are logged within a single mini-transaction. */
UNIV_INTERN
void
fil_mtr_rename_log(
/*===============*/
	ulint		old_space_id,	/*!< in: tablespace id of the old
					table. */
	const char*	old_name,	/*!< in: old table name */
	ulint		new_space_id,	/*!< in: tablespace id of the new
					table */
	const char*	new_name,	/*!< in: new table name */
	const char*	tmp_name)	/*!< in: temp table name used while
					swapping */
{
	mtr_t	rename_mtr;

	mtr_start(&rename_mtr);

	/* First record: old_space_id, with the names old_name and
	tmp_name. */
	fil_op_write_log(MLOG_FILE_RENAME, old_space_id, 0, 0,
			 old_name, tmp_name, &rename_mtr);

	/* Second record: new_space_id, with the names new_name and
	old_name. */
	fil_op_write_log(MLOG_FILE_RENAME, new_space_id, 0, 0,
			 new_name, old_name, &rename_mtr);

	mtr_commit(&rename_mtr);
}

View file

@ -692,8 +692,12 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_background_log_sync, SHOW_LONG},
{"buffer_pool_pages_data",
(char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG},
{"buffer_pool_bytes_data",
(char*) &export_vars.innodb_buffer_pool_bytes_data, SHOW_LONG},
{"buffer_pool_pages_dirty",
(char*) &export_vars.innodb_buffer_pool_pages_dirty, SHOW_LONG},
{"buffer_pool_bytes_dirty",
(char*) &export_vars.innodb_buffer_pool_bytes_dirty, SHOW_LONG},
{"buffer_pool_pages_flushed",
(char*) &export_vars.innodb_buffer_pool_pages_flushed, SHOW_LONG},
{"buffer_pool_pages_LRU_flushed",
@ -874,6 +878,12 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_x_lock_spin_rounds, SHOW_LONGLONG},
{"x_lock_spin_waits",
(char*) &export_vars.innodb_x_lock_spin_waits, SHOW_LONGLONG},
#ifdef UNIV_DEBUG
{"purge_trx_id_age",
(char*) &export_vars.innodb_purge_trx_id_age, SHOW_LONG},
{"purge_view_trx_id_age",
(char*) &export_vars.innodb_purge_view_trx_id_age, SHOW_LONG},
#endif /* UNIV_DEBUG */
{NullS, NullS, SHOW_LONG}
};
@ -1289,6 +1299,8 @@ convert_error_code_to_mysql(
return(HA_ERR_INDEX_CORRUPT);
case DB_UNDO_RECORD_TOO_BIG:
return(HA_ERR_UNDO_REC_TOO_BIG);
case DB_OUT_OF_MEMORY:
return(HA_ERR_OUT_OF_MEM);
}
}
@ -1466,100 +1478,6 @@ innobase_get_lower_case_table_names(void)
return(lower_case_table_names);
}
#if defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN)
extern MYSQL_PLUGIN_IMPORT MY_TMPDIR mysql_tmpdir_list;
/*******************************************************************//**
Map an OS error to an errno value. The OS error number is stored in
_doserrno and the mapped value is stored in errno) */
extern "C"
void __cdecl
_dosmaperr(
unsigned long); /*!< in: OS error value */
/*********************************************************************//**
Creates a temporary file. This variant is compiled only when both __WIN__
and MYSQL_DYNAMIC_PLUGIN are defined. It picks a directory (my_tmpdir(), or
GetTempPath() when my_tmpdir() returns NULL), generates a unique name with
GetTempFileName(), creates the file with CreateFile() and wraps the OS
handle in a C runtime file descriptor with _open_osfhandle().
@return temporary file descriptor, or < 0 on error */
extern "C" UNIV_INTERN
int
innobase_mysql_tmpfile(void)
/*========================*/
{
int fd; /* handle of opened file */
HANDLE osfh; /* OS handle of opened file */
char* tmpdir; /* point to the directory
where to create file */
TCHAR path_buf[MAX_PATH - 14]; /* buffer for tmp file path.
The length cannot be longer
than MAX_PATH - 14, or
GetTempFileName will fail. */
char filename[MAX_PATH]; /* name of the tmpfile */
DWORD fileaccess = GENERIC_READ /* OS file access */
| GENERIC_WRITE
| DELETE;
DWORD fileshare = FILE_SHARE_READ /* OS file sharing mode */
| FILE_SHARE_WRITE
| FILE_SHARE_DELETE;
DWORD filecreate = CREATE_ALWAYS; /* OS method of open/create */
DWORD fileattrib = /* OS file attribute flags */
FILE_ATTRIBUTE_NORMAL
| FILE_FLAG_DELETE_ON_CLOSE
| FILE_ATTRIBUTE_TEMPORARY
| FILE_FLAG_SEQUENTIAL_SCAN;
DBUG_ENTER("innobase_mysql_tmpfile");
tmpdir = my_tmpdir(&mysql_tmpdir_list);
/* The tmpdir parameter can not be NULL for GetTempFileName. */
if (!tmpdir) {
uint ret;
/* Use GetTempPath to determine path for temporary files. */
/* NOTE(review): GetTempPath() takes the buffer length in TCHARs;
sizeof(path_buf) equals that only when TCHAR is char, and path_buf is
later assigned to a char*. Presumably this is built without UNICODE;
confirm. */
ret = GetTempPath(sizeof(path_buf), path_buf);
/* A return value of 0, or one larger than the buffer size, is
treated as failure. */
if (ret > sizeof(path_buf) || (ret == 0)) {
_dosmaperr(GetLastError()); /* map error */
DBUG_RETURN(-1);
}
tmpdir = path_buf;
}
/* Use GetTempFileName to generate a unique filename. */
if (!GetTempFileName(tmpdir, "ib", 0, filename)) {
_dosmaperr(GetLastError()); /* map error */
DBUG_RETURN(-1);
}
DBUG_PRINT("info", ("filename: %s", filename));
/* Open/Create the file. */
osfh = CreateFile(filename, fileaccess, fileshare, NULL,
filecreate, fileattrib, NULL);
if (osfh == INVALID_HANDLE_VALUE) {
/* open/create file failed! */
_dosmaperr(GetLastError()); /* map error */
DBUG_RETURN(-1);
}
/* Retry the handle-to-descriptor conversion for as long as it fails
with EINTR. */
do {
/* Associates a CRT file descriptor with the OS file handle. */
fd = _open_osfhandle((intptr_t) osfh, 0);
} while (fd == -1 && errno == EINTR);
if (fd == -1) {
/* Open failed, close the file handle. */
_dosmaperr(GetLastError()); /* map error */
CloseHandle(osfh); /* no need to check if
CloseHandle fails */
}
/* On failure fd is still -1 here, so the caller sees the error
through the return value. */
DBUG_RETURN(fd);
}
#else
/*********************************************************************//**
Creates a temporary file.
@return temporary file descriptor, or < 0 on error */
@ -1569,7 +1487,15 @@ innobase_mysql_tmpfile(void)
/*========================*/
{
int fd2 = -1;
File fd = mysql_tmpfile("ib");
File fd;
DBUG_EXECUTE_IF(
"innobase_tmpfile_creation_failure",
return(-1);
);
fd = mysql_tmpfile("ib");
if (fd >= 0) {
/* Copy the file descriptor, so that the additional resources
allocated by create_temp_file() can be freed by invoking
@ -1613,7 +1539,6 @@ innobase_mysql_tmpfile(void)
}
return(fd2);
}
#endif /* defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN) */
/*********************************************************************//**
Wrapper around MySQL's copy_and_convert function.
@ -3378,7 +3303,7 @@ innobase_commit_low(
header for undo purposes, see the comment at corresponding call
at innobase_xa_prepare(). */
innobase_copy_repl_coords_to_trx(current_thd, trx);
innobase_copy_repl_coords_to_trx((THD *) trx->mysql_thd, trx);
trx_commit_for_mysql(trx);
}
@ -10359,23 +10284,26 @@ ha_innobase::external_lock(
if (trx->n_mysql_tables_in_use == 0) {
#ifdef EXTENDED_SLOWLOG
increment_thd_innodb_stats(thd,
(unsigned long long) trx->id,
trx->io_reads,
trx->io_read,
trx->io_reads_wait_timer,
trx->lock_que_wait_timer,
trx->innodb_que_wait_timer,
trx->distinct_page_access);
if (UNIV_UNLIKELY(trx->take_stats)) {
increment_thd_innodb_stats(thd,
(unsigned long long) trx->id,
trx->io_reads,
trx->io_read,
trx->io_reads_wait_timer,
trx->lock_que_wait_timer,
trx->innodb_que_wait_timer,
trx->distinct_page_access);
trx->io_reads = 0;
trx->io_read = 0;
trx->io_reads_wait_timer = 0;
trx->lock_que_wait_timer = 0;
trx->innodb_que_wait_timer = 0;
trx->distinct_page_access = 0;
if (trx->distinct_page_access_hash)
memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
trx->io_reads = 0;
trx->io_read = 0;
trx->io_reads_wait_timer = 0;
trx->lock_que_wait_timer = 0;
trx->innodb_que_wait_timer = 0;
trx->distinct_page_access = 0;
if (trx->distinct_page_access_hash)
memset(trx->distinct_page_access_hash, 0,
DPAH_SIZE);
}
#endif
trx->mysql_n_tables_locked = 0;
@ -12912,6 +12840,18 @@ static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug,
PLUGIN_VAR_RQCMDARG,
"Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for trx_rsegf_undo_find_free()",
NULL, NULL, 0, 0, 1024, 0);
/* Debug system variable limit_optimistic_insert_debug, bound to
btr_cur_limit_optimistic_insert_debug. The trailing numeric arguments
are 0, 0, UINT_MAX32, 0; per the help text a value of 0 means
"unlimited". */
static MYSQL_SYSVAR_UINT(limit_optimistic_insert_debug,
btr_cur_limit_optimistic_insert_debug, PLUGIN_VAR_RQCMDARG,
"Artificially limit the number of records per B-tree page (0=unlimited).",
NULL, NULL, 0, 0, UINT_MAX32, 0);
/* Debug system variable trx_purge_view_update_only_debug, bound to
srv_purge_view_update_only_debug; the last argument (FALSE) is
presumably the default value. */
static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug,
srv_purge_view_update_only_debug, PLUGIN_VAR_NOCMDARG,
"Pause actual purging any delete-marked records, but merely update the purge view. "
"It is to create artificially the situation the purge view have been updated "
"but the each purges were not done yet.",
NULL, NULL, FALSE);
#endif /* UNIV_DEBUG */
static MYSQL_SYSVAR_LONGLONG(ibuf_max_size, srv_ibuf_max_size,
@ -13091,6 +13031,11 @@ static MYSQL_SYSVAR_BOOL(locking_fake_changes, srv_fake_changes_locks,
"not take any locks at all.",
NULL, NULL, TRUE);
/* System variable print_all_deadlocks, bound to srv_print_all_deadlocks;
the help text states that it is off by default, matching the trailing
FALSE argument. */
static MYSQL_SYSVAR_BOOL(print_all_deadlocks, srv_print_all_deadlocks,
PLUGIN_VAR_OPCMDARG,
"Print all deadlocks to MySQL error log (off by default)",
NULL, NULL, FALSE);
static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(page_size),
MYSQL_SYSVAR(log_block_size),
@ -13197,12 +13142,15 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(rollback_segments),
#ifdef UNIV_DEBUG
MYSQL_SYSVAR(trx_rseg_n_slots_debug),
MYSQL_SYSVAR(limit_optimistic_insert_debug),
MYSQL_SYSVAR(trx_purge_view_update_only_debug),
#endif /* UNIV_DEBUG */
MYSQL_SYSVAR(corrupt_table_action),
MYSQL_SYSVAR(lazy_drop_table),
MYSQL_SYSVAR(fake_changes),
MYSQL_SYSVAR(locking_fake_changes),
MYSQL_SYSVAR(merge_sort_block_size),
MYSQL_SYSVAR(print_all_deadlocks),
NULL
};

View file

@ -101,8 +101,6 @@ innobase_col_to_mysql(
ut_ad(flen >= len);
ut_ad(DATA_MBMAXLEN(col->mbminmaxlen)
>= DATA_MBMINLEN(col->mbminmaxlen));
ut_ad(DATA_MBMAXLEN(col->mbminmaxlen)
> DATA_MBMINLEN(col->mbminmaxlen) || flen == len);
memcpy(dest, data, len);
break;

View file

@ -806,6 +806,11 @@ srv_printf_innodb_monitor(). */
extern ulint btr_cur_n_sea_old;
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
/* Flag to limit optimistic insert records */
extern uint btr_cur_limit_optimistic_insert_debug;
#endif /* UNIV_DEBUG */
#ifndef UNIV_NONINL
#include "btr0cur.ic"
#endif

View file

@ -26,6 +26,16 @@ Created 10/16/1994 Heikki Tuuri
#ifndef UNIV_HOTBACKUP
#include "btr0btr.h"
#ifdef UNIV_DEBUG
/** Debug-only hook: executes CODE when the debug variable
btr_cur_limit_optimistic_insert_debug is nonzero and NREC has reached it.
The trailing "else (void) 0" makes the expansion a single statement that
consumes the semicolon written after the macro call, so the macro can be
used as the body of an outer if/else without breaking it. CODE is not
wrapped in a loop, so break, continue, goto and return keep their meaning.
@param NREC	number of records to compare against the limit
@param CODE	statement to execute when the limit is reached */
# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)\
if (btr_cur_limit_optimistic_insert_debug\
&& (NREC) >= (ulint)btr_cur_limit_optimistic_insert_debug) {\
CODE;\
} else (void) 0
#else
/* Expands to nothing when UNIV_DEBUG is not defined. */
# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)
#endif /* UNIV_DEBUG */
#ifdef UNIV_DEBUG
/*********************************************************//**
Returns the page cursor component of a tree cursor.
@ -146,6 +156,9 @@ btr_cur_compress_recommendation(
page = btr_cur_get_page(cursor);
LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2,
return(FALSE));
if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT)
|| ((btr_page_get_next(page, mtr) == FIL_NULL)
&& (btr_page_get_prev(page, mtr) == FIL_NULL))) {

View file

@ -199,6 +199,15 @@ struct buf_pool_info_struct{
typedef struct buf_pool_info_struct buf_pool_info_t;
/** The occupied bytes of lists in all buffer pools. This is the type of
the output argument of buf_get_total_list_size_in_bytes(). */
struct buf_pools_list_size_struct {
ulint LRU_bytes; /*!< LRU size in bytes */
ulint unzip_LRU_bytes; /*!< unzip_LRU size in bytes */
ulint flush_list_bytes; /*!< flush_list size in bytes */
};
typedef struct buf_pools_list_size_struct buf_pools_list_size_t;
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Acquire mutex on all buffer pool instances */
@ -1074,8 +1083,7 @@ UNIV_INLINE
void
buf_page_set_accessed(
/*==================*/
buf_page_t* bpage, /*!< in/out: control block */
ulint time_ms) /*!< in: ut_time_ms() */
buf_page_t* bpage) /*!< in/out: control block */
__attribute__((nonnull));
/*********************************************************************//**
Gets the buf_block_t handle of a buffered file block if an uncompressed
@ -1392,6 +1400,14 @@ buf_get_total_list_len(
ulint* free_len, /*!< out: length of all free lists */
ulint* flush_list_len);/*!< out: length of all flush lists */
/********************************************************************//**
Get total list size in bytes from all buffer pools. */
UNIV_INTERN
void
buf_get_total_list_size_in_bytes(
/*=============================*/
buf_pools_list_size_t* buf_pools_list_size); /*!< out: list sizes
in all buffer pools */
/********************************************************************//**
Get total buffer pool statistics. */
UNIV_INTERN
void
@ -1566,10 +1582,11 @@ struct buf_page_struct{
to read this for heuristic
purposes without holding any
mutex or latch */
unsigned access_time:32; /*!< time of first access, or
0 if the block was never accessed
in the buffer pool */
/* @} */
unsigned access_time; /*!< time of first access, or
0 if the block was never accessed
in the buffer pool. Protected by
block mutex */
ibool space_was_being_deleted;
ibool is_corrupt;
# if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
@ -1759,6 +1776,8 @@ struct buf_pool_stat_struct{
young because the first access
was not long enough ago, in
buf_page_peek_if_too_old() */
ulint LRU_bytes; /*!< LRU size in bytes */
ulint flush_list_bytes;/*!< flush_list size in bytes */
};
/** Statistics of buddy blocks of a given size. */

View file

@ -665,19 +665,18 @@ UNIV_INLINE
void
buf_page_set_accessed(
/*==================*/
buf_page_t* bpage, /*!< in/out: control block */
ulint time_ms) /*!< in: ut_time_ms() */
buf_page_t* bpage) /*!< in/out: control block */
{
#ifdef UNIV_DEBUG
//buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
//ut_ad(buf_pool_mutex_own(buf_pool));
#endif
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(!buf_pool_mutex_own(buf_pool));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
#endif
ut_a(buf_page_in_file(bpage));
if (!bpage->access_time) {
/* Make this the time of the first access. */
bpage->access_time = time_ms;
bpage->access_time = ut_time_ms();
}
}

View file

@ -158,7 +158,10 @@ buf_LRU_block_free_non_file_page(
buf_block_t* block, /*!< in: block, must not contain a file page */
ibool have_page_hash_mutex);
/******************************************************************//**
Adds a block to the LRU list. */
Adds a block to the LRU list. Please make sure that the zip_size is
already set into the page zip when invoking the function, so that we
can get correct zip_size from the buffer page when adding a block
into LRU */
UNIV_INTERN
void
buf_LRU_add_block(

View file

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -459,36 +459,18 @@ dtype_get_fixed_size_low(
} else if (!comp) {
return(len);
} else {
/* We play it safe here and ask MySQL for
mbminlen and mbmaxlen. Although
mbminlen and mbmaxlen are
initialized if and only if prtype
is (in one of the 3 functions in this file),
it could be that none of these functions
has been called. */
#ifdef UNIV_DEBUG
ulint i_mbminlen, i_mbmaxlen;
innobase_get_cset_width(
dtype_get_charset_coll(prtype),
&i_mbminlen, &i_mbmaxlen);
if (UNIV_UNLIKELY
(DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen)
!= mbminmaxlen)) {
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: "
"mbminlen=%lu, "
"mbmaxlen=%lu, "
"type->mbminlen=%lu, "
"type->mbmaxlen=%lu\n",
(ulong) i_mbminlen,
(ulong) i_mbmaxlen,
(ulong) DATA_MBMINLEN(mbminmaxlen),
(ulong) DATA_MBMAXLEN(mbminmaxlen));
}
if (i_mbminlen == i_mbmaxlen) {
ut_ad(DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen)
== mbminmaxlen);
#endif /* UNIV_DEBUG */
if (DATA_MBMINLEN(mbminmaxlen)
== DATA_MBMAXLEN(mbminmaxlen)) {
return(len);
}
}

View file

@ -776,6 +776,21 @@ fil_space_set_corrupt(
/*==================*/
ulint space_id);
/****************************************************************//**
Generate redo logs for swapping two .ibd files */
UNIV_INTERN
void
fil_mtr_rename_log(
/*===============*/
ulint old_space_id, /*!< in: tablespace id of the old
table. */
const char* old_name, /*!< in: old table name */
ulint new_space_id, /*!< in: tablespace id of the new
table */
const char* new_name, /*!< in: new table name */
const char* tmp_name); /*!< in: temp table name used while
swapping */
typedef struct fil_space_struct fil_space_t;
#endif

View file

@ -798,14 +798,22 @@ lock_rec_get_page_no(
remains set when the waiting lock is granted,
or if the lock is inherited to a neighboring
record */
#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_MODE_MASK
#define LOCK_CONV_BY_OTHER 4096 /*!< this bit is set when the lock was created
by another transaction, not by lock->trx itself */
#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_CONV_BY_OTHER)&LOCK_MODE_MASK
# error
#endif
#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_TYPE_MASK
#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_CONV_BY_OTHER)&LOCK_TYPE_MASK
# error
#endif
/* @} */
/** Checks if this is a waiting lock created by lock->trx itself, that is,
LOCK_WAIT is set and LOCK_CONV_BY_OTHER is not.
The argument is parenthesized in the expansion, so any expression
(for example a | b) can be passed safely.
@param type_mode lock->type_mode
@return whether it is a waiting lock belonging to lock->trx */
#define lock_is_wait_not_by_other(type_mode) \
(((type_mode) & (LOCK_CONV_BY_OTHER | LOCK_WAIT)) == LOCK_WAIT)
/** Lock operation struct */
typedef struct lock_op_struct lock_op_t;
/** Lock operation struct */

View file

@ -361,24 +361,6 @@ rec_get_offsets_func(
#define rec_get_offsets(rec,index,offsets,n,heap) \
rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__)
/******************************************************//**
Determine the offset to each field in a leaf-page record
in ROW_FORMAT=COMPACT. This is a special case of
rec_init_offsets() and rec_get_offsets_func(). */
UNIV_INTERN
void
rec_init_offsets_comp_ordinary(
/*===========================*/
const rec_t* rec, /*!< in: physical record in
ROW_FORMAT=COMPACT */
ulint extra, /*!< in: number of bytes to reserve
between the record header and
the data payload
(usually REC_N_NEW_EXTRA_BYTES) */
const dict_index_t* index, /*!< in: record descriptor */
ulint* offsets);/*!< in/out: array of offsets;
in: n=rec_offs_n_fields(offsets) */
/******************************************************//**
The following function determines the offsets to each field
in the record. It can reuse a previously allocated array. */
@ -644,8 +626,48 @@ rec_copy(
/*=====*/
void* buf, /*!< in: buffer */
const rec_t* rec, /*!< in: physical record */
const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
__attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
/**********************************************************//**
Determines the size of a data tuple prefix in a temporary file.
@return total size */
UNIV_INTERN
ulint
rec_get_converted_size_temp(
/*========================*/
const dict_index_t* index, /*!< in: record descriptor */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
ulint* extra) /*!< out: extra size */
__attribute__((warn_unused_result, nonnull));
/******************************************************//**
Determine the offset to each field in temporary file.
@see rec_convert_dtuple_to_temp() */
UNIV_INTERN
void
rec_init_offsets_temp(
/*==================*/
const rec_t* rec, /*!< in: temporary file record */
const dict_index_t* index, /*!< in: record descriptor */
ulint* offsets)/*!< in/out: array of offsets;
in: n=rec_offs_n_fields(offsets) */
__attribute__((nonnull));
/*********************************************************//**
Builds a temporary file record out of a data tuple.
@see rec_init_offsets_temp() */
UNIV_INTERN
void
rec_convert_dtuple_to_temp(
/*=======================*/
rec_t* rec, /*!< out: record */
const dict_index_t* index, /*!< in: record descriptor */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields) /*!< in: number of fields */
__attribute__((nonnull));
/**************************************************************//**
Copies the first n fields of a physical record to a new physical record in
a buffer.
@ -680,21 +702,6 @@ rec_fold(
__attribute__((pure));
#endif /* !UNIV_HOTBACKUP */
/*********************************************************//**
Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
UNIV_INTERN
void
rec_convert_dtuple_to_rec_comp(
/*===========================*/
rec_t* rec, /*!< in: origin of record */
ulint extra, /*!< in: number of bytes to
reserve between the record
header and the data payload
(normally REC_N_NEW_EXTRA_BYTES) */
const dict_index_t* index, /*!< in: record descriptor */
ulint status, /*!< in: status bits of the record */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields);/*!< in: number of data fields */
/*********************************************************//**
Builds a physical record out of a data tuple and
stores it into the given buffer.
@return pointer to the origin of physical record */
@ -727,10 +734,7 @@ UNIV_INTERN
ulint
rec_get_converted_size_comp_prefix(
/*===============================*/
const dict_index_t* index, /*!< in: record descriptor;
dict_table_is_comp() is
assumed to hold, even if
it does not */
const dict_index_t* index, /*!< in: record descriptor */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
ulint* extra); /*!< out: extra size */

View file

@ -317,6 +317,10 @@ extern ulint srv_fatal_semaphore_wait_threshold;
extern ulint srv_dml_needed_delay;
extern lint srv_kill_idle_transaction;
#ifdef UNIV_DEBUG
extern my_bool srv_purge_view_update_only_debug;
#endif /* UNIV_DEBUG */
extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs,
query threads, and lock table: we allocate
it from dynamic memory to get it to the
@ -397,6 +401,9 @@ extern ibool srv_blocking_lru_restore;
When FALSE, row locks are not taken at all. */
extern my_bool srv_fake_changes_locks;
/** print all user-level transactions deadlocks to mysqld stderr */
extern my_bool srv_print_all_deadlocks;
/** Status variables to be passed to MySQL */
typedef struct export_var_struct export_struc;
@ -791,7 +798,9 @@ struct export_var_struct{
ulint innodb_dict_tables;
ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */
ulint innodb_buffer_pool_pages_data; /*!< Data pages */
ulint innodb_buffer_pool_bytes_data; /*!< File bytes used */
ulint innodb_buffer_pool_pages_dirty; /*!< Dirty data pages */
ulint innodb_buffer_pool_bytes_dirty; /*!< File bytes modified */
ulint innodb_buffer_pool_pages_misc; /*!< Miscellaneous pages */
ulint innodb_buffer_pool_pages_free; /*!< Free pages */
#ifdef UNIV_DEBUG
@ -877,6 +886,11 @@ struct export_var_struct{
ib_int64_t innodb_x_lock_os_waits;
ib_int64_t innodb_x_lock_spin_rounds;
ib_int64_t innodb_x_lock_spin_waits;
#ifdef UNIV_DEBUG
ulint innodb_purge_trx_id_age; /*!< max_trx_id - purged trx_id */
ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id
- purged view's min trx_id */
#endif /* UNIV_DEBUG */
};
/** Thread slot in the thread table */

View file

@ -156,6 +156,10 @@ struct trx_purge_struct{
than this */
undo_no_t purge_undo_no; /*!< Purge has advanced past all records
whose undo number is less than this */
#ifdef UNIV_DEBUG
trx_id_t done_trx_no; /* Indicates the 'purge pointer' that has
already been purged accurately. */
#endif /* UNIV_DEBUG */
/*-----------------------------*/
ibool next_stored; /*!< TRUE if the info of the next record
to purge is stored below: if yes, then

View file

@ -49,14 +49,10 @@ Created 1/20/1994 Heikki Tuuri
#define _IB_TO_STR(s) #s
#define IB_TO_STR(s) _IB_TO_STR(s)
#define INNODB_VERSION_MAJOR 1
#define INNODB_VERSION_MINOR 1
#define INNODB_VERSION_BUGFIX 8
#ifndef PERCONA_INNODB_VERSION
#define PERCONA_INNODB_VERSION 29.3
#endif
#include <mysql_version.h>
#define INNODB_VERSION_MAJOR MYSQL_MAJOR_VERSION
#define INNODB_VERSION_MINOR MYSQL_MINOR_VERSION
/* The following is the InnoDB version as shown in
SELECT plugin_version FROM information_schema.plugins;
@ -67,11 +63,11 @@ component, i.e. we show M.N.P as M.N */
#define INNODB_VERSION_SHORT \
(INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR)
#define INNODB_VERSION_STR \
IB_TO_STR(INNODB_VERSION_MAJOR) "." \
IB_TO_STR(INNODB_VERSION_MINOR) "." \
IB_TO_STR(INNODB_VERSION_BUGFIX) "-" \
IB_TO_STR(PERCONA_INNODB_VERSION)
#ifndef PERCONA_INNODB_VERSION
#define PERCONA_INNODB_VERSION 29.3
#endif
#define INNODB_VERSION_STR MYSQL_SERVER_VERSION "-" IB_TO_STR(PERCONA_INNODB_VERSION)
#define REFMAN "http://dev.mysql.com/doc/refman/" \
IB_TO_STR(MYSQL_MAJOR_VERSION) "." \

View file

@ -790,12 +790,16 @@ lock_reset_lock_and_trx_wait(
/*=========================*/
lock_t* lock) /*!< in: record lock */
{
ut_ad((lock->trx)->wait_lock == lock);
ut_ad(lock_get_wait(lock));
/* Reset the back pointer in trx to this waiting lock request */
(lock->trx)->wait_lock = NULL;
if (!(lock->type_mode & LOCK_CONV_BY_OTHER)) {
ut_ad((lock->trx)->wait_lock == lock);
(lock->trx)->wait_lock = NULL;
} else {
ut_ad(lock_get_type_low(lock) == LOCK_REC);
}
lock->type_mode &= ~LOCK_WAIT;
}
@ -1431,9 +1435,9 @@ lock_rec_has_expl(
while (lock) {
if (lock->trx == trx
&& !lock_is_wait_not_by_other(lock->type_mode)
&& lock_mode_stronger_or_eq(lock_get_mode(lock),
precise_mode & LOCK_MODE_MASK)
&& !lock_get_wait(lock)
&& (!lock_rec_get_rec_not_gap(lock)
|| (precise_mode & LOCK_REC_NOT_GAP)
|| heap_no == PAGE_HEAP_NO_SUPREMUM)
@ -1731,9 +1735,9 @@ lock_rec_create(
HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
lock_rec_fold(space, page_no), lock);
lock_sys->rec_num++;
if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
lock_sys->rec_num++;
if (lock_is_wait_not_by_other(type_mode)) {
lock_set_lock_and_trx_wait(lock, trx);
}
@ -1763,10 +1767,11 @@ lock_rec_enqueue_waiting(
const buf_block_t* block, /*!< in: buffer block containing
the record */
ulint heap_no,/*!< in: heap number of the record */
lock_t* lock, /*!< in: lock object; NULL if a new
one should be created. */
dict_index_t* index, /*!< in: index of record */
que_thr_t* thr) /*!< in: query thread */
{
lock_t* lock;
trx_t* trx;
ulint sec;
ulint ms;
@ -1803,9 +1808,17 @@ lock_rec_enqueue_waiting(
ut_ad(0);
}
/* Enqueue the lock request that will wait to be granted */
lock = lock_rec_create(type_mode | LOCK_WAIT,
block, heap_no, index, trx);
if (lock == NULL) {
/* Enqueue the lock request that will wait to be granted */
lock = lock_rec_create(type_mode | LOCK_WAIT,
block, heap_no, index, trx);
} else {
ut_ad(lock->type_mode & LOCK_WAIT);
ut_ad(lock->type_mode & LOCK_CONV_BY_OTHER);
lock->type_mode &= ~LOCK_CONV_BY_OTHER;
lock_set_lock_and_trx_wait(lock, trx);
}
/* Check if a deadlock occurs: if yes, remove the lock request and
return an error code */
@ -1829,7 +1842,7 @@ lock_rec_enqueue_waiting(
trx->que_state = TRX_QUE_LOCK_WAIT;
trx->was_chosen_as_deadlock_victim = FALSE;
trx->wait_started = time(NULL);
if (innobase_get_slow_log() && trx->take_stats) {
if (UNIV_UNLIKELY(trx->take_stats)) {
ut_usectime(&sec, &ms);
trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms;
}
@ -2054,6 +2067,7 @@ lock_rec_lock_slow(
que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
lock_t* lock;
ut_ad(mutex_own(&kernel_mutex));
ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
@ -2068,7 +2082,27 @@ lock_rec_lock_slow(
trx = thr_get_trx(thr);
if (lock_rec_has_expl(mode, block, heap_no, trx)) {
lock = lock_rec_has_expl(mode, block, heap_no, trx);
if (lock) {
if (lock->type_mode & LOCK_CONV_BY_OTHER) {
/* This lock or lock waiting was created by the other
transaction, not by the transaction (trx) itself.
So, the transaction (trx) should treat it correctly,
according to whether it has been granted or not. */
if (lock->type_mode & LOCK_WAIT) {
/* This lock request was not granted yet.
We should wait for it to be granted. */
goto enqueue_waiting;
} else {
/* This lock request was already granted.
Just clearing the flag. */
lock->type_mode &= ~LOCK_CONV_BY_OTHER;
}
}
/* The trx already has a strong enough lock on rec: do
nothing */
@ -2078,8 +2112,10 @@ lock_rec_lock_slow(
the queue, as this transaction does not have a lock strong
enough already granted on the record, we have to wait. */
ut_ad(lock == NULL);
enqueue_waiting:
return(lock_rec_enqueue_waiting(mode, block, heap_no,
index, thr));
lock, index, thr));
} else if (!impl) {
/* Set the requested lock on the record */
@ -2221,7 +2257,8 @@ lock_grant(
TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait
for it */
if (lock->trx->que_state == TRX_QUE_LOCK_WAIT) {
if (!(lock->type_mode & LOCK_CONV_BY_OTHER)
&& lock->trx->que_state == TRX_QUE_LOCK_WAIT) {
trx_end_lock_wait(lock->trx);
}
}
@ -2238,6 +2275,7 @@ lock_rec_cancel(
{
ut_ad(mutex_own(&kernel_mutex));
ut_ad(lock_get_type_low(lock) == LOCK_REC);
ut_ad(!(lock->type_mode & LOCK_CONV_BY_OTHER));
/* Reset the bit (there can be only one set bit) in the lock bitmap */
lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
@ -2382,8 +2420,12 @@ lock_rec_reset_and_release_wait(
lock = lock_rec_get_first(block, heap_no);
while (lock != NULL) {
if (lock_get_wait(lock)) {
if (lock_is_wait_not_by_other(lock->type_mode)) {
lock_rec_cancel(lock);
} else if (lock_get_wait(lock)) {
/* just reset LOCK_WAIT */
lock_rec_reset_nth_bit(lock, heap_no);
lock_reset_lock_and_trx_wait(lock);
} else {
lock_rec_reset_nth_bit(lock, heap_no);
}
@ -3271,6 +3313,80 @@ lock_rec_restore_from_page_infimum(
/*=========== DEADLOCK CHECKING ======================================*/
/*********************************************************************//**
rewind(3) the file used for storing the latest detected deadlock and write
a timestamp to it. If printing of all deadlocks to stderr is enabled, also
print a heading message followed by a timestamp to stderr. */
UNIV_INLINE
void
lock_deadlock_start_print(void)
/*===========================*/
{
	/* Start the latest-deadlock file over from its beginning. */
	rewind(lock_latest_err_file);
	ut_print_timestamp(lock_latest_err_file);

	if (srv_print_all_deadlocks) {
		fprintf(stderr, "InnoDB: transactions deadlock detected, "
			"dumping detailed information.\n");
		/* The timestamp is written after the heading line. */
		ut_print_timestamp(stderr);
	}
}
/*********************************************************************//**
Write a message to the latest-deadlock file, and to stderr as well when
printing of all deadlocks is enabled. */
UNIV_INLINE
void
lock_deadlock_fputs(
/*================*/
	const char*	msg)	/*!< in: message to print */
{
	fputs(msg, lock_latest_err_file);

	if (!srv_print_all_deadlocks) {
		/* Only the deadlock file is wanted. */
		return;
	}

	fputs(msg, stderr);
}
/*********************************************************************//**
Write transaction data to the latest-deadlock file, and to stderr as well
when printing of all deadlocks is enabled. */
UNIV_INLINE
void
lock_deadlock_trx_print(
/*====================*/
	trx_t*	trx,		/*!< in: transaction */
	ulint	max_query_len)	/*!< in: max query length to print, or 0 to
				use the default max length */
{
	trx_print(lock_latest_err_file, trx, max_query_len);

	if (!srv_print_all_deadlocks) {
		/* Only the deadlock file is wanted. */
		return;
	}

	trx_print(stderr, trx, max_query_len);
}
/*********************************************************************//**
Write lock data to the latest-deadlock file, and to stderr as well when
printing of all deadlocks is enabled. Record locks are printed with
lock_rec_print(), all other locks with lock_table_print(). */
UNIV_INLINE
void
lock_deadlock_lock_print(
/*=====================*/
	const lock_t*	lock)	/*!< in: record or table type lock */
{
	/* Decide once which printer applies to this lock. */
	const int	is_rec_lock = (lock_get_type_low(lock) == LOCK_REC);

	if (is_rec_lock) {
		lock_rec_print(lock_latest_err_file, lock);
	} else {
		lock_table_print(lock_latest_err_file, lock);
	}

	if (!srv_print_all_deadlocks) {
		/* Only the deadlock file is wanted. */
		return;
	}

	if (is_rec_lock) {
		lock_rec_print(stderr, lock);
	} else {
		lock_table_print(stderr, lock);
	}
}
/********************************************************************//**
Checks if a lock request results in a deadlock.
@return TRUE if a deadlock was detected and we chose trx as a victim;
@ -3314,31 +3430,26 @@ retry:
/* If the lock search exceeds the max step
or the max depth, the current trx will be
the victim. Print its information. */
rewind(lock_latest_err_file);
ut_print_timestamp(lock_latest_err_file);
lock_deadlock_start_print();
fputs("TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
" WAITS-FOR GRAPH, WE WILL ROLL BACK"
" FOLLOWING TRANSACTION \n",
lock_latest_err_file);
lock_deadlock_fputs(
"TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
" WAITS-FOR GRAPH, WE WILL ROLL BACK"
" FOLLOWING TRANSACTION \n\n"
"*** TRANSACTION:\n");
fputs("\n*** TRANSACTION:\n", lock_latest_err_file);
trx_print(lock_latest_err_file, trx, 3000);
lock_deadlock_trx_print(trx, 3000);
fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n",
lock_latest_err_file);
lock_deadlock_fputs(
"*** WAITING FOR THIS LOCK TO BE GRANTED:\n");
lock_deadlock_lock_print(lock);
if (lock_get_type(lock) == LOCK_REC) {
lock_rec_print(lock_latest_err_file, lock);
} else {
lock_table_print(lock_latest_err_file, lock);
}
break;
case LOCK_VICTIM_IS_START:
srv_n_lock_deadlock_count++;
fputs("*** WE ROLL BACK TRANSACTION (2)\n",
lock_latest_err_file);
lock_deadlock_fputs("*** WE ROLL BACK TRANSACTION (2)\n");
break;
default:
@ -3453,45 +3564,33 @@ lock_deadlock_recursive(
point: a deadlock detected; or we have
searched the waits-for graph too long */
FILE* ef = lock_latest_err_file;
lock_deadlock_start_print();
rewind(ef);
ut_print_timestamp(ef);
lock_deadlock_fputs("\n*** (1) TRANSACTION:\n");
fputs("\n*** (1) TRANSACTION:\n", ef);
lock_deadlock_trx_print(wait_lock->trx, 3000);
trx_print(ef, wait_lock->trx, 3000);
lock_deadlock_fputs(
"*** (1) WAITING FOR THIS LOCK"
" TO BE GRANTED:\n");
fputs("*** (1) WAITING FOR THIS LOCK"
" TO BE GRANTED:\n", ef);
lock_deadlock_lock_print(wait_lock);
if (lock_get_type_low(wait_lock) == LOCK_REC) {
lock_rec_print(ef, wait_lock);
} else {
lock_table_print(ef, wait_lock);
}
lock_deadlock_fputs("*** (2) TRANSACTION:\n");
fputs("*** (2) TRANSACTION:\n", ef);
lock_deadlock_trx_print(lock->trx, 3000);
trx_print(ef, lock->trx, 3000);
lock_deadlock_fputs(
"*** (2) HOLDS THE LOCK(S):\n");
fputs("*** (2) HOLDS THE LOCK(S):\n", ef);
lock_deadlock_lock_print(lock);
if (lock_get_type_low(lock) == LOCK_REC) {
lock_rec_print(ef, lock);
} else {
lock_table_print(ef, lock);
}
lock_deadlock_fputs(
"*** (2) WAITING FOR THIS LOCK"
" TO BE GRANTED:\n");
fputs("*** (2) WAITING FOR THIS LOCK"
" TO BE GRANTED:\n", ef);
lock_deadlock_lock_print(start->wait_lock);
if (lock_get_type_low(start->wait_lock)
== LOCK_REC) {
lock_rec_print(ef, start->wait_lock);
} else {
lock_table_print(ef, start->wait_lock);
}
#ifdef UNIV_DEBUG
if (lock_print_waits) {
fputs("Deadlock detected\n",
@ -3514,8 +3613,8 @@ lock_deadlock_recursive(
as a victim to try to avoid deadlocking our
recursion starting point transaction */
fputs("*** WE ROLL BACK TRANSACTION (1)\n",
ef);
lock_deadlock_fputs(
"*** WE ROLL BACK TRANSACTION (1)\n");
wait_lock->trx->was_chosen_as_deadlock_victim
= TRUE;
@ -3600,6 +3699,7 @@ lock_table_create(
ut_ad(table && trx);
ut_ad(mutex_own(&kernel_mutex));
ut_ad(!(type_mode & LOCK_CONV_BY_OTHER));
if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) {
++table->n_waiting_or_granted_auto_inc_locks;
@ -3837,7 +3937,7 @@ lock_table_enqueue_waiting(
return(DB_SUCCESS);
}
if (innobase_get_slow_log() && trx->take_stats) {
if (UNIV_UNLIKELY(trx->take_stats)) {
ut_usectime(&sec, &ms);
trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms;
}
@ -4163,6 +4263,7 @@ lock_cancel_waiting_and_release(
lock_t* lock) /*!< in: waiting lock request */
{
ut_ad(mutex_own(&kernel_mutex));
ut_ad(!(lock->type_mode & LOCK_CONV_BY_OTHER));
if (lock_get_type_low(lock) == LOCK_REC) {
@ -5215,7 +5316,7 @@ lock_rec_insert_check_and_lock(
err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP
| LOCK_INSERT_INTENTION,
block, next_rec_heap_no,
index, thr);
NULL, index, thr);
} else {
err = DB_SUCCESS;
}
@ -5291,10 +5392,23 @@ lock_rec_convert_impl_to_expl(
if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block,
heap_no, impl_trx)) {
ulint type_mode = (LOCK_REC | LOCK_X
| LOCK_REC_NOT_GAP);
/* If the delete-marked record is already locked by another
transaction, the lock cannot be granted at this moment. Instead,
we enqueue a waiting lock request on behalf of impl_trx in place
of its implicit lock. */
if (rec_get_deleted_flag(rec, rec_offs_comp(offsets))
&& lock_rec_other_has_conflicting(
LOCK_X | LOCK_REC_NOT_GAP, block,
heap_no, impl_trx)) {
type_mode |= (LOCK_WAIT | LOCK_CONV_BY_OTHER);
}
lock_rec_add_to_queue(
LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP,
block, heap_no, index, impl_trx);
type_mode, block, heap_no, index, impl_trx);
}
}
}

View file

@ -987,8 +987,11 @@ recv_parse_or_apply_log_rec_body(
not NULL, then the log record is
applied to the page, and the log
record should be complete then */
mtr_t* mtr) /*!< in: mtr or NULL; should be non-NULL
mtr_t* mtr, /*!< in: mtr or NULL; should be non-NULL
if and only if block is non-NULL */
ulint space_id)
/*!< in: tablespace id obtained by
parsing initial log record */
{
dict_index_t* index = NULL;
page_t* page;
@ -1260,8 +1263,11 @@ recv_parse_or_apply_log_rec_body(
ut_ad(!page || page_type != FIL_PAGE_TYPE_ALLOCATED);
ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
break;
case MLOG_FILE_CREATE:
case MLOG_FILE_RENAME:
ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type,
space_id, 0);
break;
case MLOG_FILE_CREATE:
case MLOG_FILE_DELETE:
case MLOG_FILE_CREATE2:
ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, 0, 0);
@ -1665,7 +1671,8 @@ recv_recover_page_func(
recv_parse_or_apply_log_rec_body(recv->type, buf,
buf + recv->len,
block, &mtr);
block, &mtr,
recv_addr->space);
if (srv_recovery_stats) {
mutex_enter(&(recv_sys->mutex));
@ -2150,7 +2157,7 @@ recv_parse_log_rec(
#endif /* UNIV_LOG_LSN_DEBUG */
new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
NULL, NULL);
NULL, NULL, *space);
if (UNIV_UNLIKELY(new_ptr == NULL)) {
return(0);

View file

@ -2289,7 +2289,7 @@ os_file_pread(
os_n_file_reads++;
if (innobase_get_slow_log() && trx && trx->take_stats)
if (UNIV_UNLIKELY(trx && trx->take_stats))
{
trx->io_reads++;
trx->io_read += n;
@ -2322,7 +2322,7 @@ os_file_pread(
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
if (UNIV_UNLIKELY(start_time != 0))
{
ut_usectime(&sec, &ms);
finish_time = (ib_uint64_t)sec * 1000000 + ms;
@ -2376,7 +2376,7 @@ os_file_pread(
os_n_pending_reads--;
os_mutex_exit(os_file_count_mutex);
if (innobase_get_slow_log() && trx && trx->take_stats && start_time)
/* Only compute the finish time when start_time is nonzero. */
if (UNIV_UNLIKELY(start_time != 0))
{
ut_usectime(&sec, &ms);
finish_time = (ib_uint64_t)sec * 1000000 + ms;

View file

@ -167,7 +167,6 @@ rec_get_n_extern_new(
{
const byte* nulls;
const byte* lens;
dict_field_t* field;
ulint null_mask;
ulint n_extern;
ulint i;
@ -188,10 +187,13 @@ rec_get_n_extern_new(
/* read the lengths of fields 0..n */
do {
ulint len;
const dict_field_t* field
= dict_index_get_nth_field(index, i);
const dict_col_t* col
= dict_field_get_col(field);
ulint len;
field = dict_index_get_nth_field(index, i);
if (!(dict_field_get_col(field)->prtype & DATA_NOT_NULL)) {
if (!(col->prtype & DATA_NOT_NULL)) {
/* nullable field => read the null flag */
if (UNIV_UNLIKELY(!(byte) null_mask)) {
@ -209,8 +211,6 @@ rec_get_n_extern_new(
if (UNIV_UNLIKELY(!field->fixed_len)) {
/* Variable-length field: read the length */
const dict_col_t* col
= dict_field_get_col(field);
len = *lens--;
/* If the maximum length of the field is up
to 255 bytes, the actual length is always
@ -239,16 +239,15 @@ rec_get_n_extern_new(
Determine the offset to each field in a leaf-page record
in ROW_FORMAT=COMPACT. This is a special case of
rec_init_offsets() and rec_get_offsets_func(). */
UNIV_INTERN
UNIV_INLINE __attribute__((nonnull))
void
rec_init_offsets_comp_ordinary(
/*===========================*/
const rec_t* rec, /*!< in: physical record in
ROW_FORMAT=COMPACT */
ulint extra, /*!< in: number of bytes to reserve
between the record header and
the data payload
(usually REC_N_NEW_EXTRA_BYTES) */
ibool temp, /*!< in: whether to use the
format for temporary files in
index creation */
const dict_index_t* index, /*!< in: record descriptor */
ulint* offsets)/*!< in/out: array of offsets;
in: n=rec_offs_n_fields(offsets) */
@ -256,27 +255,38 @@ rec_init_offsets_comp_ordinary(
ulint i = 0;
ulint offs = 0;
ulint any_ext = 0;
const byte* nulls = rec - (extra + 1);
const byte* nulls = temp
? rec - 1
: rec - (1 + REC_N_NEW_EXTRA_BYTES);
const byte* lens = nulls
- UT_BITS_IN_BYTES(index->n_nullable);
dict_field_t* field;
ulint null_mask = 1;
#ifdef UNIV_DEBUG
/* We cannot invoke rec_offs_make_valid() here, because it can hold
that extra != REC_N_NEW_EXTRA_BYTES. Similarly, rec_offs_validate()
will fail in that case, because it invokes rec_get_status(). */
/* We cannot invoke rec_offs_make_valid() here if temp=TRUE.
Similarly, rec_offs_validate() will fail in that case, because
it invokes rec_get_status(). */
offsets[2] = (ulint) rec;
offsets[3] = (ulint) index;
#endif /* UNIV_DEBUG */
ut_ad(temp || dict_table_is_comp(index->table));
if (temp && dict_table_is_comp(index->table)) {
/* No need to adjust fixed_len=0. We only need to
adjust it for ROW_FORMAT=REDUNDANT. */
temp = FALSE;
}
/* read the lengths of fields 0..n */
do {
ulint len;
const dict_field_t* field
= dict_index_get_nth_field(index, i);
const dict_col_t* col
= dict_field_get_col(field);
ulint len;
field = dict_index_get_nth_field(index, i);
if (!(dict_field_get_col(field)->prtype
& DATA_NOT_NULL)) {
if (!(col->prtype & DATA_NOT_NULL)) {
/* nullable field => read the null flag */
if (UNIV_UNLIKELY(!(byte) null_mask)) {
@ -296,10 +306,9 @@ rec_init_offsets_comp_ordinary(
null_mask <<= 1;
}
if (UNIV_UNLIKELY(!field->fixed_len)) {
if (!field->fixed_len
|| (temp && !dict_col_get_fixed_size(col, temp))) {
/* Variable-length field: read the length */
const dict_col_t* col
= dict_field_get_col(field);
len = *lens--;
/* If the maximum length of the field is up
to 255 bytes, the actual length is always
@ -393,9 +402,8 @@ rec_init_offsets(
= dict_index_get_n_unique_in_tree(index);
break;
case REC_STATUS_ORDINARY:
rec_init_offsets_comp_ordinary(rec,
REC_N_NEW_EXTRA_BYTES,
index, offsets);
rec_init_offsets_comp_ordinary(
rec, FALSE, index, offsets);
return;
}
@ -766,17 +774,19 @@ rec_get_nth_field_offs_old(
/**********************************************************//**
Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
@return total size */
UNIV_INTERN
UNIV_INLINE __attribute__((warn_unused_result, nonnull(1,2)))
ulint
rec_get_converted_size_comp_prefix(
/*===============================*/
rec_get_converted_size_comp_prefix_low(
/*===================================*/
const dict_index_t* index, /*!< in: record descriptor;
dict_table_is_comp() is
assumed to hold, even if
it does not */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
ulint* extra) /*!< out: extra size */
ulint* extra, /*!< out: extra size */
ibool temp) /*!< in: whether this is a
temporary file record */
{
ulint extra_size;
ulint data_size;
@ -785,15 +795,25 @@ rec_get_converted_size_comp_prefix(
ut_ad(fields);
ut_ad(n_fields > 0);
ut_ad(n_fields <= dict_index_get_n_fields(index));
ut_ad(!temp || extra);
extra_size = REC_N_NEW_EXTRA_BYTES
extra_size = temp
? UT_BITS_IN_BYTES(index->n_nullable)
: REC_N_NEW_EXTRA_BYTES
+ UT_BITS_IN_BYTES(index->n_nullable);
data_size = 0;
if (temp && dict_table_is_comp(index->table)) {
/* No need to adjust fixed_len to 0. That adjustment is only
needed for ROW_FORMAT=REDUNDANT. */
temp = FALSE;
}
/* read the lengths of fields 0..n */
for (i = 0; i < n_fields; i++) {
const dict_field_t* field;
ulint len;
ulint fixed_len;
const dict_col_t* col;
field = dict_index_get_nth_field(index, i);
@ -809,8 +829,14 @@ rec_get_converted_size_comp_prefix(
continue;
}
ut_ad(len <= col->len || col->mtype == DATA_BLOB);
ut_ad(len <= col->len || col->mtype == DATA_BLOB
|| (col->len == 0 && col->mtype == DATA_VARCHAR));
fixed_len = field->fixed_len;
if (temp && fixed_len
&& !dict_col_get_fixed_size(col, temp)) {
fixed_len = 0;
}
/* If the maximum length of a variable-length field
is up to 255 bytes, the actual length is always stored
in one byte. If the maximum length is more than 255
@ -818,11 +844,20 @@ rec_get_converted_size_comp_prefix(
0..127. The length will be encoded in two bytes when
it is 128 or more, or when the field is stored externally. */
if (field->fixed_len) {
ut_ad(len == field->fixed_len);
if (fixed_len) {
#ifdef UNIV_DEBUG
ulint mbminlen = DATA_MBMINLEN(col->mbminmaxlen);
ulint mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen);
ut_ad(len <= fixed_len);
ut_ad(!mbmaxlen || len >= mbminlen
* (fixed_len / mbmaxlen));
/* dict_index_add_col() should guarantee this */
ut_ad(!field->prefix_len
|| field->fixed_len == field->prefix_len);
|| fixed_len == field->prefix_len);
#endif /* UNIV_DEBUG */
} else if (dfield_is_ext(&fields[i])) {
ut_ad(col->len >= 256 || col->mtype == DATA_BLOB);
extra_size += 2;
@ -839,13 +874,30 @@ rec_get_converted_size_comp_prefix(
data_size += len;
}
if (UNIV_LIKELY_NULL(extra)) {
if (extra) {
*extra = extra_size;
}
return(extra_size + data_size);
}
/**********************************************************//**
Computes the size of a data tuple prefix as it would be stored in
ROW_FORMAT=COMPACT. Checks with ut_ad() that the table of the index is
in a compact format and then delegates the computation to
rec_get_converted_size_comp_prefix_low() with temp=FALSE.
@return total size */
UNIV_INTERN
ulint
rec_get_converted_size_comp_prefix(
/*===============================*/
	const dict_index_t*	index,	/*!< in: record descriptor */
	const dfield_t*		fields,	/*!< in: array of data fields */
	ulint			n_fields,/*!< in: number of data fields */
	ulint*			extra)	/*!< out: extra size */
{
	ulint	total_size;

	ut_ad(dict_table_is_comp(index->table));

	/* FALSE: an ordinary index record, not a temporary file record */
	total_size = rec_get_converted_size_comp_prefix_low(
		index, fields, n_fields, extra, FALSE);

	return(total_size);
}
/**********************************************************//**
Determines the size of a data tuple in ROW_FORMAT=COMPACT.
@return total size */
@ -890,8 +942,8 @@ rec_get_converted_size_comp(
return(ULINT_UNDEFINED);
}
return(size + rec_get_converted_size_comp_prefix(index, fields,
n_fields, extra));
return(size + rec_get_converted_size_comp_prefix_low(
index, fields, n_fields, extra, FALSE));
}
/***********************************************************//**
@ -1068,19 +1120,18 @@ rec_convert_dtuple_to_rec_old(
/*********************************************************//**
Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
UNIV_INTERN
UNIV_INLINE __attribute__((nonnull))
void
rec_convert_dtuple_to_rec_comp(
/*===========================*/
rec_t* rec, /*!< in: origin of record */
ulint extra, /*!< in: number of bytes to
reserve between the record
header and the data payload
(normally REC_N_NEW_EXTRA_BYTES) */
const dict_index_t* index, /*!< in: record descriptor */
ulint status, /*!< in: status bits of the record */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields)/*!< in: number of data fields */
ulint n_fields,/*!< in: number of data fields */
ulint status, /*!< in: status bits of the record */
ibool temp) /*!< in: whether to use the
format for temporary files in
index creation */
{
const dfield_t* field;
const dtype_t* type;
@ -1092,31 +1143,44 @@ rec_convert_dtuple_to_rec_comp(
ulint n_node_ptr_field;
ulint fixed_len;
ulint null_mask = 1;
ut_ad(extra == 0 || dict_table_is_comp(index->table));
ut_ad(extra == 0 || extra == REC_N_NEW_EXTRA_BYTES);
ut_ad(temp || dict_table_is_comp(index->table));
ut_ad(n_fields > 0);
switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
case REC_STATUS_ORDINARY:
if (temp) {
ut_ad(status == REC_STATUS_ORDINARY);
ut_ad(n_fields <= dict_index_get_n_fields(index));
n_node_ptr_field = ULINT_UNDEFINED;
break;
case REC_STATUS_NODE_PTR:
ut_ad(n_fields == dict_index_get_n_unique_in_tree(index) + 1);
n_node_ptr_field = n_fields - 1;
break;
case REC_STATUS_INFIMUM:
case REC_STATUS_SUPREMUM:
ut_ad(n_fields == 1);
n_node_ptr_field = ULINT_UNDEFINED;
break;
default:
ut_error;
return;
nulls = rec - 1;
if (dict_table_is_comp(index->table)) {
/* No need to adjust fixed_len to 0. That adjustment
is only needed for ROW_FORMAT=REDUNDANT. */
temp = FALSE;
}
} else {
nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
case REC_STATUS_ORDINARY:
ut_ad(n_fields <= dict_index_get_n_fields(index));
n_node_ptr_field = ULINT_UNDEFINED;
break;
case REC_STATUS_NODE_PTR:
ut_ad(n_fields
== dict_index_get_n_unique_in_tree(index) + 1);
n_node_ptr_field = n_fields - 1;
break;
case REC_STATUS_INFIMUM:
case REC_STATUS_SUPREMUM:
ut_ad(n_fields == 1);
n_node_ptr_field = ULINT_UNDEFINED;
break;
default:
ut_error;
return;
}
}
end = rec;
nulls = rec - (extra + 1);
lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
/* clear the SQL-null flags */
memset(lens + 1, 0, nulls - lens);
@ -1162,6 +1226,10 @@ rec_convert_dtuple_to_rec_comp(
ifield = dict_index_get_nth_field(index, i);
fixed_len = ifield->fixed_len;
if (temp && fixed_len
&& !dict_col_get_fixed_size(ifield->col, temp)) {
fixed_len = 0;
}
/* If the maximum length of a variable-length field
is up to 255 bytes, the actual length is always stored
in one byte. If the maximum length is more than 255
@ -1169,8 +1237,17 @@ rec_convert_dtuple_to_rec_comp(
0..127. The length will be encoded in two bytes when
it is 128 or more, or when the field is stored externally. */
if (fixed_len) {
ut_ad(len == fixed_len);
#ifdef UNIV_DEBUG
ulint mbminlen = DATA_MBMINLEN(
ifield->col->mbminmaxlen);
ulint mbmaxlen = DATA_MBMAXLEN(
ifield->col->mbminmaxlen);
ut_ad(len <= fixed_len);
ut_ad(!mbmaxlen || len >= mbminlen
* (fixed_len / mbmaxlen));
ut_ad(!dfield_is_ext(field));
#endif /* UNIV_DEBUG */
} else if (dfield_is_ext(field)) {
ut_ad(ifield->col->len >= 256
|| ifield->col->mtype == DATA_BLOB);
@ -1222,8 +1299,7 @@ rec_convert_dtuple_to_rec_new(
rec = buf + extra_size;
rec_convert_dtuple_to_rec_comp(
rec, REC_N_NEW_EXTRA_BYTES, index, status,
dtuple->fields, dtuple->n_fields);
rec, index, dtuple->fields, dtuple->n_fields, status, FALSE);
/* Set the info bits of the record */
rec_set_info_and_status_bits(rec, dtuple_get_info_bits(dtuple));
@ -1285,6 +1361,54 @@ rec_convert_dtuple_to_rec(
return(rec);
}
#ifndef UNIV_HOTBACKUP
/**********************************************************//**
Computes the size of a data tuple prefix as it would be stored in a
temporary file record. Delegates the computation to
rec_get_converted_size_comp_prefix_low() with temp=TRUE.
@see rec_convert_dtuple_to_temp()
@return total size */
UNIV_INTERN
ulint
rec_get_converted_size_temp(
/*========================*/
	const dict_index_t*	index,	/*!< in: record descriptor */
	const dfield_t*		fields,	/*!< in: array of data fields */
	ulint			n_fields,/*!< in: number of data fields */
	ulint*			extra)	/*!< out: extra size; the low-level
					routine asserts that this is
					non-NULL when temp=TRUE */
{
	ulint	total_size;

	/* TRUE: use the temporary file record format */
	total_size = rec_get_converted_size_comp_prefix_low(
		index, fields, n_fields, extra, TRUE);

	return(total_size);
}
/******************************************************//**
Fills in the offsets array for a record that is stored in the
temporary file format, by calling rec_init_offsets_comp_ordinary()
with temp=TRUE.
@see rec_convert_dtuple_to_temp() */
UNIV_INTERN
void
rec_init_offsets_temp(
/*==================*/
	const rec_t*		rec,	/*!< in: temporary file record */
	const dict_index_t*	index,	/*!< in: record descriptor */
	ulint*			offsets)/*!< in/out: array of offsets;
					in: n=rec_offs_n_fields(offsets) */
{
	rec_init_offsets_comp_ordinary(
		rec,
		TRUE,		/* temporary file record format */
		index,
		offsets);
}
/*********************************************************//**
Writes a data tuple as a temporary file record, by calling
rec_convert_dtuple_to_rec_comp() with status REC_STATUS_ORDINARY
and temp=TRUE.
@see rec_init_offsets_temp() */
UNIV_INTERN
void
rec_convert_dtuple_to_temp(
/*=======================*/
	rec_t*			rec,	/*!< out: record */
	const dict_index_t*	index,	/*!< in: record descriptor */
	const dfield_t*		fields,	/*!< in: array of data fields */
	ulint			n_fields)	/*!< in: number of fields */
{
	rec_convert_dtuple_to_rec_comp(
		rec,
		index,
		fields,
		n_fields,
		REC_STATUS_ORDINARY,
		TRUE);		/* temporary file record format */
}
/**************************************************************//**
Copies the first n fields of a physical record to a data tuple. The fields
are copied to the memory heap. */
@ -1495,6 +1619,7 @@ rec_copy_prefix_to_buf(
return(*buf + (rec - (lens + 1)));
}
#endif /* UNIV_HOTBACKUP */
/***************************************************************//**
Validates the consistency of an old-style physical record.

View file

@ -2288,7 +2288,10 @@ row_ins_index_entry(
err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry,
n_ext, thr);
if (err != DB_FAIL) {
if (index == dict_table_get_first_index(index->table)
&& thr_get_trx(thr)->mysql_thd != 0) {
DEBUG_SYNC_C("row_ins_clust_index_entry_leaf_after");
}
return(err);
}

View file

@ -301,6 +301,7 @@ row_merge_buf_add(
for (i = 0; i < n_fields; i++, field++, ifield++) {
const dict_col_t* col;
ulint col_no;
ulint fixed_len;
const dfield_t* row_field;
ulint len;
@ -349,9 +350,30 @@ row_merge_buf_add(
ut_ad(len <= col->len || col->mtype == DATA_BLOB);
if (ifield->fixed_len) {
ut_ad(len == ifield->fixed_len);
fixed_len = ifield->fixed_len;
if (fixed_len && !dict_table_is_comp(index->table)
&& DATA_MBMINLEN(col->mbminmaxlen)
!= DATA_MBMAXLEN(col->mbminmaxlen)) {
/* CHAR in ROW_FORMAT=REDUNDANT is always
fixed-length, but in the temporary file it is
variable-length for variable-length character
sets. */
fixed_len = 0;
}
if (fixed_len) {
#ifdef UNIV_DEBUG
ulint mbminlen = DATA_MBMINLEN(col->mbminmaxlen);
ulint mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen);
/* len should be between the sizes calculated based on
mbmaxlen and mbminlen */
ut_ad(len <= fixed_len);
ut_ad(!mbmaxlen || len >= mbminlen
* (fixed_len / mbmaxlen));
ut_ad(!dfield_is_ext(field));
#endif /* UNIV_DEBUG */
} else if (dfield_is_ext(field)) {
extra_size += 2;
} else if (len < 128
@ -372,12 +394,11 @@ row_merge_buf_add(
ulint size;
ulint extra;
size = rec_get_converted_size_comp(index,
REC_STATUS_ORDINARY,
entry, n_fields, &extra);
size = rec_get_converted_size_temp(
index, entry, n_fields, &extra);
ut_ad(data_size + extra_size + REC_N_NEW_EXTRA_BYTES == size);
ut_ad(extra_size + REC_N_NEW_EXTRA_BYTES == extra);
ut_ad(data_size + extra_size == size);
ut_ad(extra_size == extra);
}
#endif /* UNIV_DEBUG */
@ -581,14 +602,9 @@ row_merge_buf_write(
ulint extra_size;
const dfield_t* entry = buf->tuples[i];
size = rec_get_converted_size_comp(index,
REC_STATUS_ORDINARY,
entry, n_fields,
&extra_size);
size = rec_get_converted_size_temp(
index, entry, n_fields, &extra_size);
ut_ad(size >= extra_size);
ut_ad(extra_size >= REC_N_NEW_EXTRA_BYTES);
extra_size -= REC_N_NEW_EXTRA_BYTES;
size -= REC_N_NEW_EXTRA_BYTES;
/* Encode extra_size + 1 */
if (extra_size + 1 < 0x80) {
@ -601,9 +617,8 @@ row_merge_buf_write(
ut_ad(b + size < block[1]);
rec_convert_dtuple_to_rec_comp(b + extra_size, 0, index,
REC_STATUS_ORDINARY,
entry, n_fields);
rec_convert_dtuple_to_temp(b + extra_size, index,
entry, n_fields);
b += size;
@ -709,6 +724,8 @@ row_merge_read(
ib_uint64_t ofs = ((ib_uint64_t) offset) * block_size;
ibool success;
DBUG_EXECUTE_IF("row_merge_read_failure", return(FALSE););
#ifdef UNIV_DEBUG
if (row_merge_print_block_read) {
fprintf(stderr, "row_merge_read fd=%d ofs=%lu\n",
@ -756,6 +773,8 @@ row_merge_write(
(ulint) (ofs >> 32),
block_size);
DBUG_EXECUTE_IF("row_merge_write_failure", return(FALSE););
#ifdef UNIV_DEBUG
if (row_merge_print_block_write) {
fprintf(stderr, "row_merge_write fd=%d ofs=%lu\n",
@ -872,7 +891,7 @@ err_exit:
*mrec = *buf + extra_size;
rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);
rec_init_offsets_temp(*mrec, index, offsets);
data_size = rec_offs_data_size(offsets);
@ -891,7 +910,7 @@ err_exit:
*mrec = b + extra_size;
rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);
rec_init_offsets_temp(*mrec, index, offsets);
data_size = rec_offs_data_size(offsets);
ut_ad(extra_size + data_size < block_size);
@ -2261,7 +2280,7 @@ row_merge_drop_temp_indexes(void)
/*********************************************************************//**
Creates a temporary merge file and, if UNIV_PFS_IO is defined, registers
the file descriptor with Performance Schema.
@return File descriptor */
@return file descriptor, or -1 on failure */
UNIV_INLINE
int
row_merge_file_create_low(void)
@ -2283,12 +2302,19 @@ row_merge_file_create_low(void)
#ifdef UNIV_PFS_IO
register_pfs_file_open_end(locker, fd);
#endif
if (fd < 0) {
fprintf(stderr,
"InnoDB: Error: Cannot create temporary merge file\n");
return(-1);
}
return(fd);
}
/*********************************************************************//**
Create a merge file. */
static
void
Create a merge file.
@return file descriptor, or -1 on failure */
static __attribute__((nonnull, warn_unused_result))
int
row_merge_file_create(
/*==================*/
merge_file_t* merge_file) /*!< out: merge file structure */
@ -2296,6 +2322,7 @@ row_merge_file_create(
merge_file->fd = row_merge_file_create_low();
merge_file->offset = 0;
merge_file->n_rec = 0;
return(merge_file->fd);
}
/*********************************************************************//**
@ -2541,6 +2568,28 @@ row_merge_rename_tables(
goto err_exit;
}
/* Generate the redo logs for file operations */
fil_mtr_rename_log(old_table->space, old_name,
new_table->space, new_table->name, tmp_name);
/* What if the redo logs are flushed to disk here? This is
tested with the following crash point. */
DBUG_EXECUTE_IF("bug14669848_precommit", log_buffer_flush_to_disk();
DBUG_SUICIDE(););
/* File operations cannot be rolled back. So, before proceeding
with file operations, commit the dictionary changes.*/
trx_commit_for_mysql(trx);
/* If server crashes here, the dictionary in InnoDB and MySQL
will differ. The .ibd files and the .frm files must be swapped
manually by the administrator. No loss of data. */
DBUG_EXECUTE_IF("bug14669848", DBUG_SUICIDE(););
/* Ensure that the redo logs are flushed to disk. The config
innodb_flush_log_at_trx_commit must not affect this. */
log_buffer_flush_to_disk();
/* The following calls will also rename the .ibd data files if
the tables are stored in a single-table tablespace */
@ -2715,7 +2764,7 @@ row_merge_build_indexes(
ulint block_size;
ulint i;
ulint error;
int tmpfd;
int tmpfd = -1;
ulint merge_sort_block_size;
void* block_mem;
@ -2741,13 +2790,31 @@ row_merge_build_indexes(
i * merge_sort_block_size);
}
/* Initialize all the merge file descriptors, so that we
don't call row_merge_file_destroy() on an uninitialized
merge file descriptor */
for (i = 0; i < n_indexes; i++) {
merge_files[i].fd = -1;
}
for (i = 0; i < n_indexes; i++) {
row_merge_file_create(&merge_files[i]);
if (row_merge_file_create(&merge_files[i]) < 0)
{
error = DB_OUT_OF_MEMORY;
goto func_exit;
}
}
tmpfd = row_merge_file_create_low();
if (tmpfd < 0)
{
error = DB_OUT_OF_MEMORY;
goto func_exit;
}
/* Reset the MySQL row buffer that is used when reporting
duplicate keys. */
innobase_rec_reset(table);

View file

@ -4272,6 +4272,13 @@ end:
trx->error_state = DB_SUCCESS;
trx_general_rollback_for_mysql(trx, NULL);
trx->error_state = DB_SUCCESS;
} else {
if (old_is_tmp && !new_is_tmp) {
/* After ALTER TABLE the table statistics
need to be rebuilt. They will be rebuilt
when the table is loaded again. */
table->stat_initialized = FALSE;
}
}
}

View file

@ -3520,13 +3520,13 @@ row_search_for_mysql(
should_release = 0;
for (i = 0; i < btr_search_index_num; i++) {
/* we should check all latches (fix Bug#791030) */
if (rw_lock_get_writer(btr_search_latch_part[i])
!= RW_LOCK_NOT_LOCKED) {
if (UNIV_UNLIKELY(rw_lock_get_writer(btr_search_latch_part[i])
!= RW_LOCK_NOT_LOCKED)) {
should_release |= ((ulint)1 << i);
}
}
if (should_release) {
if (UNIV_UNLIKELY(should_release)) {
/* There is an x-latch request on the adaptive hash index:
release the s-latch to reduce starvation and wait for

View file

@ -519,6 +519,7 @@ row_undo_mod_upd_del_sec(
ulint err = DB_SUCCESS;
ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
ut_ad(!node->undo_row);
heap = mem_heap_create(1024);
while (node->index != NULL) {
@ -580,6 +581,8 @@ row_undo_mod_del_mark_sec(
dict_index_t* index;
ulint err;
ut_ad(!node->undo_row);
heap = mem_heap_create(1024);
while (node->index != NULL) {

View file

@ -216,7 +216,7 @@ row_undo_search_clust_to_pcur(
node->row = row_build(ROW_COPY_DATA, clust_index, rec,
offsets, NULL, ext, node->heap);
if (node->update) {
if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
node->undo_row = dtuple_copy(node->row, node->heap);
row_upd_replace(node->undo_row, &node->undo_ext,
clust_index, node->update, node->heap);

View file

@ -58,6 +58,8 @@ Created 10/8/1995 Heikki Tuuri
*******************************************************/
/* Dummy comment */
#include "m_string.h" /* for my_sys.h */
#include "my_sys.h" /* DEBUG_SYNC_C */
#include "srv0srv.h"
#include "ut0mem.h"
@ -312,58 +314,11 @@ UNIV_INTERN ulong srv_purge_batch_size = 20;
/* the number of rollback segments to use */
UNIV_INTERN ulong srv_rollback_segments = TRX_SYS_N_RSEGS;
/* variable counts amount of data read in total (in bytes) */
UNIV_INTERN ulint srv_data_read = 0;
/* Internal setting for "innodb_stats_method". Decides how InnoDB treats
NULL value when collecting statistics. By default, it is set to
SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */
ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
/* here we count the amount of data written in total (in bytes) */
UNIV_INTERN ulint srv_data_written = 0;
/* the number of the log write requests done */
UNIV_INTERN ulint srv_log_write_requests = 0;
/* the number of physical writes to the log performed */
UNIV_INTERN ulint srv_log_writes = 0;
/* amount of data written to the log files in bytes */
UNIV_INTERN ulint srv_os_log_written = 0;
/* amount of writes being done to the log files */
UNIV_INTERN ulint srv_os_log_pending_writes = 0;
/* we increase this counter, when there we don't have enough space in the
log buffer and have to flush it */
UNIV_INTERN ulint srv_log_waits = 0;
/* this variable counts the amount of times, when the doublewrite buffer
was flushed */
UNIV_INTERN ulint srv_dblwr_writes = 0;
/* here we store the number of pages that have been flushed to the
doublewrite buffer */
UNIV_INTERN ulint srv_dblwr_pages_written = 0;
/* in this variable we store the number of write requests issued */
UNIV_INTERN ulint srv_buf_pool_write_requests = 0;
/* here we store the number of times when we had to wait for a free page
in the buffer pool. It happens when the buffer pool is full and we need
to make a flush, in order to be able to read or create a page. */
UNIV_INTERN ulint srv_buf_pool_wait_free = 0;
/* variable to count the number of pages that were written from buffer
pool to the disk */
UNIV_INTERN ulint srv_buf_pool_flushed = 0;
UNIV_INTERN ulint buf_lru_flush_page_count = 0;
/** Number of buffer pool reads that led to the
reading of a disk page */
UNIV_INTERN ulint srv_buf_pool_reads = 0;
/** Time in seconds between automatic buffer pool dumps */
UNIV_INTERN uint srv_auto_lru_dump = 0;
@ -406,6 +361,9 @@ UNIV_INTERN lint srv_conc_n_threads = 0;
InnoDB */
UNIV_INTERN ulint srv_conc_n_waiting_threads = 0;
/* print all user-level transactions deadlocks to mysqld stderr */
UNIV_INTERN my_bool srv_print_all_deadlocks = FALSE;
typedef struct srv_conc_slot_struct srv_conc_slot_t;
struct srv_conc_slot_struct{
os_event_t event; /*!< event to wait */
@ -489,23 +447,83 @@ UNIV_INTERN ibool srv_print_log_io = FALSE;
UNIV_INTERN ibool srv_print_latch_waits = FALSE;
#endif /* UNIV_DEBUG */
UNIV_INTERN ulint srv_n_rows_inserted = 0;
UNIV_INTERN ulint srv_n_rows_updated = 0;
UNIV_INTERN ulint srv_n_rows_deleted = 0;
UNIV_INTERN ulint srv_n_rows_read = 0;
static ulint srv_n_rows_inserted_old = 0;
static ulint srv_n_rows_updated_old = 0;
static ulint srv_n_rows_deleted_old = 0;
static ulint srv_n_rows_read_old = 0;
UNIV_INTERN ulint srv_n_lock_deadlock_count = 0;
UNIV_INTERN ulint srv_n_lock_wait_count = 0;
UNIV_INTERN ulint srv_n_lock_wait_current_count = 0;
UNIV_INTERN ib_int64_t srv_n_lock_wait_time = 0;
UNIV_INTERN ulint srv_n_lock_max_wait_time = 0;
/* Ensure counters are on separate cache lines */
UNIV_INTERN ulint srv_truncated_status_writes = 0;
#define CACHE_LINE_SIZE 64
#define CACHE_ALIGNED __attribute__ ((aligned (CACHE_LINE_SIZE)))
UNIV_INTERN byte
counters_pad_start[CACHE_LINE_SIZE] __attribute__((unused)) = {0};
UNIV_INTERN ulint srv_n_rows_inserted CACHE_ALIGNED = 0;
UNIV_INTERN ulint srv_n_rows_updated CACHE_ALIGNED = 0;
UNIV_INTERN ulint srv_n_rows_deleted CACHE_ALIGNED = 0;
UNIV_INTERN ulint srv_n_rows_read CACHE_ALIGNED = 0;
UNIV_INTERN ulint srv_n_lock_deadlock_count CACHE_ALIGNED = 0;
UNIV_INTERN ulint srv_n_lock_wait_count CACHE_ALIGNED = 0;
UNIV_INTERN ulint srv_n_lock_wait_current_count CACHE_ALIGNED = 0;
UNIV_INTERN ib_int64_t srv_n_lock_wait_time CACHE_ALIGNED = 0;
UNIV_INTERN ulint srv_n_lock_max_wait_time CACHE_ALIGNED = 0;
UNIV_INTERN ulint srv_truncated_status_writes CACHE_ALIGNED = 0;
/* variable counts amount of data read in total (in bytes) */
UNIV_INTERN ulint srv_data_read CACHE_ALIGNED = 0;
/* here we count the amount of data written in total (in bytes) */
UNIV_INTERN ulint srv_data_written CACHE_ALIGNED = 0;
/* the number of the log write requests done */
UNIV_INTERN ulint srv_log_write_requests CACHE_ALIGNED = 0;
/* the number of physical writes to the log performed */
UNIV_INTERN ulint srv_log_writes CACHE_ALIGNED = 0;
/* amount of data written to the log files in bytes */
UNIV_INTERN ulint srv_os_log_written CACHE_ALIGNED = 0;
/* amount of writes being done to the log files */
UNIV_INTERN ulint srv_os_log_pending_writes CACHE_ALIGNED = 0;
/* we increase this counter when we don't have enough space in the
log buffer and have to flush it */
UNIV_INTERN ulint srv_log_waits CACHE_ALIGNED = 0;
/* this variable counts the amount of times, when the doublewrite buffer
was flushed */
UNIV_INTERN ulint srv_dblwr_writes CACHE_ALIGNED = 0;
/* here we store the number of pages that have been flushed to the
doublewrite buffer */
UNIV_INTERN ulint srv_dblwr_pages_written CACHE_ALIGNED = 0;
/* in this variable we store the number of write requests issued */
UNIV_INTERN ulint srv_buf_pool_write_requests CACHE_ALIGNED = 0;
/* here we store the number of times when we had to wait for a free page
in the buffer pool. It happens when the buffer pool is full and we need
to make a flush, in order to be able to read or create a page. */
UNIV_INTERN ulint srv_buf_pool_wait_free CACHE_ALIGNED = 0;
/** Number of buffer pool reads that led to the
reading of a disk page */
UNIV_INTERN ulint srv_buf_pool_reads CACHE_ALIGNED = 0;
/* variable to count the number of pages that were written from buffer
pool to the disk */
UNIV_INTERN ulint srv_buf_pool_flushed CACHE_ALIGNED = 0;
/* variable to count the number of LRU flushed pages */
UNIV_INTERN ulint buf_lru_flush_page_count CACHE_ALIGNED = 0;
UNIV_INTERN byte
counters_pad_end[CACHE_LINE_SIZE] __attribute__((unused)) = {0};
/*
Set the following to 0 if you want InnoDB to write messages on
@ -1435,7 +1453,7 @@ retry:
ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
#endif /* UNIV_SYNC_DEBUG */
if (innobase_get_slow_log() && trx->take_stats) {
if (UNIV_UNLIKELY(trx->take_stats)) {
ut_usectime(&sec, &ms);
start_time = (ib_uint64_t)sec * 1000000 + ms;
} else {
@ -1450,7 +1468,7 @@ retry:
trx->op_info = "";
if (innobase_get_slow_log() && trx->take_stats && start_time) {
if (UNIV_UNLIKELY(start_time != 0)) {
ut_usectime(&sec, &ms);
finish_time = (ib_uint64_t)sec * 1000000 + ms;
trx->innodb_que_wait_timer += (ulint)(finish_time - start_time);
@ -1761,6 +1779,10 @@ srv_suspend_mysql_thread(
trx = thr_get_trx(thr);
if (trx->mysql_thd != 0) {
DEBUG_SYNC_C("srv_suspend_mysql_thread_enter");
}
os_event_set(srv_lock_timeout_thread_event);
mutex_enter(&kernel_mutex);
@ -2291,16 +2313,18 @@ void
srv_export_innodb_status(void)
/*==========================*/
{
buf_pool_stat_t stat;
ulint LRU_len;
ulint free_len;
ulint flush_list_len;
ulint mem_adaptive_hash, mem_dictionary;
read_view_t* oldest_view;
ulint i;
buf_pool_stat_t stat;
buf_pools_list_size_t buf_pools_list_size;
ulint LRU_len;
ulint free_len;
ulint flush_list_len;
ulint mem_adaptive_hash, mem_dictionary;
read_view_t* oldest_view;
ulint i;
buf_get_total_stat(&stat);
buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
buf_get_total_list_size_in_bytes(&buf_pools_list_size);
if (btr_search_sys && btr_search_sys->hash_index[0]->heap) {
mem_adaptive_hash = mem_heap_get_size(btr_search_sys->hash_index[0]->heap);
@ -2365,7 +2389,12 @@ srv_export_innodb_status(void)
export_vars.innodb_buffer_pool_read_ahead_evicted
= stat.n_ra_pages_evicted;
export_vars.innodb_buffer_pool_pages_data = LRU_len;
export_vars.innodb_buffer_pool_bytes_data =
buf_pools_list_size.LRU_bytes
+ buf_pools_list_size.unzip_LRU_bytes;
export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
export_vars.innodb_buffer_pool_bytes_dirty =
buf_pools_list_size.flush_list_bytes;
export_vars.innodb_buffer_pool_pages_free = free_len;
export_vars.innodb_deadlocks = srv_n_lock_deadlock_count;
#ifdef UNIV_DEBUG
@ -2498,6 +2527,23 @@ srv_export_innodb_status(void)
export_vars.innodb_rows_deleted = srv_n_rows_deleted;
export_vars.innodb_truncated_status_writes = srv_truncated_status_writes;
#ifdef UNIV_DEBUG
if (trx_sys->max_trx_id < purge_sys->done_trx_no) {
export_vars.innodb_purge_trx_id_age = 0;
} else {
export_vars.innodb_purge_trx_id_age =
trx_sys->max_trx_id - purge_sys->done_trx_no;
}
if (!purge_sys->view
|| trx_sys->max_trx_id < purge_sys->view->up_limit_id) {
export_vars.innodb_purge_view_trx_id_age = 0;
} else {
export_vars.innodb_purge_view_trx_id_age =
trx_sys->max_trx_id - purge_sys->view->up_limit_id;
}
#endif /* UNIV_DEBUG */
mutex_exit(&srv_innodb_monitor_mutex);
}
@ -3335,6 +3381,26 @@ loop:
for (i = 0; i < 10; i++) {
ulint cur_time = ut_time_ms();
#ifdef UNIV_DEBUG
if (btr_cur_limit_optimistic_insert_debug
&& srv_n_purge_threads == 0) {
/* If btr_cur_limit_optimistic_insert_debug is enabled
and there are no purge threads, the purge frequency is
increased 100-fold (one purge per 100 msec) to speed up
debug scripts that have to wait for the purge to finish. */
next_itr_time -= 900;
srv_main_thread_op_info = "master purging";
srv_master_do_purge();
if (srv_fast_shutdown && srv_shutdown_state > 0) {
goto background_loop;
}
}
#endif /* UNIV_DEBUG */
n_pages_flushed = 0; /* initialize */
/* ALTER TABLE in MySQL requires on Unix that the table handler
@ -3590,7 +3656,6 @@ retry_flush_batch:
bpage = UT_LIST_GET_NEXT(flush_list, bpage);
new_blocks_num++;
}
buf_flush_list_mutex_exit(buf_pool);
flushed_blocks_num = new_blocks_num + prev_flush_info[j].count
- blocks_num;
@ -3598,7 +3663,6 @@ retry_flush_batch:
flushed_blocks_num = 0;
}
buf_flush_list_mutex_enter(buf_pool);
bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);

View file

@ -61,6 +61,10 @@ UNIV_INTERN mysql_pfs_key_t trx_purge_latch_key;
UNIV_INTERN mysql_pfs_key_t purge_sys_bh_mutex_key;
#endif /* UNIV_PFS_MUTEX */
#ifdef UNIV_DEBUG
UNIV_INTERN my_bool srv_purge_view_update_only_debug;
#endif /* UNIV_DEBUG */
/*****************************************************************//**
Checks if trx_id is >= purge_view: then it is guaranteed that its update
undo log still exists in the system.
@ -236,6 +240,7 @@ trx_purge_sys_create(
purge_sys->purge_trx_no = 0;
purge_sys->purge_undo_no = 0;
purge_sys->next_stored = FALSE;
ut_d(purge_sys->done_trx_no = 0);
rw_lock_create(trx_purge_latch_key,
&purge_sys->latch, SYNC_PURGE_LATCH);
@ -656,6 +661,12 @@ trx_purge_truncate_if_arr_empty(void)
{
static ulint count;
#ifdef UNIV_DEBUG
if (purge_sys->arr->n_used == 0) {
purge_sys->done_trx_no = purge_sys->purge_trx_no;
}
#endif /* UNIV_DEBUG */
if (!(++count % TRX_SYS_N_RSEGS) && purge_sys->arr->n_used == 0) {
trx_purge_truncate_history();
@ -1172,6 +1183,12 @@ trx_purge(
rw_lock_x_unlock(&(purge_sys->latch));
#ifdef UNIV_DEBUG
if (srv_purge_view_update_only_debug) {
return(0);
}
#endif
purge_sys->state = TRX_PURGE_ON;
purge_sys->handle_limit = purge_sys->n_pages_handled + limit;

View file

@ -36,6 +36,7 @@ Created 3/26/1996 Heikki Tuuri
#ifndef UNIV_HOTBACKUP
#include "dict0dict.h"
#include "ut0mem.h"
#include "read0read.h"
#include "row0ext.h"
#include "row0upd.h"
#include "que0que.h"
@ -1647,6 +1648,25 @@ trx_undo_prev_version_build(
if (row_upd_changes_field_size_or_external(index, offsets, update)) {
ulint n_ext;
/* We should confirm the existence of disowned external data,
if the previous version record is delete marked. If the trx_id
of the previous record is seen by purge view, we should treat
it as missing history, because the disowned external data
might be purged already.
The inherited external data (BLOBs) can be freed (purged)
after trx_id was committed, provided that no view was started
before trx_id. If the purge view can see the committed
delete-marked record by trx_id, no transactions need to access
the BLOB. */
if ((update->info_bits & REC_INFO_DELETED_FLAG)
&& read_view_sees_trx_id(purge_sys->view, trx_id)) {
/* treat as a fresh insert, not to
cause assertion error at the caller. */
return(DB_SUCCESS);
}
/* We have to set the appropriate extern storage bits in the
old version of the record: the extern bits in rec for those
fields that update does NOT update, as well as the bits for

View file

@ -236,7 +236,7 @@ trx_allocate_for_mysql(void)
mutex_exit(&kernel_mutex);
if (innobase_get_slow_log() && trx->take_stats) {
if (UNIV_UNLIKELY(trx->take_stats)) {
trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE);
memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
}
@ -1269,7 +1269,7 @@ trx_end_lock_wait(
thr = UT_LIST_GET_FIRST(trx->wait_thrs);
}
if (innobase_get_slow_log() && trx->take_stats) {
if (UNIV_UNLIKELY(trx->take_stats)) {
ut_usectime(&sec, &ms);
now = (ib_uint64_t)sec * 1000000 + ms;
trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted);
@ -1304,7 +1304,7 @@ trx_lock_wait_to_suspended(
thr = UT_LIST_GET_FIRST(trx->wait_thrs);
}
if (innobase_get_slow_log() && trx->take_stats) {
if (UNIV_UNLIKELY(trx->take_stats)) {
ut_usectime(&sec, &ms);
now = (ib_uint64_t)sec * 1000000 + ms;
trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted);