MDEV-13328 ALTER TABLE…DISCARD TABLESPACE takes a lot of time

With a big buffer pool that contains many data pages,
DISCARD TABLESPACE took a long time, because it would scan the
entire buffer pool to remove any pages that belong to the tablespace.
The cost of this scan depends on the size of the buffer pool, not on
the size of the table, so it is wasted effort especially when the
table to be discarded is empty.

The minimum amount of work that DISCARD TABLESPACE must do is to
remove the pages of the to-be-discarded table from the
buf_pool->flush_list because any writes to the data file must be
prevented before the file is deleted.

If DISCARD TABLESPACE does not evict the pages from the buffer pool,
then IMPORT TABLESPACE must do it, because we must prevent pre-DISCARD,
not-yet-evicted pages from being mistaken for pages of the imported
tablespace.

It would not be a useful fix to simply move the buffer pool scan to
the IMPORT TABLESPACE step. What we can do is to actively evict those
pages that could be mistaken for imported pages. In this way, when
importing a small table into a big buffer pool, the import should
still run relatively fast.

Import bypasses the buffer pool when reading pages for the
adjustment phase. In the adjustment phase, if a page exists in
the buffer pool, we could replace it with the page from the imported
file. Unfortunately, I did not get this to work properly, so instead
we will simply evict any matching page from the buffer pool.

buf_page_get_gen(): Implement BUF_EVICT_IF_IN_POOL, a new mode
where the requested page will be evicted if it is found. There
must be no unwritten changes for the page.

buf_remove_t: Remove. Instead, use trx!=NULL to signify that a write
to file is desired, and use a separate parameter bool drop_ahi.

buf_LRU_flush_or_remove_pages(), fil_delete_tablespace():
Replace buf_remove_t.

buf_LRU_remove_pages(), buf_LRU_remove_all_pages(): Remove.

PageConverter::m_mtr: A dummy mini-transaction buffer.

PageConverter::PageConverter(): Complete the member initialization list.

PageConverter::operator()(): Evict any 'shadow' pages from the
buffer pool so that pre-existing (garbage) pages cannot be mistaken
for pages that exist in the being-imported file.

row_discard_tablespace(): Remove a bogus comment that seems to
refer to IMPORT TABLESPACE, not DISCARD TABLESPACE.
This commit is contained in:
Marko Mäkelä 2017-11-02 22:38:37 +02:00
commit 51b4366bfb
22 changed files with 185 additions and 604 deletions

View file

@ -3060,8 +3060,8 @@ buf_page_get_gen(
ib_mutex_t* fix_mutex = NULL;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
ut_ad(mtr);
ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((mtr == NULL) == (mode == BUF_EVICT_IF_IN_POOL));
ut_ad(!mtr || mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH)
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_NO_LATCH));
@ -3072,23 +3072,29 @@ buf_page_get_gen(
#ifdef UNIV_DEBUG
switch (mode) {
case BUF_EVICT_IF_IN_POOL:
/* After DISCARD TABLESPACE, the tablespace would not exist,
but in IMPORT TABLESPACE, PageConverter::operator() must
replace any old pages, which were not evicted during DISCARD.
Skip the assertion on zip_size. */
break;
case BUF_GET_NO_LATCH:
ut_ad(rw_latch == RW_NO_LATCH);
break;
/* fall through */
case BUF_GET:
case BUF_GET_IF_IN_POOL:
case BUF_PEEK_IF_IN_POOL:
case BUF_GET_IF_IN_POOL_OR_WATCH:
case BUF_GET_POSSIBLY_FREED:
ut_ad(zip_size == fil_space_get_zip_size(space));
break;
default:
ut_error;
}
#endif /* UNIV_DEBUG */
ut_ad(zip_size == fil_space_get_zip_size(space));
ut_ad(ut_is_2pow(zip_size));
#ifndef UNIV_LOG_DEBUG
ut_ad(!ibuf_inside(mtr)
ut_ad(!mtr || !ibuf_inside(mtr)
|| ibuf_page_low(space, zip_size, offset,
FALSE, file, line, NULL));
#endif
@ -3153,9 +3159,11 @@ loop:
rw_lock_x_unlock(hash_lock);
}
if (mode == BUF_GET_IF_IN_POOL
|| mode == BUF_PEEK_IF_IN_POOL
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
switch (mode) {
case BUF_GET_IF_IN_POOL:
case BUF_GET_IF_IN_POOL_OR_WATCH:
case BUF_PEEK_IF_IN_POOL:
case BUF_EVICT_IF_IN_POOL:
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
@ -3244,8 +3252,10 @@ got_block:
ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
if (mode == BUF_GET_IF_IN_POOL || mode == BUF_PEEK_IF_IN_POOL) {
switch (mode) {
case BUF_GET_IF_IN_POOL:
case BUF_PEEK_IF_IN_POOL:
case BUF_EVICT_IF_IN_POOL:
bool must_read;
{
@ -3274,6 +3284,19 @@ got_block:
buf_page_t* bpage;
case BUF_BLOCK_FILE_PAGE:
if (UNIV_UNLIKELY(mode == BUF_EVICT_IF_IN_POOL)) {
evict_from_pool:
ut_ad(!fix_block->page.oldest_modification);
buf_pool_mutex_enter(buf_pool);
buf_block_unfix(fix_block);
if (!buf_LRU_free_page(&fix_block->page, true)) {
ut_ad(0);
}
buf_pool_mutex_exit(buf_pool);
return(NULL);
}
break;
case BUF_BLOCK_ZIP_PAGE:
@ -3306,6 +3329,10 @@ got_block:
goto loop;
}
if (UNIV_UNLIKELY(mode == BUF_EVICT_IF_IN_POOL)) {
goto evict_from_pool;
}
/* Buffer-fix the block so that it cannot be evicted
or relocated while we are attempting to allocate an
uncompressed page. */

View file

@ -668,231 +668,27 @@ buf_flush_dirty_pages(buf_pool_t* buf_pool, ulint id, const trx_t* trx)
|| buf_pool_get_dirty_pages_count(buf_pool, id) == 0);
}
/******************************************************************//**
Remove all pages that belong to a given tablespace inside a specific
buffer pool instance when we are DISCARDing the tablespace. */
static MY_ATTRIBUTE((nonnull))
/** Empty the flush list for all pages belonging to a tablespace.
@param[in] id tablespace identifier
@param[in] trx transaction, for checking for user interrupt;
or NULL if nothing is to be written
@param[in] drop_ahi whether to drop the adaptive hash index */
UNIV_INTERN
void
buf_LRU_remove_all_pages(
/*=====================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
ulint id) /*!< in: space id */
buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx, bool drop_ahi)
{
buf_page_t* bpage;
ibool all_freed;
scan_again:
buf_pool_mutex_enter(buf_pool);
all_freed = TRUE;
for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
bpage != NULL;
/* No op */) {
rw_lock_t* hash_lock;
buf_page_t* prev_bpage;
ib_mutex_t* block_mutex = NULL;
ut_a(buf_page_in_file(bpage));
ut_ad(bpage->in_LRU_list);
prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
/* bpage->space and bpage->io_fix are protected by
buf_pool->mutex and the block_mutex. It is safe to check
them while holding buf_pool->mutex only. */
if (buf_page_get_space(bpage) != id) {
/* Skip this block, as it does not belong to
the space that is being invalidated. */
goto next_page;
} else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
/* We cannot remove this page during this scan
yet; maybe the system is currently reading it
in, or flushing the modifications to the file */
all_freed = FALSE;
goto next_page;
} else {
ulint fold = buf_page_address_fold(
bpage->space, bpage->offset);
hash_lock = buf_page_hash_lock_get(buf_pool, fold);
rw_lock_x_lock(hash_lock);
block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
if (bpage->buf_fix_count > 0) {
mutex_exit(block_mutex);
rw_lock_x_unlock(hash_lock);
/* We cannot remove this page during
this scan yet; maybe the system is
currently reading it in, or flushing
the modifications to the file */
all_freed = FALSE;
goto next_page;
}
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool = buf_pool_from_array(i);
if (drop_ahi) {
buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
}
ut_ad(mutex_own(block_mutex));
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
fprintf(stderr,
"Dropping space %lu page %lu\n",
(ulong) buf_page_get_space(bpage),
(ulong) buf_page_get_page_no(bpage));
}
#endif
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
/* Do nothing, because the adaptive hash index
covers uncompressed pages only. */
} else if (((buf_block_t*) bpage)->index) {
ulint page_no;
ulint zip_size;
buf_pool_mutex_exit(buf_pool);
zip_size = buf_page_get_zip_size(bpage);
page_no = buf_page_get_page_no(bpage);
rw_lock_x_unlock(hash_lock);
mutex_exit(block_mutex);
/* Note that the following call will acquire
and release block->lock X-latch. */
btr_search_drop_page_hash_when_freed(
id, zip_size, page_no);
goto scan_again;
}
if (bpage->oldest_modification != 0) {
buf_flush_remove(bpage);
}
ut_ad(!bpage->in_flush_list);
/* Remove from the LRU list. */
if (buf_LRU_block_remove_hashed(bpage, true)) {
buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
} else {
ut_ad(block_mutex == &buf_pool->zip_mutex);
}
ut_ad(!mutex_own(block_mutex));
#ifdef UNIV_SYNC_DEBUG
/* buf_LRU_block_remove_hashed() releases the hash_lock */
ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
next_page:
bpage = prev_bpage;
}
buf_pool_mutex_exit(buf_pool);
if (!all_freed) {
os_thread_sleep(20000);
goto scan_again;
}
}
/******************************************************************//**
Remove pages belonging to a given tablespace inside a specific
buffer pool instance when we are deleting the data file(s) of that
tablespace. The pages still remain a part of LRU and are evicted from
the list as they age towards the tail of the LRU only if buf_remove
is BUF_REMOVE_FLUSH_NO_WRITE. */
static MY_ATTRIBUTE((nonnull(1)))
void
buf_LRU_remove_pages(
/*=================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
ulint id, /*!< in: space id */
buf_remove_t buf_remove, /*!< in: remove or flush strategy */
const trx_t* trx) /*!< to check if the operation must
be interrupted */
{
switch (buf_remove) {
case BUF_REMOVE_ALL_NO_WRITE:
buf_LRU_remove_all_pages(buf_pool, id);
break;
case BUF_REMOVE_FLUSH_NO_WRITE:
ut_a(trx == 0);
buf_flush_dirty_pages(buf_pool, id, NULL);
break;
case BUF_REMOVE_FLUSH_WRITE:
ut_a(trx != 0);
buf_flush_dirty_pages(buf_pool, id, trx);
}
if (trx && !trx_is_interrupted(trx)) {
/* Ensure that all asynchronous IO is completed. */
os_aio_wait_until_no_pending_writes();
fil_flush(id);
break;
}
}
/******************************************************************//**
Flushes all dirty pages or removes all pages belonging
to a given tablespace. A PROBLEM: if readahead is being started, what
guarantees that it will not try to read in pages after this operation
has completed? */
UNIV_INTERN
void
buf_LRU_flush_or_remove_pages(
/*==========================*/
ulint id, /*!< in: space id */
buf_remove_t buf_remove, /*!< in: remove or flush strategy */
const trx_t* trx) /*!< to check if the operation must
be interrupted */
{
ulint i;
/* Before we attempt to drop pages one by one we first
attempt to drop page hash index entries in batches to make
it more efficient. The batching attempt is a best effort
attempt and does not guarantee that all pages hash entries
will be dropped. We get rid of remaining page hash entries
one by one below. */
for (i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;
buf_pool = buf_pool_from_array(i);
switch (buf_remove) {
case BUF_REMOVE_ALL_NO_WRITE:
buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
break;
case BUF_REMOVE_FLUSH_NO_WRITE:
/* It is a DROP TABLE for a single table
tablespace. No AHI entries exist because
we already dealt with them when freeing up
extents. */
case BUF_REMOVE_FLUSH_WRITE:
/* We allow read-only queries against the
table, there is no need to drop the AHI entries. */
break;
}
buf_LRU_remove_pages(buf_pool, id, buf_remove, trx);
}
}

View file

@ -1673,7 +1673,7 @@ dict_table_rename_in_cache(
filepath = fil_make_ibd_name(table->name, false);
}
fil_delete_tablespace(table->space, BUF_REMOVE_ALL_NO_WRITE);
fil_delete_tablespace(table->space, true);
/* Delete any temp file hanging around. */
if (os_file_status(filepath, &exists, &ftype)

View file

@ -2526,8 +2526,7 @@ fil_op_log_parse_or_replay(
switch (type) {
case MLOG_FILE_DELETE:
if (fil_tablespace_exists_in_mem(space_id)) {
dberr_t err = fil_delete_tablespace(
space_id, BUF_REMOVE_FLUSH_NO_WRITE);
dberr_t err = fil_delete_tablespace(space_id);
ut_a(err == DB_SUCCESS);
}
@ -2805,7 +2804,7 @@ fil_close_tablespace(
completely and permanently. The flag stop_new_ops also prevents
fil_flush() from being applied to this tablespace. */
buf_LRU_flush_or_remove_pages(id, BUF_REMOVE_FLUSH_WRITE, trx);
buf_LRU_flush_or_remove_pages(id, trx);
#endif
mutex_enter(&fil_system->mutex);
@ -2832,18 +2831,13 @@ fil_close_tablespace(
return(err);
}
/*******************************************************************//**
Deletes a single-table tablespace. The tablespace must be cached in the
memory cache.
/** Delete a tablespace and associated .ibd file.
@param[in] id tablespace identifier
@param[in] drop_ahi whether to drop the adaptive hash index
@return DB_SUCCESS or error */
UNIV_INTERN
dberr_t
fil_delete_tablespace(
/*==================*/
ulint id, /*!< in: space id */
buf_remove_t buf_remove) /*!< in: specify the action to take
on the tables pages in the buffer
pool */
fil_delete_tablespace(ulint id, bool drop_ahi)
{
char* path = 0;
fil_space_t* space = 0;
@ -2899,7 +2893,7 @@ fil_delete_tablespace(
To deal with potential read requests by checking the
::stop_new_ops flag in fil_io() */
buf_LRU_flush_or_remove_pages(id, buf_remove, 0);
buf_LRU_flush_or_remove_pages(id, NULL, drop_ahi);
#endif /* !UNIV_HOTBACKUP */
@ -3010,7 +3004,7 @@ fil_discard_tablespace(
{
dberr_t err;
switch (err = fil_delete_tablespace(id, BUF_REMOVE_ALL_NO_WRITE)) {
switch (err = fil_delete_tablespace(id, true)) {
case DB_SUCCESS:
break;

View file

@ -59,6 +59,7 @@ Created 11/5/1995 Heikki Tuuri
#define BUF_GET_POSSIBLY_FREED 16
/*!< Like BUF_GET, but do not mind
if the file page has been freed. */
#define BUF_EVICT_IF_IN_POOL 20 /*!< evict a clean block if found */
/* @} */
/** @name Modes for buf_page_get_known_nowait */
/* @{ */

View file

@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -51,19 +52,14 @@ These are low-level functions
/** Minimum LRU list length for which the LRU_old pointer is defined */
#define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */
/******************************************************************//**
Flushes all dirty pages or removes all pages belonging
to a given tablespace. A PROBLEM: if readahead is being started, what
guarantees that it will not try to read in pages after this operation
has completed? */
/** Empty the flush list for all pages belonging to a tablespace.
@param[in] id tablespace identifier
@param[in] trx transaction, for checking for user interrupt;
or NULL if nothing is to be written
@param[in] drop_ahi whether to drop the adaptive hash index */
UNIV_INTERN
void
buf_LRU_flush_or_remove_pages(
/*==========================*/
ulint id, /*!< in: space id */
buf_remove_t buf_remove, /*!< in: remove or flush strategy */
const trx_t* trx); /*!< to check if the operation must
be interrupted */
buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx, bool drop_ahi=false);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/********************************************************************//**

View file

@ -58,17 +58,6 @@ enum buf_flush_t {
BUF_FLUSH_N_TYPES /*!< index of last element + 1 */
};
/** Algorithm to remove the pages for a tablespace from the buffer pool.
See buf_LRU_flush_or_remove_pages(). */
enum buf_remove_t {
BUF_REMOVE_ALL_NO_WRITE, /*!< Remove all pages from the buffer
pool, don't write or sync to disk */
BUF_REMOVE_FLUSH_NO_WRITE, /*!< Remove only, from the flush list,
don't write or sync to disk */
BUF_REMOVE_FLUSH_WRITE /*!< Flush dirty pages to disk only
don't remove from the buffer pool */
};
/** Flags for io_fix types */
enum buf_io_fix {
BUF_IO_NONE = 0, /**< no pending I/O */

View file

@ -851,18 +851,13 @@ fil_op_log_parse_or_replay(
only be parsed but not replayed */
ulint log_flags); /*!< in: redo log flags
(stored in the page number parameter) */
/*******************************************************************//**
Deletes a single-table tablespace. The tablespace must be cached in the
memory cache.
@return TRUE if success */
/** Delete a tablespace and associated .ibd file.
@param[in] id tablespace identifier
@param[in] drop_ahi whether to drop the adaptive hash index
@return DB_SUCCESS or error */
UNIV_INTERN
dberr_t
fil_delete_tablespace(
/*==================*/
ulint id, /*!< in: space id */
buf_remove_t buf_remove); /*!< in: specify the action to take
on the tables pages in the buffer
pool */
fil_delete_tablespace(ulint id, bool drop_ahi = false);
/*******************************************************************//**
Closes a single-table tablespace. The tablespace must be cached in the
memory cache. Free all pages used by the tablespace.

View file

@ -1601,18 +1601,16 @@ PageConverter::PageConverter(
:
AbstractCallback(trx),
m_cfg(cfg),
m_index(cfg->m_indexes),
m_current_lsn(log_get_lsn()),
m_page_zip_ptr(0),
m_heap(0) UNIV_NOTHROW
m_rec_iter(),
m_offsets_(), m_offsets(m_offsets_),
m_heap(0),
m_cluster_index(dict_table_get_first_index(cfg->m_table)) UNIV_NOTHROW
{
m_index = m_cfg->m_indexes;
m_current_lsn = log_get_lsn();
ut_a(m_current_lsn > 0);
m_offsets = m_offsets_;
rec_offs_init(m_offsets_);
m_cluster_index = dict_table_get_first_index(m_cfg->m_table);
}
/**
@ -2103,7 +2101,7 @@ PageConverter::operator() (
we can work on them */
if ((err = update_page(block, page_type)) != DB_SUCCESS) {
return(err);
break;
}
/* Note: For compressed pages this function will write to the
@ -2140,9 +2138,15 @@ PageConverter::operator() (
"%s: Page %lu at offset " UINT64PF " looks corrupted.",
m_filepath, (ulong) (offset / m_page_size), offset);
return(DB_CORRUPTION);
err = DB_CORRUPTION;
}
/* If we already had an old page with matching number
in the buffer pool, evict it now, because
we no longer evict the pages on DISCARD TABLESPACE. */
buf_page_get_gen(get_space_id(), get_zip_size(), block->page.offset,
RW_NO_LATCH, NULL, BUF_EVICT_IF_IN_POOL,
__FILE__, __LINE__, NULL);
return(err);
}
@ -3716,8 +3720,7 @@ row_import_for_mysql(
The only dirty pages generated should be from the pessimistic purge
of delete marked records that couldn't be purged in Phase I. */
buf_LRU_flush_or_remove_pages(
prebuilt->table->space, BUF_REMOVE_FLUSH_WRITE, trx);
buf_LRU_flush_or_remove_pages(prebuilt->table->space, trx);
if (trx_is_interrupted(trx)) {
ib_logf(IB_LOG_LEVEL_INFO, "Phase III - Flush interrupted");

View file

@ -2477,10 +2477,7 @@ err_exit:
/* We already have .ibd file here. it should be deleted. */
if (table->space
&& fil_delete_tablespace(
table->space,
BUF_REMOVE_FLUSH_NO_WRITE)
!= DB_SUCCESS) {
&& fil_delete_tablespace(table->space) != DB_SUCCESS) {
ut_print_timestamp(stderr);
fprintf(stderr,
@ -3109,9 +3106,6 @@ row_discard_tablespace(
4) FOREIGN KEY operations: if table->n_foreign_key_checks_running > 0,
we do not allow the discard. */
/* Play safe and remove all insert buffer entries, though we should
have removed them already when DISCARD TABLESPACE was called */
ibuf_delete_for_discarded_space(table->space);
table_id_t new_id;
@ -4496,9 +4490,7 @@ row_drop_table_for_mysql(
fil_delete_file(filepath);
} else if (fil_delete_tablespace(
space_id,
BUF_REMOVE_FLUSH_NO_WRITE)
} else if (fil_delete_tablespace(space_id)
!= DB_SUCCESS) {
fprintf(stderr,
"InnoDB: We removed now the InnoDB"

View file

@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -542,8 +543,7 @@ row_quiesce_table_start(
}
if (!trx_is_interrupted(trx)) {
buf_LRU_flush_or_remove_pages(
table->space, BUF_REMOVE_FLUSH_WRITE, trx);
buf_LRU_flush_or_remove_pages(table->space, trx);
if (trx_is_interrupted(trx)) {

View file

@ -2953,8 +2953,8 @@ buf_page_get_gen(
ib_mutex_t* fix_mutex = NULL;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
ut_ad(mtr);
ut_ad(mtr->state == MTR_ACTIVE);
ut_ad((mtr == NULL) == (mode == BUF_EVICT_IF_IN_POOL));
ut_ad(!mtr || mtr->state == MTR_ACTIVE);
ut_ad((rw_latch == RW_S_LATCH)
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_NO_LATCH));
@ -2965,23 +2965,29 @@ buf_page_get_gen(
#ifdef UNIV_DEBUG
switch (mode) {
case BUF_EVICT_IF_IN_POOL:
/* After DISCARD TABLESPACE, the tablespace would not exist,
but in IMPORT TABLESPACE, PageConverter::operator() must
replace any old pages, which were not evicted during DISCARD.
Skip the assertion on zip_size. */
break;
case BUF_GET_NO_LATCH:
ut_ad(rw_latch == RW_NO_LATCH);
break;
/* fall through */
case BUF_GET:
case BUF_GET_IF_IN_POOL:
case BUF_PEEK_IF_IN_POOL:
case BUF_GET_IF_IN_POOL_OR_WATCH:
case BUF_GET_POSSIBLY_FREED:
ut_ad(zip_size == fil_space_get_zip_size(space));
break;
default:
ut_error;
}
#endif /* UNIV_DEBUG */
ut_ad(zip_size == fil_space_get_zip_size(space));
ut_ad(ut_is_2pow(zip_size));
#ifndef UNIV_LOG_DEBUG
ut_ad(!ibuf_inside(mtr)
ut_ad(!mtr || !ibuf_inside(mtr)
|| ibuf_page_low(space, zip_size, offset,
FALSE, file, line, NULL));
#endif
@ -3051,9 +3057,11 @@ loop:
rw_lock_x_unlock(hash_lock);
}
if (mode == BUF_GET_IF_IN_POOL
|| mode == BUF_PEEK_IF_IN_POOL
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
switch (mode) {
case BUF_GET_IF_IN_POOL:
case BUF_GET_IF_IN_POOL_OR_WATCH:
case BUF_PEEK_IF_IN_POOL:
case BUF_EVICT_IF_IN_POOL:
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
@ -3142,8 +3150,10 @@ got_block:
ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
if (mode == BUF_GET_IF_IN_POOL || mode == BUF_PEEK_IF_IN_POOL) {
switch (mode) {
case BUF_GET_IF_IN_POOL:
case BUF_PEEK_IF_IN_POOL:
case BUF_EVICT_IF_IN_POOL:
bool must_read;
{
@ -3181,6 +3191,22 @@ got_block:
case BUF_BLOCK_FILE_PAGE:
ut_ad(fix_mutex != &buf_pool->zip_mutex);
if (UNIV_UNLIKELY(mode == BUF_EVICT_IF_IN_POOL)) {
evict_from_pool:
ut_ad(!fix_block->page.oldest_modification);
mutex_enter(&buf_pool->LRU_list_mutex);
buf_block_unfix(fix_block);
mutex_enter(fix_mutex);
if (!buf_LRU_free_page(&fix_block->page, true)) {
ut_ad(0);
mutex_exit(&buf_pool->LRU_list_mutex);
}
mutex_exit(fix_mutex);
return(NULL);
}
break;
case BUF_BLOCK_ZIP_PAGE:
@ -3215,6 +3241,10 @@ got_block:
goto loop;
}
if (UNIV_UNLIKELY(mode == BUF_EVICT_IF_IN_POOL)) {
goto evict_from_pool;
}
/* Buffer-fix the block so that it cannot be evicted
or relocated while we are attempting to allocate an
uncompressed page. */

View file

@ -726,239 +726,27 @@ buf_flush_dirty_pages(buf_pool_t* buf_pool, ulint id, const trx_t* trx)
|| buf_pool_get_dirty_pages_count(buf_pool, id) == 0);
}
/******************************************************************//**
Remove all pages that belong to a given tablespace inside a specific
buffer pool instance when we are DISCARDing the tablespace. */
static MY_ATTRIBUTE((nonnull))
/** Empty the flush list for all pages belonging to a tablespace.
@param[in] id tablespace identifier
@param[in] trx transaction, for checking for user interrupt;
or NULL if nothing is to be written
@param[in] drop_ahi whether to drop the adaptive hash index */
UNIV_INTERN
void
buf_LRU_remove_all_pages(
/*=====================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
ulint id) /*!< in: space id */
buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx, bool drop_ahi)
{
buf_page_t* bpage;
ibool all_freed;
scan_again:
mutex_enter(&buf_pool->LRU_list_mutex);
all_freed = TRUE;
for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
bpage != NULL;
/* No op */) {
prio_rw_lock_t* hash_lock;
buf_page_t* prev_bpage;
ib_mutex_t* block_mutex = NULL;
ut_a(buf_page_in_file(bpage));
ut_ad(bpage->in_LRU_list);
prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
/* It is safe to check bpage->space and bpage->io_fix while
holding buf_pool->LRU_list_mutex only and later recheck
while holding the buf_page_get_mutex() mutex. */
if (buf_page_get_space(bpage) != id) {
/* Skip this block, as it does not belong to
the space that is being invalidated. */
goto next_page;
} else if (UNIV_UNLIKELY(buf_page_get_io_fix_unlocked(bpage)
!= BUF_IO_NONE)) {
/* We cannot remove this page during this scan
yet; maybe the system is currently reading it
in, or flushing the modifications to the file */
all_freed = FALSE;
goto next_page;
} else {
ulint fold = buf_page_address_fold(
bpage->space, bpage->offset);
hash_lock = buf_page_hash_lock_get(buf_pool, fold);
rw_lock_x_lock(hash_lock);
block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
if (UNIV_UNLIKELY(
buf_page_get_space(bpage) != id
|| bpage->buf_fix_count > 0
|| (buf_page_get_io_fix(bpage)
!= BUF_IO_NONE))) {
mutex_exit(block_mutex);
rw_lock_x_unlock(hash_lock);
/* We cannot remove this page during
this scan yet; maybe the system is
currently reading it in, or flushing
the modifications to the file */
all_freed = FALSE;
goto next_page;
}
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool = buf_pool_from_array(i);
if (drop_ahi) {
buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
}
ut_ad(mutex_own(block_mutex));
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
fprintf(stderr,
"Dropping space %lu page %lu\n",
(ulong) buf_page_get_space(bpage),
(ulong) buf_page_get_page_no(bpage));
}
#endif
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
/* Do nothing, because the adaptive hash index
covers uncompressed pages only. */
} else if (((buf_block_t*) bpage)->index) {
ulint page_no;
ulint zip_size;
mutex_exit(&buf_pool->LRU_list_mutex);
zip_size = buf_page_get_zip_size(bpage);
page_no = buf_page_get_page_no(bpage);
mutex_exit(block_mutex);
rw_lock_x_unlock(hash_lock);
/* Note that the following call will acquire
and release block->lock X-latch. */
btr_search_drop_page_hash_when_freed(
id, zip_size, page_no);
goto scan_again;
}
if (bpage->oldest_modification != 0) {
buf_flush_remove(bpage);
}
ut_ad(!bpage->in_flush_list);
/* Remove from the LRU list. */
if (buf_LRU_block_remove_hashed(bpage, true)) {
mutex_enter(block_mutex);
buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
mutex_exit(block_mutex);
} else {
ut_ad(block_mutex == &buf_pool->zip_mutex);
}
ut_ad(!mutex_own(block_mutex));
#ifdef UNIV_SYNC_DEBUG
/* buf_LRU_block_remove_hashed() releases the hash_lock */
ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
next_page:
bpage = prev_bpage;
}
mutex_exit(&buf_pool->LRU_list_mutex);
if (!all_freed) {
os_thread_sleep(20000);
goto scan_again;
}
}
/******************************************************************//**
Remove pages belonging to a given tablespace inside a specific
buffer pool instance when we are deleting the data file(s) of that
tablespace. The pages still remain a part of LRU and are evicted from
the list as they age towards the tail of the LRU only if buf_remove
is BUF_REMOVE_FLUSH_NO_WRITE. */
static MY_ATTRIBUTE((nonnull(1)))
void
buf_LRU_remove_pages(
/*=================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
ulint id, /*!< in: space id */
buf_remove_t buf_remove, /*!< in: remove or flush strategy */
const trx_t* trx) /*!< to check if the operation must
be interrupted */
{
switch (buf_remove) {
case BUF_REMOVE_ALL_NO_WRITE:
buf_LRU_remove_all_pages(buf_pool, id);
break;
case BUF_REMOVE_FLUSH_NO_WRITE:
ut_a(trx == 0);
buf_flush_dirty_pages(buf_pool, id, NULL);
break;
case BUF_REMOVE_FLUSH_WRITE:
ut_a(trx != 0);
buf_flush_dirty_pages(buf_pool, id, trx);
}
if (trx && !trx_is_interrupted(trx)) {
/* Ensure that all asynchronous IO is completed. */
os_aio_wait_until_no_pending_writes();
fil_flush(id);
break;
}
}
/******************************************************************//**
Flushes all dirty pages or removes all pages belonging
to a given tablespace. A PROBLEM: if readahead is being started, what
guarantees that it will not try to read in pages after this operation
has completed? */
UNIV_INTERN
void
buf_LRU_flush_or_remove_pages(
/*==========================*/
ulint id, /*!< in: space id */
buf_remove_t buf_remove, /*!< in: remove or flush strategy */
const trx_t* trx) /*!< to check if the operation must
be interrupted */
{
ulint i;
/* Before we attempt to drop pages one by one we first
attempt to drop page hash index entries in batches to make
it more efficient. The batching attempt is a best effort
attempt and does not guarantee that all pages hash entries
will be dropped. We get rid of remaining page hash entries
one by one below. */
for (i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;
buf_pool = buf_pool_from_array(i);
switch (buf_remove) {
case BUF_REMOVE_ALL_NO_WRITE:
buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
break;
case BUF_REMOVE_FLUSH_NO_WRITE:
/* It is a DROP TABLE for a single table
tablespace. No AHI entries exist because
we already dealt with them when freeing up
extents. */
case BUF_REMOVE_FLUSH_WRITE:
/* We allow read-only queries against the
table, there is no need to drop the AHI entries. */
break;
}
buf_LRU_remove_pages(buf_pool, id, buf_remove, trx);
}
}

View file

@ -1679,7 +1679,7 @@ dict_table_rename_in_cache(
filepath = fil_make_ibd_name(table->name, false);
}
fil_delete_tablespace(table->space, BUF_REMOVE_ALL_NO_WRITE);
fil_delete_tablespace(table->space, true);
/* Delete any temp file hanging around. */
if (os_file_status(filepath, &exists, &ftype)

View file

@ -2572,8 +2572,7 @@ fil_op_log_parse_or_replay(
switch (type) {
case MLOG_FILE_DELETE:
if (fil_tablespace_exists_in_mem(space_id)) {
dberr_t err = fil_delete_tablespace(
space_id, BUF_REMOVE_FLUSH_NO_WRITE);
dberr_t err = fil_delete_tablespace(space_id);
ut_a(err == DB_SUCCESS);
}
@ -2851,7 +2850,7 @@ fil_close_tablespace(
completely and permanently. The flag stop_new_ops also prevents
fil_flush() from being applied to this tablespace. */
buf_LRU_flush_or_remove_pages(id, BUF_REMOVE_FLUSH_WRITE, trx);
buf_LRU_flush_or_remove_pages(id, trx);
#endif
mutex_enter(&fil_system->mutex);
@ -2878,18 +2877,13 @@ fil_close_tablespace(
return(err);
}
/*******************************************************************//**
Deletes a single-table tablespace. The tablespace must be cached in the
memory cache.
/** Delete a tablespace and associated .ibd file.
@param[in] id tablespace identifier
@param[in] drop_ahi whether to drop the adaptive hash index
@return DB_SUCCESS or error */
UNIV_INTERN
dberr_t
fil_delete_tablespace(
/*==================*/
ulint id, /*!< in: space id */
buf_remove_t buf_remove) /*!< in: specify the action to take
on the tables pages in the buffer
pool */
fil_delete_tablespace(ulint id, bool drop_ahi)
{
char* path = 0;
fil_space_t* space = 0;
@ -2945,7 +2939,7 @@ fil_delete_tablespace(
To deal with potential read requests by checking the
::stop_new_ops flag in fil_io() */
buf_LRU_flush_or_remove_pages(id, buf_remove, 0);
buf_LRU_flush_or_remove_pages(id, NULL, drop_ahi);
#endif /* !UNIV_HOTBACKUP */
@ -3056,7 +3050,7 @@ fil_discard_tablespace(
{
dberr_t err;
switch (err = fil_delete_tablespace(id, BUF_REMOVE_ALL_NO_WRITE)) {
switch (err = fil_delete_tablespace(id, true)) {
case DB_SUCCESS:
break;

View file

@ -58,6 +58,7 @@ Created 11/5/1995 Heikki Tuuri
#define BUF_GET_POSSIBLY_FREED 16
/*!< Like BUF_GET, but do not mind
if the file page has been freed. */
#define BUF_EVICT_IF_IN_POOL 20 /*!< evict a clean block if found */
/* @} */
/** @name Modes for buf_page_get_known_nowait */
/* @{ */

View file

@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -53,19 +54,14 @@ These are low-level functions
/** Minimum LRU list length for which the LRU_old pointer is defined */
#define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */
/******************************************************************//**
Flushes all dirty pages or removes all pages belonging
to a given tablespace. A PROBLEM: if readahead is being started, what
guarantees that it will not try to read in pages after this operation
has completed? */
/** Empty the flush list for all pages belonging to a tablespace.
@param[in] id tablespace identifier
@param[in] trx transaction, for checking for user interrupt;
or NULL if nothing is to be written
@param[in] drop_ahi whether to drop the adaptive hash index */
UNIV_INTERN
void
buf_LRU_flush_or_remove_pages(
/*==========================*/
ulint id, /*!< in: space id */
buf_remove_t buf_remove, /*!< in: remove or flush strategy */
const trx_t* trx); /*!< to check if the operation must
be interrupted */
buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx, bool drop_ahi=false);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/********************************************************************//**

View file

@ -58,17 +58,6 @@ enum buf_flush_t {
BUF_FLUSH_N_TYPES /*!< index of last element + 1 */
};
/** Algorithm to remove the pages for a tablespace from the buffer pool.
See buf_LRU_flush_or_remove_pages(). */
enum buf_remove_t {
BUF_REMOVE_ALL_NO_WRITE, /*!< Remove all pages from the buffer
pool, don't write or sync to disk */
BUF_REMOVE_FLUSH_NO_WRITE, /*!< Remove only, from the flush list,
don't write or sync to disk */
BUF_REMOVE_FLUSH_WRITE /*!< Flush dirty pages to disk only
don't remove from the buffer pool */
};
/** Flags for io_fix types */
enum buf_io_fix {
BUF_IO_NONE = 0, /**< no pending I/O */

View file

@ -849,18 +849,13 @@ fil_op_log_parse_or_replay(
only be parsed but not replayed */
ulint log_flags); /*!< in: redo log flags
(stored in the page number parameter) */
/*******************************************************************//**
Deletes a single-table tablespace. The tablespace must be cached in the
memory cache.
@return TRUE if success */
/** Delete a tablespace and associated .ibd file.
@param[in] id tablespace identifier
@param[in] drop_ahi whether to drop the adaptive hash index
@return DB_SUCCESS or error */
UNIV_INTERN
dberr_t
fil_delete_tablespace(
/*==================*/
ulint id, /*!< in: space id */
buf_remove_t buf_remove); /*!< in: specify the action to take
on the tables pages in the buffer
pool */
fil_delete_tablespace(ulint id, bool drop_ahi = false);
/*******************************************************************//**
Closes a single-table tablespace. The tablespace must be cached in the
memory cache. Free all pages used by the tablespace.

View file

@ -1602,18 +1602,16 @@ PageConverter::PageConverter(
:
AbstractCallback(trx),
m_cfg(cfg),
m_index(cfg->m_indexes),
m_current_lsn(log_get_lsn()),
m_page_zip_ptr(0),
m_heap(0) UNIV_NOTHROW
m_rec_iter(),
m_offsets_(), m_offsets(m_offsets_),
m_heap(0),
m_cluster_index(dict_table_get_first_index(cfg->m_table)) UNIV_NOTHROW
{
m_index = m_cfg->m_indexes;
m_current_lsn = log_get_lsn();
ut_a(m_current_lsn > 0);
m_offsets = m_offsets_;
rec_offs_init(m_offsets_);
m_cluster_index = dict_table_get_first_index(m_cfg->m_table);
}
/**
@ -2104,7 +2102,7 @@ PageConverter::operator() (
we can work on them */
if ((err = update_page(block, page_type)) != DB_SUCCESS) {
return(err);
break;
}
/* Note: For compressed pages this function will write to the
@ -2141,9 +2139,15 @@ PageConverter::operator() (
"%s: Page %lu at offset " UINT64PF " looks corrupted.",
m_filepath, (ulong) (offset / m_page_size), offset);
return(DB_CORRUPTION);
err = DB_CORRUPTION;
}
/* If we already had an old page with matching number
in the buffer pool, evict it now, because
we no longer evict the pages on DISCARD TABLESPACE. */
buf_page_get_gen(get_space_id(), get_zip_size(), block->page.offset,
RW_NO_LATCH, NULL, BUF_EVICT_IF_IN_POOL,
__FILE__, __LINE__, NULL);
return(err);
}
@ -3717,8 +3721,7 @@ row_import_for_mysql(
The only dirty pages generated should be from the pessimistic purge
of delete marked records that couldn't be purged in Phase I. */
buf_LRU_flush_or_remove_pages(
prebuilt->table->space, BUF_REMOVE_FLUSH_WRITE, trx);
buf_LRU_flush_or_remove_pages(prebuilt->table->space, trx);
if (trx_is_interrupted(trx)) {
ib_logf(IB_LOG_LEVEL_INFO, "Phase III - Flush interrupted");

View file

@ -2494,10 +2494,7 @@ err_exit:
/* We already have .ibd file here. it should be deleted. */
if (table->space
&& fil_delete_tablespace(
table->space,
BUF_REMOVE_FLUSH_NO_WRITE)
!= DB_SUCCESS) {
&& fil_delete_tablespace(table->space) != DB_SUCCESS) {
ut_print_timestamp(stderr);
fprintf(stderr,
@ -3132,9 +3129,6 @@ row_discard_tablespace(
4) FOREIGN KEY operations: if table->n_foreign_key_checks_running > 0,
we do not allow the discard. */
/* Play safe and remove all insert buffer entries, though we should
have removed them already when DISCARD TABLESPACE was called */
ibuf_delete_for_discarded_space(table->space);
table_id_t new_id;
@ -4516,9 +4510,7 @@ row_drop_table_for_mysql(
fil_delete_file(filepath);
} else if (fil_delete_tablespace(
space_id,
BUF_REMOVE_FLUSH_NO_WRITE)
} else if (fil_delete_tablespace(space_id)
!= DB_SUCCESS) {
fprintf(stderr,
"InnoDB: We removed now the InnoDB"

View file

@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -542,8 +543,7 @@ row_quiesce_table_start(
}
if (!trx_is_interrupted(trx)) {
buf_LRU_flush_or_remove_pages(
table->space, BUF_REMOVE_FLUSH_WRITE, trx);
buf_LRU_flush_or_remove_pages(table->space, trx);
if (trx_is_interrupted(trx)) {