MDEV-32042 Simplify buf_page_get_gen()

buf_page_get_low(): Rename to buf_page_get_gen(), and assume that no
crash recovery is needed.

recv_sys_t::recover(): Replaces the old buf_page_get_gen(). Read a page
while crash recovery is in progress.

trx_rseg_get_n_undo_tablespaces(), ibuf_upgrade_needed():
Invoke recv_sys.recover() instead of buf_page_get_gen().

dict_boot(): Invoke recv_sys.recover() instead of buf_page_get_gen().
Do not load the system tables.

srv_start(): Load the system tables and the undo logs only after the
entire redo log has been applied in recv_sys.apply(true), at which point
we can safely invoke the regular buf_page_get_gen().
This commit is contained in:
Marko Mäkelä 2023-12-04 09:45:53 +02:00
parent b42f318996
commit 850d61736d
10 changed files with 115 additions and 156 deletions

View file

@ -1114,9 +1114,9 @@ void btr_drop_temporary_table(const dict_table_t &table)
for (const dict_index_t *index= table.indexes.start; index;
index= dict_table_get_next_index(index))
{
if (buf_block_t *block= buf_page_get_low({SRV_TMP_SPACE_ID, index->page}, 0,
RW_X_LATCH, nullptr, BUF_GET, &mtr,
nullptr))
if (buf_block_t *block= buf_page_get_gen({SRV_TMP_SPACE_ID, index->page},
0, RW_X_LATCH, nullptr, BUF_GET,
&mtr, nullptr))
{
btr_free_but_not_root(block, MTR_LOG_NO_REDO);
mtr.set_log_mode(MTR_LOG_NO_REDO);

View file

@ -2585,7 +2585,7 @@ or BUF_PEEK_IF_IN_POOL
@return pointer to the block or NULL */
TRANSACTIONAL_TARGET
buf_block_t*
buf_page_get_low(
buf_page_get_gen(
const page_id_t page_id,
ulint zip_size,
ulint rw_latch,
@ -2778,7 +2778,7 @@ free_unfixed_block:
wait_for_unzip:
/* The page is being read or written, or
another thread is executing buf_zip_decompress()
in buf_page_get_low() on it. */
in buf_page_get_gen() on it. */
block->page.unfix();
std::this_thread::sleep_for(
std::chrono::microseconds(100));
@ -2801,10 +2801,7 @@ wait_for_unfix:
ut_ad(&block->page == buf_pool.page_hash.get(page_id, chain));
/* Wait for any other threads to release their buffer-fix
on the compressed-only block descriptor.
FIXME: Never fix() before acquiring the lock.
Only in buf_page_get_gen(), buf_page_get_low(), buf_page_free()
we are violating that principle. */
on the compressed-only block descriptor. */
state = block->page.state();
switch (state) {
@ -2830,7 +2827,7 @@ wait_for_unfix:
goto wait_for_unfix;
}
/* Ensure that another buf_page_get_low() will wait for
/* Ensure that another buf_page_get_gen() will wait for
new_block->page.lock.x_unlock(). */
block->page.set_state(buf_page_t::READ_FIX);
@ -2954,59 +2951,6 @@ page_id_mismatch:
return block;
}
/** Get access to a database page. Buffered redo log may be applied.
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
or BUF_PEEK_IF_IN_POOL
@param[in,out] mtr mini-transaction, or NULL
@param[out] err DB_SUCCESS or error code
@return pointer to the block or NULL */
buf_block_t*
buf_page_get_gen(
const page_id_t page_id,
ulint zip_size,
ulint rw_latch,
buf_block_t* guess,
ulint mode,
mtr_t* mtr,
dberr_t* err)
{
buf_block_t *block= recv_sys.recover(page_id);
if (UNIV_LIKELY(!block))
return buf_page_get_low(page_id, zip_size, rw_latch,
guess, mode, mtr, err);
else if (UNIV_UNLIKELY(block == reinterpret_cast<buf_block_t*>(-1)))
{
corrupted:
if (err)
*err= DB_CORRUPTION;
return nullptr;
}
if (err)
*err= DB_SUCCESS;
/* Recovery is a special case; we fix() before acquiring lock. */
auto s= block->page.fix();
ut_ad(s >= buf_page_t::FREED);
/* The block may be write-fixed at this point because we are not
holding a lock, but it must not be read-fixed. */
ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX);
if (s < buf_page_t::UNFIXED)
{
ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL);
mysql_mutex_lock(&buf_pool.mutex);
block->page.unfix();
buf_LRU_free_page(&block->page, true);
mysql_mutex_unlock(&buf_pool.mutex);
goto corrupted;
}
mtr->page_lock(block, rw_latch);
return block;
}
/********************************************************************//**
This is the general function used to get optimistic access to a database
page.

View file

@ -815,7 +815,7 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
/* We must hold an exclusive hash_lock to prevent
bpage->can_relocate() from changing due to a concurrent
execution of buf_page_get_low(). */
execution of buf_page_get_gen(). */
buf_pool_t::hash_chain& chain= buf_pool.page_hash.cell_get(id.fold());
page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain);
/* We cannot use transactional_lock_guard here,

View file

@ -218,8 +218,7 @@ dberr_t dict_boot()
dict_sys.create();
dberr_t err;
const buf_block_t *d = buf_page_get_gen(hdr_page_id, 0, RW_S_LATCH,
nullptr, BUF_GET, &mtr, &err);
const buf_block_t *d = recv_sys.recover(hdr_page_id, &mtr ,&err);
if (!d) {
mtr.commit();
return err;
@ -393,19 +392,6 @@ dberr_t dict_boot()
UT_BITS_IN_BYTES(unsigned(table->indexes.start->n_nullable)));
mtr.commit();
if (err == DB_SUCCESS) {
/* Load definitions of other indexes on system tables */
dict_load_sys_table(dict_sys.sys_tables);
dict_load_sys_table(dict_sys.sys_columns);
dict_load_sys_table(dict_sys.sys_indexes);
dict_load_sys_table(dict_sys.sys_fields);
dict_sys.unlock();
dict_sys.load_sys_tables();
} else {
dict_sys.unlock();
}
dict_sys.unlock();
return err;
}

View file

@ -1012,8 +1012,7 @@ dberr_t ibuf_upgrade_needed()
mtr.start();
mtr.x_lock_space(fil_system.sys_space);
dberr_t err;
const buf_block_t *header_page=
buf_page_get_gen(ibuf_header, 0, RW_S_LATCH, nullptr, BUF_GET, &mtr, &err);
const buf_block_t *header_page= recv_sys.recover(ibuf_header, &mtr, &err);
if (!header_page)
{
@ -1026,8 +1025,7 @@ dberr_t ibuf_upgrade_needed()
return err;
}
const buf_block_t *root= buf_page_get_gen(ibuf_root, 0, RW_S_LATCH, nullptr,
BUF_GET, &mtr, &err);
const buf_block_t *root= recv_sys.recover(ibuf_root, &mtr, &err);
if (!root)
goto err_exit;

View file

@ -201,34 +201,12 @@ buf_page_get_gen(
buf_block_t* guess,
ulint mode,
mtr_t* mtr,
dberr_t* err = NULL)
MY_ATTRIBUTE((nonnull(6)));
/** This is the low level function used to get access to a database page.
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
or BUF_PEEK_IF_IN_POOL
@param[in,out] mtr mini-transaction, or NULL if a
block with page_id is to be evicted
@param[out] err DB_SUCCESS or error code
@return pointer to the block or NULL */
buf_block_t*
buf_page_get_low(
const page_id_t page_id,
ulint zip_size,
ulint rw_latch,
buf_block_t* guess,
ulint mode,
mtr_t* mtr,
dberr_t* err);
dberr_t* err = nullptr);
/** Initialize a page in the buffer pool. The page is usually not read
from a file even if it cannot be found in the buffer pool. This is one
of the functions that perform the state transition NOT_USED => LRU on a block
(the other is buf_page_get_low()).
(the other is buf_page_get_gen()).
@param[in,out] space space object
@param[in] offset offset of the tablespace
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0

View file

@ -280,12 +280,6 @@ private:
@retval -1 if the page cannot be recovered due to corruption */
inline buf_block_t *recover_low(const map::iterator &p, mtr_t &mtr,
buf_block_t *b, lsn_t init_lsn);
/** Attempt to initialize a page based on redo log records.
@param page_id page identifier
@return the recovered block
@retval nullptr if the page cannot be initialized based on log records
@retval -1 if the page cannot be recovered due to corruption */
ATTRIBUTE_COLD buf_block_t *recover_low(const page_id_t page_id);
/** All found log files (multiple ones are possible if we are upgrading
from before MariaDB Server 10.5.1) */
@ -430,15 +424,14 @@ public:
/** @return whether log file corruption was found */
bool is_corrupt_log() const { return UNIV_UNLIKELY(found_corrupt_log); }
/** Attempt to initialize a page based on redo log records.
/** Read a page or recover it based on redo log records.
@param page_id page identifier
@return the recovered block
@retval nullptr if the page cannot be initialized based on log records
@retval -1 if the page cannot be recovered due to corruption */
buf_block_t *recover(const page_id_t page_id)
{
return UNIV_UNLIKELY(recovery_on) ? recover_low(page_id) : nullptr;
}
@param mtr mini-transaction
@param err error code
@return the requested block
@retval nullptr if the page cannot be accessed due to corruption */
ATTRIBUTE_COLD
buf_block_t *recover(const page_id_t page_id, mtr_t *mtr, dberr_t *err);
/** Try to recover a tablespace that was not readable earlier
@param p iterator

View file

@ -3649,33 +3649,70 @@ inline buf_block_t *recv_sys_t::recover_low(const map::iterator &p, mtr_t &mtr,
return block ? block : reinterpret_cast<buf_block_t*>(-1);
}
/** Attempt to initialize a page based on redo log records.
/** Read a page or recover it based on redo log records.
@param page_id page identifier
@return recovered block
@retval nullptr if the page cannot be initialized based on log records */
ATTRIBUTE_COLD buf_block_t *recv_sys_t::recover_low(const page_id_t page_id)
@param mtr mini-transaction
@param err error code
@return the requested block
@retval nullptr if the page cannot be accessed due to corruption */
ATTRIBUTE_COLD
buf_block_t *
recv_sys_t::recover(const page_id_t page_id, mtr_t *mtr, dberr_t *err)
{
if (!recovery_on)
must_read:
return buf_page_get_gen(page_id, 0, RW_S_LATCH, nullptr, BUF_GET, mtr,
err);
mysql_mutex_lock(&mutex);
map::iterator p= pages.find(page_id);
if (p != pages.end() && !p->second.being_processed && p->second.skip_read)
if (p == pages.end() || p->second.being_processed || !p->second.skip_read)
{
p->second.being_processed= 1;
const lsn_t init_lsn{mlog_init.last(page_id)};
mysql_mutex_unlock(&mutex);
buf_block_t *free_block= buf_LRU_get_free_block(have_no_mutex);
mtr_t mtr;
buf_block_t *block= recover_low(p, mtr, free_block, init_lsn);
p->second.being_processed= -1;
ut_ad(!block || block == reinterpret_cast<buf_block_t*>(-1) ||
block == free_block);
if (UNIV_UNLIKELY(!block))
buf_pool.free_block(free_block);
return block;
goto must_read;
}
p->second.being_processed= 1;
const lsn_t init_lsn{mlog_init.last(page_id)};
mysql_mutex_unlock(&mutex);
return nullptr;
buf_block_t *free_block= buf_LRU_get_free_block(have_no_mutex);
buf_block_t *block;
{
mtr_t local_mtr;
block= recover_low(p, local_mtr, free_block, init_lsn);
}
p->second.being_processed= -1;
if (UNIV_UNLIKELY(!block))
{
buf_pool.free_block(free_block);
goto must_read;
}
else if (block == reinterpret_cast<buf_block_t*>(-1))
{
corrupted:
if (err)
*err= DB_CORRUPTION;
return nullptr;
}
ut_ad(block == free_block);
auto s= block->page.fix();
ut_ad(s >= buf_page_t::FREED);
/* The block may be write-fixed at this point because we are not
holding a latch, but it must not be read-fixed. */
ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX);
if (s < buf_page_t::UNFIXED)
{
mysql_mutex_lock(&buf_pool.mutex);
block->page.unfix();
buf_LRU_free_page(&block->page, true);
mysql_mutex_unlock(&buf_pool.mutex);
goto corrupted;
}
mtr->page_lock(block, RW_S_LATCH);
return block;
}
inline fil_space_t *fil_system_t::find(const char *path) const

View file

@ -2074,7 +2074,7 @@ dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW
/* If we already had an old page with matching number
in the buffer pool, evict it now, because
we no longer evict the pages on DISCARD TABLESPACE. */
buf_page_get_low(block->page.id(), get_zip_size(), RW_NO_LATCH,
buf_page_get_gen(block->page.id(), get_zip_size(), RW_NO_LATCH,
nullptr, BUF_PEEK_IF_IN_POOL,
nullptr, nullptr);

View file

@ -596,7 +596,8 @@ static uint32_t trx_rseg_get_n_undo_tablespaces()
mtr_t mtr;
mtr.start();
if (const buf_block_t *sys_header= trx_sysf_get(&mtr, false))
if (const buf_block_t *sys_header=
recv_sys.recover({TRX_SYS_SPACE, TRX_SYS_PAGE_NO}, &mtr, nullptr))
for (ulint rseg_id= 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++)
if (trx_sysf_rseg_get_page_no(sys_header, rseg_id) != FIL_NULL)
if (uint32_t space= trx_sysf_rseg_get_space(sys_header, rseg_id))
@ -1552,20 +1553,6 @@ dberr_t srv_start(bool create_new_db)
srv_undo_tablespaces_active
= trx_rseg_get_n_undo_tablespaces();
if (srv_operation != SRV_OPERATION_RESTORE) {
dict_sys.load_sys_tables();
}
if (UNIV_UNLIKELY(must_upgrade_ibuf)) {
dict_load_tablespaces(nullptr, true);
err = ibuf_upgrade();
if (err != DB_SUCCESS) {
break;
}
}
err = trx_lists_init_at_db_start();
break;
default:
ut_ad("wrong mariabackup mode" == 0);
@ -1596,9 +1583,45 @@ dberr_t srv_start(bool create_new_db)
return(srv_init_abort(DB_CORRUPTION));
}
if (srv_operation != SRV_OPERATION_RESTORE
|| recv_needed_recovery) {
}
DBUG_PRINT("ib_log", ("apply completed"));
if (recv_needed_recovery) {
if (srv_operation != SRV_OPERATION_RESTORE) {
dict_sys.lock(SRW_LOCK_CALL);
dict_load_sys_table(dict_sys.sys_tables);
dict_sys.unlock();
if (UNIV_UNLIKELY(must_upgrade_ibuf)) {
dict_load_tablespaces(nullptr, true);
err = ibuf_upgrade();
if (err != DB_SUCCESS) {
return srv_init_abort(err);
}
}
dict_sys.lock(SRW_LOCK_CALL);
dict_load_sys_table(dict_sys.sys_columns);
dict_load_sys_table(dict_sys.sys_indexes);
dict_load_sys_table(dict_sys.sys_fields);
dict_sys.unlock();
dict_sys.load_sys_tables();
err = trx_lists_init_at_db_start();
if (err != DB_SUCCESS) {
return srv_init_abort(err);
}
if (recv_needed_recovery) {
trx_sys_print_mysql_binlog_offset();
}
} else if (recv_needed_recovery) {
err = trx_lists_init_at_db_start();
if (err != DB_SUCCESS) {
return srv_init_abort(err);
}
trx_sys_print_mysql_binlog_offset();
}
}