MDEV-32042 Simplify buf_page_get_gen()

buf_page_get_low(): Rename to buf_page_get_gen(), and assume that no
crash recovery is needed.

recv_sys_t::recover(): Replaces the old buf_page_get_gen(). Reads a page,
or recovers it based on redo log records, while crash recovery is in progress.

trx_rseg_get_n_undo_tablespaces(), ibuf_upgrade_needed():
Invoke recv_sys.recover() instead of buf_page_get_gen().

dict_boot(): Invoke recv_sys.recover() instead of buf_page_get_gen().
Do not load the system tables.

srv_start(): Load the system tables and the undo logs only after all redo
log has been applied in recv_sys.apply(true), at which point we can safely
invoke the regular buf_page_get_gen().
This commit is contained in:
Marko Mäkelä 2023-12-04 09:45:53 +02:00
parent b42f318996
commit 850d61736d
10 changed files with 115 additions and 156 deletions

View file

@ -1114,9 +1114,9 @@ void btr_drop_temporary_table(const dict_table_t &table)
for (const dict_index_t *index= table.indexes.start; index; for (const dict_index_t *index= table.indexes.start; index;
index= dict_table_get_next_index(index)) index= dict_table_get_next_index(index))
{ {
if (buf_block_t *block= buf_page_get_low({SRV_TMP_SPACE_ID, index->page}, 0, if (buf_block_t *block= buf_page_get_gen({SRV_TMP_SPACE_ID, index->page},
RW_X_LATCH, nullptr, BUF_GET, &mtr, 0, RW_X_LATCH, nullptr, BUF_GET,
nullptr)) &mtr, nullptr))
{ {
btr_free_but_not_root(block, MTR_LOG_NO_REDO); btr_free_but_not_root(block, MTR_LOG_NO_REDO);
mtr.set_log_mode(MTR_LOG_NO_REDO); mtr.set_log_mode(MTR_LOG_NO_REDO);

View file

@ -2585,7 +2585,7 @@ or BUF_PEEK_IF_IN_POOL
@return pointer to the block or NULL */ @return pointer to the block or NULL */
TRANSACTIONAL_TARGET TRANSACTIONAL_TARGET
buf_block_t* buf_block_t*
buf_page_get_low( buf_page_get_gen(
const page_id_t page_id, const page_id_t page_id,
ulint zip_size, ulint zip_size,
ulint rw_latch, ulint rw_latch,
@ -2778,7 +2778,7 @@ free_unfixed_block:
wait_for_unzip: wait_for_unzip:
/* The page is being read or written, or /* The page is being read or written, or
another thread is executing buf_zip_decompress() another thread is executing buf_zip_decompress()
in buf_page_get_low() on it. */ in buf_page_get_gen() on it. */
block->page.unfix(); block->page.unfix();
std::this_thread::sleep_for( std::this_thread::sleep_for(
std::chrono::microseconds(100)); std::chrono::microseconds(100));
@ -2801,10 +2801,7 @@ wait_for_unfix:
ut_ad(&block->page == buf_pool.page_hash.get(page_id, chain)); ut_ad(&block->page == buf_pool.page_hash.get(page_id, chain));
/* Wait for any other threads to release their buffer-fix /* Wait for any other threads to release their buffer-fix
on the compressed-only block descriptor. on the compressed-only block descriptor. */
FIXME: Never fix() before acquiring the lock.
Only in buf_page_get_gen(), buf_page_get_low(), buf_page_free()
we are violating that principle. */
state = block->page.state(); state = block->page.state();
switch (state) { switch (state) {
@ -2830,7 +2827,7 @@ wait_for_unfix:
goto wait_for_unfix; goto wait_for_unfix;
} }
/* Ensure that another buf_page_get_low() will wait for /* Ensure that another buf_page_get_gen() will wait for
new_block->page.lock.x_unlock(). */ new_block->page.lock.x_unlock(). */
block->page.set_state(buf_page_t::READ_FIX); block->page.set_state(buf_page_t::READ_FIX);
@ -2954,59 +2951,6 @@ page_id_mismatch:
return block; return block;
} }
/** Get access to a database page. Buffered redo log may be applied.
This wrapper first asks recv_sys.recover() for a block; if that returns
nullptr, the request is forwarded unchanged to buf_page_get_low().
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
or BUF_PEEK_IF_IN_POOL
@param[in,out] mtr mini-transaction, or NULL
@param[out] err DB_SUCCESS or error code
@return pointer to the block or NULL
@retval nullptr with *err=DB_CORRUPTION (if err is not null) when
recv_sys.recover() returned the sentinel -1, or when the returned block
was found in a state below buf_page_t::UNFIXED */
buf_block_t*
buf_page_get_gen(
const page_id_t page_id,
ulint zip_size,
ulint rw_latch,
buf_block_t* guess,
ulint mode,
mtr_t* mtr,
dberr_t* err)
{
/* Give crash recovery the first chance to supply the page. */
buf_block_t *block= recv_sys.recover(page_id);
/* nullptr: recovery did not supply a block; use the regular lookup. */
if (UNIV_LIKELY(!block))
return buf_page_get_low(page_id, zip_size, rw_latch,
guess, mode, mtr, err);
/* The pointer value -1 is a sentinel, not a real block. */
else if (UNIV_UNLIKELY(block == reinterpret_cast<buf_block_t*>(-1)))
{
/* Shared error exit; also the target of the goto in the state check
further below. */
corrupted:
if (err)
*err= DB_CORRUPTION;
return nullptr;
}
/* From here on, block is a real block returned by recv_sys.recover(). */
if (err)
*err= DB_SUCCESS;
/* Recovery is a special case; we fix() before acquiring lock. */
auto s= block->page.fix();
ut_ad(s >= buf_page_t::FREED);
/* The block may be write-fixed at this point because we are not
holding a lock, but it must not be read-fixed. */
ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX);
/* A state in [FREED, UNFIXED) is presumably a freed page -- TODO confirm
against buf_page_t. Release our fix and ask the LRU to free the block
while holding buf_pool.mutex; the result of buf_LRU_free_page() is
ignored, and the caller is told the page is corrupted. */
if (s < buf_page_t::UNFIXED)
{
ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL);
mysql_mutex_lock(&buf_pool.mutex);
block->page.unfix();
buf_LRU_free_page(&block->page, true);
mysql_mutex_unlock(&buf_pool.mutex);
goto corrupted;
}
/* NOTE(review): mtr is dereferenced unconditionally on this path, so
it must not be NULL here even though the header comment allows NULL. */
mtr->page_lock(block, rw_latch);
return block;
}
/********************************************************************//** /********************************************************************//**
This is the general function used to get optimistic access to a database This is the general function used to get optimistic access to a database
page. page.

View file

@ -815,7 +815,7 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
/* We must hold an exclusive hash_lock to prevent /* We must hold an exclusive hash_lock to prevent
bpage->can_relocate() from changing due to a concurrent bpage->can_relocate() from changing due to a concurrent
execution of buf_page_get_low(). */ execution of buf_page_get_gen(). */
buf_pool_t::hash_chain& chain= buf_pool.page_hash.cell_get(id.fold()); buf_pool_t::hash_chain& chain= buf_pool.page_hash.cell_get(id.fold());
page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain); page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain);
/* We cannot use transactional_lock_guard here, /* We cannot use transactional_lock_guard here,

View file

@ -218,8 +218,7 @@ dberr_t dict_boot()
dict_sys.create(); dict_sys.create();
dberr_t err; dberr_t err;
const buf_block_t *d = buf_page_get_gen(hdr_page_id, 0, RW_S_LATCH, const buf_block_t *d = recv_sys.recover(hdr_page_id, &mtr ,&err);
nullptr, BUF_GET, &mtr, &err);
if (!d) { if (!d) {
mtr.commit(); mtr.commit();
return err; return err;
@ -393,19 +392,6 @@ dberr_t dict_boot()
UT_BITS_IN_BYTES(unsigned(table->indexes.start->n_nullable))); UT_BITS_IN_BYTES(unsigned(table->indexes.start->n_nullable)));
mtr.commit(); mtr.commit();
dict_sys.unlock();
if (err == DB_SUCCESS) {
/* Load definitions of other indexes on system tables */
dict_load_sys_table(dict_sys.sys_tables);
dict_load_sys_table(dict_sys.sys_columns);
dict_load_sys_table(dict_sys.sys_indexes);
dict_load_sys_table(dict_sys.sys_fields);
dict_sys.unlock();
dict_sys.load_sys_tables();
} else {
dict_sys.unlock();
}
return err; return err;
} }

View file

@ -1012,8 +1012,7 @@ dberr_t ibuf_upgrade_needed()
mtr.start(); mtr.start();
mtr.x_lock_space(fil_system.sys_space); mtr.x_lock_space(fil_system.sys_space);
dberr_t err; dberr_t err;
const buf_block_t *header_page= const buf_block_t *header_page= recv_sys.recover(ibuf_header, &mtr, &err);
buf_page_get_gen(ibuf_header, 0, RW_S_LATCH, nullptr, BUF_GET, &mtr, &err);
if (!header_page) if (!header_page)
{ {
@ -1026,8 +1025,7 @@ dberr_t ibuf_upgrade_needed()
return err; return err;
} }
const buf_block_t *root= buf_page_get_gen(ibuf_root, 0, RW_S_LATCH, nullptr, const buf_block_t *root= recv_sys.recover(ibuf_root, &mtr, &err);
BUF_GET, &mtr, &err);
if (!root) if (!root)
goto err_exit; goto err_exit;

View file

@ -201,34 +201,12 @@ buf_page_get_gen(
buf_block_t* guess, buf_block_t* guess,
ulint mode, ulint mode,
mtr_t* mtr, mtr_t* mtr,
dberr_t* err = NULL) dberr_t* err = nullptr);
MY_ATTRIBUTE((nonnull(6)));
/** This is the low level function used to get access to a database page.
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in] guess guessed block or NULL
@param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
or BUF_PEEK_IF_IN_POOL
@param[in,out] mtr mini-transaction, or NULL if a
block with page_id is to be evicted
@param[out] err DB_SUCCESS or error code
@return pointer to the block or NULL */
buf_block_t*
buf_page_get_low(
const page_id_t page_id,
ulint zip_size,
ulint rw_latch,
buf_block_t* guess,
ulint mode,
mtr_t* mtr,
dberr_t* err);
/** Initialize a page in the buffer pool. The page is usually not read /** Initialize a page in the buffer pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform to a block a state transition NOT_USED => LRU of the functions which perform to a block a state transition NOT_USED => LRU
(the other is buf_page_get_low()). (the other is buf_page_get_gen()).
@param[in,out] space space object @param[in,out] space space object
@param[in] offset offset of the tablespace @param[in] offset offset of the tablespace
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0

View file

@ -280,12 +280,6 @@ private:
@retval -1 if the page cannot be recovered due to corruption */ @retval -1 if the page cannot be recovered due to corruption */
inline buf_block_t *recover_low(const map::iterator &p, mtr_t &mtr, inline buf_block_t *recover_low(const map::iterator &p, mtr_t &mtr,
buf_block_t *b, lsn_t init_lsn); buf_block_t *b, lsn_t init_lsn);
/** Attempt to initialize a page based on redo log records.
@param page_id page identifier
@return the recovered block
@retval nullptr if the page cannot be initialized based on log records
@retval -1 if the page cannot be recovered due to corruption */
ATTRIBUTE_COLD buf_block_t *recover_low(const page_id_t page_id);
/** All found log files (multiple ones are possible if we are upgrading /** All found log files (multiple ones are possible if we are upgrading
from before MariaDB Server 10.5.1) */ from before MariaDB Server 10.5.1) */
@ -430,15 +424,14 @@ public:
/** @return whether log file corruption was found */ /** @return whether log file corruption was found */
bool is_corrupt_log() const { return UNIV_UNLIKELY(found_corrupt_log); } bool is_corrupt_log() const { return UNIV_UNLIKELY(found_corrupt_log); }
/** Attempt to initialize a page based on redo log records. /** Read a page or recover it based on redo log records.
@param page_id page identifier @param page_id page identifier
@return the recovered block @param mtr mini-transaction
@retval nullptr if the page cannot be initialized based on log records @param err error code
@retval -1 if the page cannot be recovered due to corruption */ @return the requested block
buf_block_t *recover(const page_id_t page_id) @retval nullptr if the page cannot be accessed due to corruption */
{ ATTRIBUTE_COLD
return UNIV_UNLIKELY(recovery_on) ? recover_low(page_id) : nullptr; buf_block_t *recover(const page_id_t page_id, mtr_t *mtr, dberr_t *err);
}
/** Try to recover a tablespace that was not readable earlier /** Try to recover a tablespace that was not readable earlier
@param p iterator @param p iterator

View file

@ -3649,33 +3649,70 @@ inline buf_block_t *recv_sys_t::recover_low(const map::iterator &p, mtr_t &mtr,
return block ? block : reinterpret_cast<buf_block_t*>(-1); return block ? block : reinterpret_cast<buf_block_t*>(-1);
} }
/** Attempt to initialize a page based on redo log records. /** Read a page or recover it based on redo log records.
@param page_id page identifier @param page_id page identifier
@return recovered block @param mtr mini-transaction
@retval nullptr if the page cannot be initialized based on log records */ @param err error code
ATTRIBUTE_COLD buf_block_t *recv_sys_t::recover_low(const page_id_t page_id) @return the requested block
@retval nullptr if the page cannot be accessed due to corruption */
ATTRIBUTE_COLD
buf_block_t *
recv_sys_t::recover(const page_id_t page_id, mtr_t *mtr, dberr_t *err)
{ {
if (!recovery_on)
must_read:
return buf_page_get_gen(page_id, 0, RW_S_LATCH, nullptr, BUF_GET, mtr,
err);
mysql_mutex_lock(&mutex); mysql_mutex_lock(&mutex);
map::iterator p= pages.find(page_id); map::iterator p= pages.find(page_id);
if (p != pages.end() && !p->second.being_processed && p->second.skip_read) if (p == pages.end() || p->second.being_processed || !p->second.skip_read)
{ {
p->second.being_processed= 1;
const lsn_t init_lsn{mlog_init.last(page_id)};
mysql_mutex_unlock(&mutex); mysql_mutex_unlock(&mutex);
buf_block_t *free_block= buf_LRU_get_free_block(have_no_mutex); goto must_read;
mtr_t mtr;
buf_block_t *block= recover_low(p, mtr, free_block, init_lsn);
p->second.being_processed= -1;
ut_ad(!block || block == reinterpret_cast<buf_block_t*>(-1) ||
block == free_block);
if (UNIV_UNLIKELY(!block))
buf_pool.free_block(free_block);
return block;
} }
p->second.being_processed= 1;
const lsn_t init_lsn{mlog_init.last(page_id)};
mysql_mutex_unlock(&mutex); mysql_mutex_unlock(&mutex);
return nullptr; buf_block_t *free_block= buf_LRU_get_free_block(have_no_mutex);
buf_block_t *block;
{
mtr_t local_mtr;
block= recover_low(p, local_mtr, free_block, init_lsn);
}
p->second.being_processed= -1;
if (UNIV_UNLIKELY(!block))
{
buf_pool.free_block(free_block);
goto must_read;
}
else if (block == reinterpret_cast<buf_block_t*>(-1))
{
corrupted:
if (err)
*err= DB_CORRUPTION;
return nullptr;
}
ut_ad(block == free_block);
auto s= block->page.fix();
ut_ad(s >= buf_page_t::FREED);
/* The block may be write-fixed at this point because we are not
holding a latch, but it must not be read-fixed. */
ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX);
if (s < buf_page_t::UNFIXED)
{
mysql_mutex_lock(&buf_pool.mutex);
block->page.unfix();
buf_LRU_free_page(&block->page, true);
mysql_mutex_unlock(&buf_pool.mutex);
goto corrupted;
}
mtr->page_lock(block, RW_S_LATCH);
return block;
} }
inline fil_space_t *fil_system_t::find(const char *path) const inline fil_space_t *fil_system_t::find(const char *path) const

View file

@ -2074,7 +2074,7 @@ dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW
/* If we already had an old page with matching number /* If we already had an old page with matching number
in the buffer pool, evict it now, because in the buffer pool, evict it now, because
we no longer evict the pages on DISCARD TABLESPACE. */ we no longer evict the pages on DISCARD TABLESPACE. */
buf_page_get_low(block->page.id(), get_zip_size(), RW_NO_LATCH, buf_page_get_gen(block->page.id(), get_zip_size(), RW_NO_LATCH,
nullptr, BUF_PEEK_IF_IN_POOL, nullptr, BUF_PEEK_IF_IN_POOL,
nullptr, nullptr); nullptr, nullptr);

View file

@ -596,7 +596,8 @@ static uint32_t trx_rseg_get_n_undo_tablespaces()
mtr_t mtr; mtr_t mtr;
mtr.start(); mtr.start();
if (const buf_block_t *sys_header= trx_sysf_get(&mtr, false)) if (const buf_block_t *sys_header=
recv_sys.recover({TRX_SYS_SPACE, TRX_SYS_PAGE_NO}, &mtr, nullptr))
for (ulint rseg_id= 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++) for (ulint rseg_id= 0; rseg_id < TRX_SYS_N_RSEGS; rseg_id++)
if (trx_sysf_rseg_get_page_no(sys_header, rseg_id) != FIL_NULL) if (trx_sysf_rseg_get_page_no(sys_header, rseg_id) != FIL_NULL)
if (uint32_t space= trx_sysf_rseg_get_space(sys_header, rseg_id)) if (uint32_t space= trx_sysf_rseg_get_space(sys_header, rseg_id))
@ -1552,20 +1553,6 @@ dberr_t srv_start(bool create_new_db)
srv_undo_tablespaces_active srv_undo_tablespaces_active
= trx_rseg_get_n_undo_tablespaces(); = trx_rseg_get_n_undo_tablespaces();
if (srv_operation != SRV_OPERATION_RESTORE) {
dict_sys.load_sys_tables();
}
if (UNIV_UNLIKELY(must_upgrade_ibuf)) {
dict_load_tablespaces(nullptr, true);
err = ibuf_upgrade();
if (err != DB_SUCCESS) {
break;
}
}
err = trx_lists_init_at_db_start();
break; break;
default: default:
ut_ad("wrong mariabackup mode" == 0); ut_ad("wrong mariabackup mode" == 0);
@ -1596,9 +1583,45 @@ dberr_t srv_start(bool create_new_db)
return(srv_init_abort(DB_CORRUPTION)); return(srv_init_abort(DB_CORRUPTION));
} }
if (srv_operation != SRV_OPERATION_RESTORE
|| recv_needed_recovery) {
}
DBUG_PRINT("ib_log", ("apply completed")); DBUG_PRINT("ib_log", ("apply completed"));
if (recv_needed_recovery) { if (srv_operation != SRV_OPERATION_RESTORE) {
dict_sys.lock(SRW_LOCK_CALL);
dict_load_sys_table(dict_sys.sys_tables);
dict_sys.unlock();
if (UNIV_UNLIKELY(must_upgrade_ibuf)) {
dict_load_tablespaces(nullptr, true);
err = ibuf_upgrade();
if (err != DB_SUCCESS) {
return srv_init_abort(err);
}
}
dict_sys.lock(SRW_LOCK_CALL);
dict_load_sys_table(dict_sys.sys_columns);
dict_load_sys_table(dict_sys.sys_indexes);
dict_load_sys_table(dict_sys.sys_fields);
dict_sys.unlock();
dict_sys.load_sys_tables();
err = trx_lists_init_at_db_start();
if (err != DB_SUCCESS) {
return srv_init_abort(err);
}
if (recv_needed_recovery) {
trx_sys_print_mysql_binlog_offset();
}
} else if (recv_needed_recovery) {
err = trx_lists_init_at_db_start();
if (err != DB_SUCCESS) {
return srv_init_abort(err);
}
trx_sys_print_mysql_binlog_offset(); trx_sys_print_mysql_binlog_offset();
} }
} }