MDEV-35413 InnoDB: Cannot load compressed BLOB

A race condition was observed between two buf_page_get_zip() for a page.
One of them had proceeded to buf_read_page(), allocating and x-latching
a buf_block_t that initially comprises only an uncompressed page frame.
While that thread was waiting inside buf_block_alloc(), another thread
would try to access the same page. Without acquiring a page latch, it
would wrongly conclude that there is corruption because no compressed
page frame exists for the block.

buf_page_get_zip(): Simplify the logic and correct the documentation.
Always acquire a shared latch to prevent any race condition with a
concurrent read operation. No longer increment a buffer-fix; the latch
is sufficient for preventing page relocation or eviction.

buf_read_page(): Add the parameter bool unzip=true. In buf_page_get_zip()
there is no need to allocate an uncompressed page frame for reading a
compressed BLOB page. We only need that for other ROW_FORMAT=COMPRESSED
pages, or for writing compressed BLOB pages.

btr_copy_zblob_prefix(): Remove the message "Cannot load compressed BLOB"
because buf_page_get_zip() will already have reported a more specific
error whenever it returns nullptr.

row_merge_buf_add(): Do not crash on BLOB corruption, but return an
error instead. (In debug builds, an assertion will fail if this
corruption is noticed.)

Reviewed by: Debarun Banerjee
This commit is contained in:
Marko Mäkelä 2024-11-22 08:33:03 +02:00
parent a06d81ff3f
commit 26597b91b3
6 changed files with 66 additions and 104 deletions

View file

@ -6850,14 +6850,9 @@ btr_copy_zblob_prefix(
buf_page_t* bpage;
uint32_t next_page_no;
/* There is no latch on bpage directly. Instead,
bpage is protected by the B-tree page latch that
is being held on the clustered index record, or,
in row_merge_copy_blobs(), by an exclusive table lock. */
bpage = buf_page_get_zip(id);
if (UNIV_UNLIKELY(!bpage)) {
ib::error() << "Cannot load compressed BLOB " << id;
goto func_exit;
}
@ -6927,12 +6922,10 @@ inflate_error:
end_of_blob:
bpage->lock.s_unlock();
bpage->unfix();
goto func_exit;
}
bpage->lock.s_unlock();
bpage->unfix();
/* On other BLOB pages except the first
the BLOB header always is at the page header: */

View file

@ -2199,17 +2199,8 @@ static void buf_inc_get(ha_handler_stats *stats)
++buf_pool.stat.n_page_gets;
}
/** Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
The page must be released with unfix().
NOTE: the page is not protected by any latch. Mutual exclusion has to
be implemented at a higher level. In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches.
@param page_id page identifier
@return pointer to the block, s-latched */
TRANSACTIONAL_TARGET
buf_page_t* buf_page_get_zip(const page_id_t page_id)
buf_page_t *buf_page_get_zip(const page_id_t page_id)
{
ha_handler_stats *const stats= mariadb_stats;
buf_inc_get(stats);
@ -2218,109 +2209,84 @@ buf_page_t* buf_page_get_zip(const page_id_t page_id)
page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
buf_page_t *bpage;
lookup:
for (bool discard_attempted= false;;)
for (;;)
{
#ifndef NO_ELISION
if (xbegin())
{
if (hash_lock.is_locked())
xabort();
bpage= buf_pool.page_hash.get(page_id, chain);
if (!bpage || buf_pool.watch_is_sentinel(*bpage))
{
xend();
goto must_read_page;
}
if (!bpage->zip.data)
else
{
/* There is no ROW_FORMAT=COMPRESSED page. */
bpage= buf_pool.page_hash.get(page_id, chain);
const bool got_s_latch= bpage && !buf_pool.watch_is_sentinel(*bpage) &&
bpage->lock.s_lock_try();
xend();
return nullptr;
}
if (discard_attempted || !bpage->frame)
{
if (!bpage->lock.s_lock_try())
xabort();
xend();
break;
}
xend();
}
else
#endif
{
hash_lock.lock_shared();
bpage= buf_pool.page_hash.get(page_id, chain);
if (!bpage || buf_pool.watch_is_sentinel(*bpage))
{
hash_lock.unlock_shared();
goto must_read_page;
}
ut_ad(bpage->in_file());
ut_ad(page_id == bpage->id());
if (!bpage->zip.data)
{
/* There is no ROW_FORMAT=COMPRESSED page. */
hash_lock.unlock_shared();
return nullptr;
}
if (discard_attempted || !bpage->frame)
{
const bool got_s_latch= bpage->lock.s_lock_try();
hash_lock.unlock_shared();
if (UNIV_LIKELY(got_s_latch))
if (got_s_latch)
break;
/* We may fail to acquire bpage->lock because
buf_page_t::read_complete() may be invoking
buf_pool_t::corrupted_evict() on this block, which it would
hold an exclusive latch on.
Let us aqcuire and release buf_pool.mutex to ensure that any
buf_pool_t::corrupted_evict() will proceed before we reacquire
the hash_lock that it could be waiting for. */
mysql_mutex_lock(&buf_pool.mutex);
mysql_mutex_unlock(&buf_pool.mutex);
goto lookup;
}
}
#endif
hash_lock.lock_shared();
bpage= buf_pool.page_hash.get(page_id, chain);
if (!bpage || buf_pool.watch_is_sentinel(*bpage))
{
hash_lock.unlock_shared();
switch (dberr_t err= buf_read_page(page_id, false)) {
case DB_SUCCESS:
case DB_SUCCESS_LOCKED_REC:
mariadb_increment_pages_read(stats);
continue;
case DB_TABLESPACE_DELETED:
return nullptr;
default:
sql_print_error("InnoDB: Reading compressed page "
"[page id: space=" UINT32PF ", page number=" UINT32PF
"] failed with error: %s",
page_id.space(), page_id.page_no(), ut_strerr(err));
return nullptr;
}
}
discard_attempted= true;
ut_ad(bpage->in_file());
ut_ad(page_id == bpage->id());
const bool got_s_latch= bpage->lock.s_lock_try();
hash_lock.unlock_shared();
if (UNIV_LIKELY(got_s_latch))
break;
/* We may fail to acquire bpage->lock because a read is holding an
exclusive latch on this block and either in progress or invoking
buf_pool_t::corrupted_evict().
Let us aqcuire and release buf_pool.mutex to ensure that any
buf_pool_t::corrupted_evict() will proceed before we reacquire
the hash_lock that it could be waiting for.
While we are at it, let us also try to discard any uncompressed
page frame of the compressed BLOB page, in case one had been
allocated for writing the BLOB. */
mysql_mutex_lock(&buf_pool.mutex);
if (buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain))
bpage= buf_pool.page_hash.get(page_id, chain);
if (bpage)
buf_LRU_free_page(bpage, false);
mysql_mutex_unlock(&buf_pool.mutex);
}
if (UNIV_UNLIKELY(!bpage->zip.data))
{
ut_d(const auto s=) bpage->fix();
ut_ad(s >= buf_page_t::UNFIXED);
ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX);
ut_ad("no ROW_FORMAT=COMPRESSED page!" == 0);
bpage->lock.s_unlock();
bpage= nullptr;
}
buf_page_make_young_if_needed(bpage);
else
buf_page_make_young_if_needed(bpage);
#ifdef UNIV_DEBUG
if (!(++buf_dbg_counter % 5771)) buf_pool.validate();
#endif /* UNIV_DEBUG */
return bpage;
must_read_page:
switch (dberr_t err= buf_read_page(page_id)) {
case DB_SUCCESS:
case DB_SUCCESS_LOCKED_REC:
mariadb_increment_pages_read(stats);
goto lookup;
default:
ib::error() << "Reading compressed page " << page_id
<< " failed with error: " << err;
return nullptr;
}
}
/********************************************************************//**

View file

@ -447,7 +447,7 @@ read_ahead:
return count;
}
dberr_t buf_read_page(const page_id_t page_id)
dberr_t buf_read_page(const page_id_t page_id, bool unzip)
{
fil_space_t *space= fil_space_t::get(page_id.space());
if (UNIV_UNLIKELY(!space))
@ -462,7 +462,7 @@ dberr_t buf_read_page(const page_id_t page_id)
buf_LRU_stat_inc_io(); /* NOT protected by buf_pool.mutex */
return buf_read_page_low(space, true, BUF_READ_ANY_PAGE,
page_id, space->zip_size(), true);
page_id, space->zip_size(), unzip);
}
/** High-level function which reads a page asynchronously from a file to the

View file

@ -188,11 +188,7 @@ buf_block_t *buf_page_try_get(const page_id_t page_id, mtr_t *mtr);
/** Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
The page must be released with unfix().
NOTE: the page is not protected by any latch. Mutual exclusion has to
be implemented at a higher level. In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches.
The page must be released with s_unlock().
@param page_id page identifier
@return pointer to the block, s-latched */
buf_page_t *buf_page_get_zip(const page_id_t page_id);

View file

@ -32,13 +32,14 @@ Created 11/5/1995 Heikki Tuuri
/** Read a page synchronously from a file. buf_page_t::read_complete()
will be invoked on read completion.
@param page_id page id
@param unzip whether to decompress ROW_FORMAT=COMPRESSED pages
@retval DB_SUCCESS if the page was read and is not corrupted
@retval DB_SUCCESS_LOCKED_REC if the page was not read
@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted
@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
after decryption normal page checksum does not match.
@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
dberr_t buf_read_page(const page_id_t page_id);
dberr_t buf_read_page(const page_id_t page_id, bool unzip= true);
/** High-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there. Sets the io_fix flag and sets

View file

@ -712,7 +712,10 @@ error:
const byte* buf = row_ext_lookup(ext, col->ind,
&len);
if (UNIV_LIKELY_NULL(buf)) {
ut_a(buf != field_ref_zero);
if (UNIV_UNLIKELY(buf == field_ref_zero)) {
*err = DB_CORRUPTION;
goto error;
}
if (i < dict_index_get_n_unique(index)) {
dfield_set_data(field, buf, len);
} else {
@ -725,7 +728,10 @@ error:
const byte* buf = row_ext_lookup(ext, col->ind,
&len);
if (UNIV_LIKELY_NULL(buf)) {
ut_a(buf != field_ref_zero);
if (UNIV_UNLIKELY(buf == field_ref_zero)) {
*err = DB_CORRUPTION;
goto error;
}
dfield_set_data(field, buf, len);
}
}