mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
MDEV-35413 InnoDB: Cannot load compressed BLOB
A race condition was observed between two buf_page_get_zip() for a page. One of them had proceeded to buf_read_page(), allocating and x-latching a buf_block_t that initially comprises only an uncompressed page frame. While that thread was waiting inside buf_block_alloc(), another thread would try to access the same page. Without acquiring a page latch, it would wrongly conclude that there is corruption because no compressed page frame exists for the block. buf_page_get_zip(): Simplify the logic and correct the documentation. Always acquire a shared latch to prevent any race condition with a concurrent read operation. No longer increment a buffer-fix; the latch is sufficient for preventing page relocation or eviction. buf_read_page(): Add the parameter bool unzip=true. In buf_page_get_zip() there is no need to allocate an uncompressed page frame for reading a compressed BLOB page. We only need that for other ROW_FORMAT=COMPRESSED pages, or for writing compressed BLOB pages. btr_copy_zblob_prefix(): Remove the message "Cannot load compressed BLOB" because buf_page_get_zip() will already have reported a more specific error whenever it returns nullptr. row_merge_buf_add(): Do not crash on BLOB corruption, but return an error instead. (In debug builds, an assertion will fail if this corruption is noticed.) Reviewed by: Debarun Banerjee
This commit is contained in:
parent
a06d81ff3f
commit
26597b91b3
6 changed files with 66 additions and 104 deletions
|
@ -6850,14 +6850,9 @@ btr_copy_zblob_prefix(
|
|||
buf_page_t* bpage;
|
||||
uint32_t next_page_no;
|
||||
|
||||
/* There is no latch on bpage directly. Instead,
|
||||
bpage is protected by the B-tree page latch that
|
||||
is being held on the clustered index record, or,
|
||||
in row_merge_copy_blobs(), by an exclusive table lock. */
|
||||
bpage = buf_page_get_zip(id);
|
||||
|
||||
if (UNIV_UNLIKELY(!bpage)) {
|
||||
ib::error() << "Cannot load compressed BLOB " << id;
|
||||
goto func_exit;
|
||||
}
|
||||
|
||||
|
@ -6927,12 +6922,10 @@ inflate_error:
|
|||
|
||||
end_of_blob:
|
||||
bpage->lock.s_unlock();
|
||||
bpage->unfix();
|
||||
goto func_exit;
|
||||
}
|
||||
|
||||
bpage->lock.s_unlock();
|
||||
bpage->unfix();
|
||||
|
||||
/* On other BLOB pages except the first
|
||||
the BLOB header always is at the page header: */
|
||||
|
|
|
@ -2199,17 +2199,8 @@ static void buf_inc_get(ha_handler_stats *stats)
|
|||
++buf_pool.stat.n_page_gets;
|
||||
}
|
||||
|
||||
/** Get read access to a compressed page (usually of type
|
||||
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
|
||||
The page must be released with unfix().
|
||||
NOTE: the page is not protected by any latch. Mutual exclusion has to
|
||||
be implemented at a higher level. In other words, all possible
|
||||
accesses to a given page through this function must be protected by
|
||||
the same set of mutexes or latches.
|
||||
@param page_id page identifier
|
||||
@return pointer to the block, s-latched */
|
||||
TRANSACTIONAL_TARGET
|
||||
buf_page_t* buf_page_get_zip(const page_id_t page_id)
|
||||
buf_page_t *buf_page_get_zip(const page_id_t page_id)
|
||||
{
|
||||
ha_handler_stats *const stats= mariadb_stats;
|
||||
buf_inc_get(stats);
|
||||
|
@ -2218,109 +2209,84 @@ buf_page_t* buf_page_get_zip(const page_id_t page_id)
|
|||
page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
|
||||
buf_page_t *bpage;
|
||||
|
||||
lookup:
|
||||
for (bool discard_attempted= false;;)
|
||||
for (;;)
|
||||
{
|
||||
#ifndef NO_ELISION
|
||||
if (xbegin())
|
||||
{
|
||||
if (hash_lock.is_locked())
|
||||
xabort();
|
||||
bpage= buf_pool.page_hash.get(page_id, chain);
|
||||
if (!bpage || buf_pool.watch_is_sentinel(*bpage))
|
||||
{
|
||||
xend();
|
||||
goto must_read_page;
|
||||
}
|
||||
if (!bpage->zip.data)
|
||||
else
|
||||
{
|
||||
/* There is no ROW_FORMAT=COMPRESSED page. */
|
||||
bpage= buf_pool.page_hash.get(page_id, chain);
|
||||
const bool got_s_latch= bpage && !buf_pool.watch_is_sentinel(*bpage) &&
|
||||
bpage->lock.s_lock_try();
|
||||
xend();
|
||||
return nullptr;
|
||||
}
|
||||
if (discard_attempted || !bpage->frame)
|
||||
{
|
||||
if (!bpage->lock.s_lock_try())
|
||||
xabort();
|
||||
xend();
|
||||
break;
|
||||
}
|
||||
xend();
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
hash_lock.lock_shared();
|
||||
bpage= buf_pool.page_hash.get(page_id, chain);
|
||||
if (!bpage || buf_pool.watch_is_sentinel(*bpage))
|
||||
{
|
||||
hash_lock.unlock_shared();
|
||||
goto must_read_page;
|
||||
}
|
||||
|
||||
ut_ad(bpage->in_file());
|
||||
ut_ad(page_id == bpage->id());
|
||||
|
||||
if (!bpage->zip.data)
|
||||
{
|
||||
/* There is no ROW_FORMAT=COMPRESSED page. */
|
||||
hash_lock.unlock_shared();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (discard_attempted || !bpage->frame)
|
||||
{
|
||||
const bool got_s_latch= bpage->lock.s_lock_try();
|
||||
hash_lock.unlock_shared();
|
||||
if (UNIV_LIKELY(got_s_latch))
|
||||
if (got_s_latch)
|
||||
break;
|
||||
/* We may fail to acquire bpage->lock because
|
||||
buf_page_t::read_complete() may be invoking
|
||||
buf_pool_t::corrupted_evict() on this block, which it would
|
||||
hold an exclusive latch on.
|
||||
|
||||
Let us acquire and release buf_pool.mutex to ensure that any
|
||||
buf_pool_t::corrupted_evict() will proceed before we reacquire
|
||||
the hash_lock that it could be waiting for. */
|
||||
mysql_mutex_lock(&buf_pool.mutex);
|
||||
mysql_mutex_unlock(&buf_pool.mutex);
|
||||
goto lookup;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
hash_lock.lock_shared();
|
||||
bpage= buf_pool.page_hash.get(page_id, chain);
|
||||
if (!bpage || buf_pool.watch_is_sentinel(*bpage))
|
||||
{
|
||||
hash_lock.unlock_shared();
|
||||
switch (dberr_t err= buf_read_page(page_id, false)) {
|
||||
case DB_SUCCESS:
|
||||
case DB_SUCCESS_LOCKED_REC:
|
||||
mariadb_increment_pages_read(stats);
|
||||
continue;
|
||||
case DB_TABLESPACE_DELETED:
|
||||
return nullptr;
|
||||
default:
|
||||
sql_print_error("InnoDB: Reading compressed page "
|
||||
"[page id: space=" UINT32PF ", page number=" UINT32PF
|
||||
"] failed with error: %s",
|
||||
page_id.space(), page_id.page_no(), ut_strerr(err));
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
discard_attempted= true;
|
||||
ut_ad(bpage->in_file());
|
||||
ut_ad(page_id == bpage->id());
|
||||
|
||||
const bool got_s_latch= bpage->lock.s_lock_try();
|
||||
hash_lock.unlock_shared();
|
||||
if (UNIV_LIKELY(got_s_latch))
|
||||
break;
|
||||
/* We may fail to acquire bpage->lock because a read is holding an
|
||||
exclusive latch on this block and either in progress or invoking
|
||||
buf_pool_t::corrupted_evict().
|
||||
|
||||
Let us acquire and release buf_pool.mutex to ensure that any
|
||||
buf_pool_t::corrupted_evict() will proceed before we reacquire
|
||||
the hash_lock that it could be waiting for.
|
||||
|
||||
While we are at it, let us also try to discard any uncompressed
|
||||
page frame of the compressed BLOB page, in case one had been
|
||||
allocated for writing the BLOB. */
|
||||
mysql_mutex_lock(&buf_pool.mutex);
|
||||
if (buf_page_t *bpage= buf_pool.page_hash.get(page_id, chain))
|
||||
bpage= buf_pool.page_hash.get(page_id, chain);
|
||||
if (bpage)
|
||||
buf_LRU_free_page(bpage, false);
|
||||
mysql_mutex_unlock(&buf_pool.mutex);
|
||||
}
|
||||
|
||||
if (UNIV_UNLIKELY(!bpage->zip.data))
|
||||
{
|
||||
ut_d(const auto s=) bpage->fix();
|
||||
ut_ad(s >= buf_page_t::UNFIXED);
|
||||
ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX);
|
||||
ut_ad("no ROW_FORMAT=COMPRESSED page!" == 0);
|
||||
bpage->lock.s_unlock();
|
||||
bpage= nullptr;
|
||||
}
|
||||
|
||||
buf_page_make_young_if_needed(bpage);
|
||||
else
|
||||
buf_page_make_young_if_needed(bpage);
|
||||
|
||||
#ifdef UNIV_DEBUG
|
||||
if (!(++buf_dbg_counter % 5771)) buf_pool.validate();
|
||||
#endif /* UNIV_DEBUG */
|
||||
return bpage;
|
||||
|
||||
must_read_page:
|
||||
switch (dberr_t err= buf_read_page(page_id)) {
|
||||
case DB_SUCCESS:
|
||||
case DB_SUCCESS_LOCKED_REC:
|
||||
mariadb_increment_pages_read(stats);
|
||||
goto lookup;
|
||||
default:
|
||||
ib::error() << "Reading compressed page " << page_id
|
||||
<< " failed with error: " << err;
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
/********************************************************************//**
|
||||
|
|
|
@ -447,7 +447,7 @@ read_ahead:
|
|||
return count;
|
||||
}
|
||||
|
||||
dberr_t buf_read_page(const page_id_t page_id)
|
||||
dberr_t buf_read_page(const page_id_t page_id, bool unzip)
|
||||
{
|
||||
fil_space_t *space= fil_space_t::get(page_id.space());
|
||||
if (UNIV_UNLIKELY(!space))
|
||||
|
@ -462,7 +462,7 @@ dberr_t buf_read_page(const page_id_t page_id)
|
|||
|
||||
buf_LRU_stat_inc_io(); /* NOT protected by buf_pool.mutex */
|
||||
return buf_read_page_low(space, true, BUF_READ_ANY_PAGE,
|
||||
page_id, space->zip_size(), true);
|
||||
page_id, space->zip_size(), unzip);
|
||||
}
|
||||
|
||||
/** High-level function which reads a page asynchronously from a file to the
|
||||
|
|
|
@ -188,11 +188,7 @@ buf_block_t *buf_page_try_get(const page_id_t page_id, mtr_t *mtr);
|
|||
|
||||
/** Get read access to a compressed page (usually of type
|
||||
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
|
||||
The page must be released with unfix().
|
||||
NOTE: the page is not protected by any latch. Mutual exclusion has to
|
||||
be implemented at a higher level. In other words, all possible
|
||||
accesses to a given page through this function must be protected by
|
||||
the same set of mutexes or latches.
|
||||
The page must be released with s_unlock().
|
||||
@param page_id page identifier
|
||||
@return pointer to the block, s-latched */
|
||||
buf_page_t *buf_page_get_zip(const page_id_t page_id);
|
||||
|
|
|
@ -32,13 +32,14 @@ Created 11/5/1995 Heikki Tuuri
|
|||
/** Read a page synchronously from a file. buf_page_t::read_complete()
|
||||
will be invoked on read completion.
|
||||
@param page_id page id
|
||||
@param unzip whether to decompress ROW_FORMAT=COMPRESSED pages
|
||||
@retval DB_SUCCESS if the page was read and is not corrupted
|
||||
@retval DB_SUCCESS_LOCKED_REC if the page was not read
|
||||
@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted
|
||||
@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
|
||||
after decryption normal page checksum does not match.
|
||||
@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
|
||||
dberr_t buf_read_page(const page_id_t page_id);
|
||||
dberr_t buf_read_page(const page_id_t page_id, bool unzip= true);
|
||||
|
||||
/** High-level function which reads a page asynchronously from a file to the
|
||||
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
|
||||
|
|
|
@ -712,7 +712,10 @@ error:
|
|||
const byte* buf = row_ext_lookup(ext, col->ind,
|
||||
&len);
|
||||
if (UNIV_LIKELY_NULL(buf)) {
|
||||
ut_a(buf != field_ref_zero);
|
||||
if (UNIV_UNLIKELY(buf == field_ref_zero)) {
|
||||
*err = DB_CORRUPTION;
|
||||
goto error;
|
||||
}
|
||||
if (i < dict_index_get_n_unique(index)) {
|
||||
dfield_set_data(field, buf, len);
|
||||
} else {
|
||||
|
@ -725,7 +728,10 @@ error:
|
|||
const byte* buf = row_ext_lookup(ext, col->ind,
|
||||
&len);
|
||||
if (UNIV_LIKELY_NULL(buf)) {
|
||||
ut_a(buf != field_ref_zero);
|
||||
if (UNIV_UNLIKELY(buf == field_ref_zero)) {
|
||||
*err = DB_CORRUPTION;
|
||||
goto error;
|
||||
}
|
||||
dfield_set_data(field, buf, len);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue