MDEV-27891: SIGSEGV in InnoDB buffer pool resize

During an increase in resize, the new curr_size got a value
less than old_size.

As n_chunks_new and n_chunks have a strong correlation to the
resizing operation in progress, we can use them and remove the
need for old_size.

For convienece the n_chunks_new < n_chunks is now the is_shrinking
function.

The volatile compiler optimization on n_chunks{,_new} is removed
as real mutex uses are needed.

Other n_chunks_new/n_chunks methods:

n_chunks_new and n_chunks almost always read and altered under
the pool mutex.

Exceptions are:
* i_s_innodb_buffer_page_fill,
* buf_pool_t::is_uncompressed (via is_blocked_field)
These need reexamining for the need of a mutex, however comments
indicates this already.

get_n_pages has uses in buffer pool load, recover log memory
exhaustion estimates and innodb status so take the minimum number
of chunks for safety.

The buf_pool_t::running_out function also uses curr_size/old_size.
We replace this hot function calculation with just n_chunks_new.
This is the new size of the chunks before the resizing occurs.

If we are resizing down, we've already got the case we had previously
(as the minimum). If we are resizing upwards, we are taking an
optimistic view that there will be buffer chunks available for locks.
As this memory allocation is occurring immediately next the resizing
function it seems likely.

Compiler hint UNIV_UNLIKELY removed to leave it to the branch
predictor to make an informed decision.

Added test case of a smaller size than the Marko/Roel original
in JIRA reducing the size to 256M. SEGV hits roughly 1/10 times
but its better than a 21G memory size.

Reviewer: Marko
This commit is contained in:
Daniel Black 2022-02-22 17:42:59 +11:00
parent 1fa872f6ef
commit b6a2472489
8 changed files with 71 additions and 26 deletions

View file

@ -0,0 +1,13 @@
MDEV-27891: Delayed SIGSEGV in InnoDB buffer pool resize
after or during DROP TABLE
select @@innodb_buffer_pool_chunk_size;
@@innodb_buffer_pool_chunk_size
1048576
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
SET GLOBAL innodb_buffer_pool_size=256*1024*1024;
DROP TABLE t1;
SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size + @@innodb_buffer_pool_chunk_size;
# end of 10.6 test
set global innodb_buffer_pool_size = 8388608;;

View file

@ -0,0 +1 @@
--innodb-buffer-pool-chunk-size=1M

View file

@ -0,0 +1,28 @@
--source include/have_innodb.inc
--source include/big_test.inc
--let $save_size= `SELECT @@GLOBAL.innodb_buffer_pool_size`
let $wait_timeout = 60;
let $wait_condition =
SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool'
FROM information_schema.global_status
WHERE variable_name = 'INNODB_BUFFER_POOL_RESIZE_STATUS';
--echo
--echo MDEV-27891: Delayed SIGSEGV in InnoDB buffer pool resize
--echo after or during DROP TABLE
--echo
select @@innodb_buffer_pool_chunk_size;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
SET GLOBAL innodb_buffer_pool_size=256*1024*1024;
DROP TABLE t1;
--source include/wait_condition.inc
SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size + @@innodb_buffer_pool_chunk_size;
--source include/wait_condition.inc
--echo # end of 10.6 test
--eval set global innodb_buffer_pool_size = $save_size;
--source include/wait_condition.inc

View file

@ -298,7 +298,7 @@ static buf_buddy_free_t* buf_buddy_alloc_zip(ulint i)
buf = UT_LIST_GET_FIRST(buf_pool.zip_free[i]);
if (buf_pool.curr_size < buf_pool.old_size
if (buf_pool.is_shrinking()
&& UT_LIST_GET_LEN(buf_pool.withdraw)
< buf_pool.withdraw_target) {
@ -609,7 +609,7 @@ recombine:
We may waste up to 15360*max_len bytes to free blocks
(1024 + 2048 + 4096 + 8192 = 15360) */
if (UT_LIST_GET_LEN(buf_pool.zip_free[i]) < 16
&& buf_pool.curr_size >= buf_pool.old_size) {
&& !buf_pool.is_shrinking()) {
goto func_exit;
}
@ -715,7 +715,7 @@ buf_buddy_realloc(void* buf, ulint size)
void buf_buddy_condense_free()
{
mysql_mutex_assert_owner(&buf_pool.mutex);
ut_ad(buf_pool.curr_size < buf_pool.old_size);
ut_ad(buf_pool.is_shrinking());
for (ulint i = 0; i < UT_ARR_SIZE(buf_pool.zip_free); ++i) {
buf_buddy_free_t* buf =

View file

@ -1200,7 +1200,6 @@ bool buf_pool_t::create()
for (size_t i= 0; i < UT_ARR_SIZE(zip_free); ++i)
UT_LIST_INIT(zip_free[i], &buf_buddy_free_t::list);
ulint s= curr_size;
old_size= s;
s/= BUF_READ_AHEAD_PORTION;
read_ahead_area= s >= READ_AHEAD_PAGES
? READ_AHEAD_PAGES
@ -1669,7 +1668,6 @@ inline void buf_pool_t::resize()
#endif /* BTR_CUR_HASH_ADAPT */
mysql_mutex_lock(&mutex);
ut_ad(curr_size == old_size);
ut_ad(n_chunks_new == n_chunks);
ut_ad(UT_LIST_GET_LEN(withdraw) == 0);
@ -1678,7 +1676,7 @@ inline void buf_pool_t::resize()
curr_size = n_chunks_new * chunks->size;
mysql_mutex_unlock(&mutex);
if (curr_size < old_size) {
if (is_shrinking()) {
/* set withdraw target */
size_t w = 0;
@ -1699,7 +1697,7 @@ inline void buf_pool_t::resize()
withdraw_retry:
/* wait for the number of blocks fit to the new size (if needed)*/
bool should_retry_withdraw = curr_size < old_size
bool should_retry_withdraw = is_shrinking()
&& withdraw_blocks();
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
@ -1782,7 +1780,7 @@ withdraw_retry:
ULINTPF " to " ULINTPF ".",
n_chunks, n_chunks_new);
if (n_chunks_new < n_chunks) {
if (is_shrinking()) {
/* delete chunks */
chunk_t* chunk = chunks + n_chunks_new;
const chunk_t* const echunk = chunks + n_chunks;
@ -1846,8 +1844,7 @@ withdraw_retry:
goto calc_buf_pool_size;
}
ulint n_chunks_copy = ut_min(n_chunks_new,
n_chunks);
ulint n_chunks_copy = ut_min(n_chunks_new, n_chunks);
memcpy(new_chunks, chunks,
n_chunks_copy * sizeof *new_chunks);
@ -1914,7 +1911,6 @@ calc_buf_pool_size:
/* set size */
ut_ad(UT_LIST_GET_LEN(withdraw) == 0);
ulint s= curr_size;
old_size= s;
s/= BUF_READ_AHEAD_PORTION;
read_ahead_area= s >= READ_AHEAD_PAGES
? READ_AHEAD_PAGES
@ -3876,7 +3872,7 @@ void buf_pool_t::validate()
mysql_mutex_unlock(&flush_list_mutex);
if (curr_size == old_size
if (n_chunks_new == n_chunks
&& n_lru + n_free > curr_size + n_zip) {
ib::fatal() << "n_LRU " << n_lru << ", n_free " << n_free
@ -3886,7 +3882,7 @@ void buf_pool_t::validate()
ut_ad(UT_LIST_GET_LEN(LRU) >= n_lru);
if (curr_size == old_size
if (n_chunks_new == n_chunks
&& UT_LIST_GET_LEN(free) != n_free) {
ib::fatal() << "Free list len "

View file

@ -1225,7 +1225,7 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
ulint free_limit= srv_LRU_scan_depth;
mysql_mutex_assert_owner(&buf_pool.mutex);
if (buf_pool.withdraw_target && buf_pool.curr_size < buf_pool.old_size)
if (buf_pool.withdraw_target && buf_pool.is_shrinking())
free_limit+= buf_pool.withdraw_target - UT_LIST_GET_LEN(buf_pool.withdraw);
const auto neighbors= UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN

View file

@ -288,7 +288,7 @@ buf_block_t* buf_LRU_get_free_only()
ut_a(!block->page.in_file());
UT_LIST_REMOVE(buf_pool.free, &block->page);
if (buf_pool.curr_size >= buf_pool.old_size
if (!buf_pool.is_shrinking()
|| UT_LIST_GET_LEN(buf_pool.withdraw)
>= buf_pool.withdraw_target
|| !buf_pool.will_be_withdrawn(block->page)) {
@ -321,7 +321,7 @@ static void buf_LRU_check_size_of_non_data_objects()
{
mysql_mutex_assert_owner(&buf_pool.mutex);
if (recv_recovery_is_on() || buf_pool.curr_size != buf_pool.old_size)
if (recv_recovery_is_on() || buf_pool.n_chunks_new != buf_pool.n_chunks)
return;
const auto s= UT_LIST_GET_LEN(buf_pool.free) + UT_LIST_GET_LEN(buf_pool.LRU);
@ -1012,7 +1012,7 @@ buf_LRU_block_free_non_file_page(
page_zip_set_size(&block->page.zip, 0);
}
if (buf_pool.curr_size < buf_pool.old_size
if (buf_pool.is_shrinking()
&& UT_LIST_GET_LEN(buf_pool.withdraw) < buf_pool.withdraw_target
&& buf_pool.will_be_withdrawn(block->page)) {
/* This should be withdrawn */

View file

@ -1353,7 +1353,7 @@ public:
{
ut_ad(is_initialised());
size_t size= 0;
for (auto j= n_chunks; j--; )
for (auto j= ut_min(n_chunks_new, n_chunks); j--; )
size+= chunks[j].size;
return size;
}
@ -1363,7 +1363,7 @@ public:
@return whether the frame will be withdrawn */
bool will_be_withdrawn(const byte *ptr) const
{
ut_ad(curr_size < old_size);
ut_ad(n_chunks_new < n_chunks);
#ifdef SAFE_MUTEX
if (resize_in_progress())
mysql_mutex_assert_owner(&mutex);
@ -1383,7 +1383,7 @@ public:
@return whether the frame will be withdrawn */
bool will_be_withdrawn(const buf_page_t &bpage) const
{
ut_ad(curr_size < old_size);
ut_ad(n_chunks_new < n_chunks);
#ifdef SAFE_MUTEX
if (resize_in_progress())
mysql_mutex_assert_owner(&mutex);
@ -1540,11 +1540,18 @@ public:
inline void watch_remove(buf_page_t *watch, hash_chain &chain);
/** @return whether less than 1/4 of the buffer pool is available */
TPOOL_SUPPRESS_TSAN
bool running_out() const
{
return !recv_recovery_is_on() &&
UNIV_UNLIKELY(UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) <
std::min(curr_size, old_size) / 4);
UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) <
n_chunks_new / 4 * chunks->size;
}
/** @return whether the buffer pool is shrinking */
inline bool is_shrinking() const
{
return n_chunks_new < n_chunks;
}
#ifdef UNIV_DEBUG
@ -1603,15 +1610,15 @@ public:
ut_allocator<unsigned char> allocator; /*!< Allocator used for
allocating memory for the the "chunks"
member. */
volatile ulint n_chunks; /*!< number of buffer pool chunks */
volatile ulint n_chunks_new; /*!< new number of buffer pool chunks */
ulint n_chunks; /*!< number of buffer pool chunks */
ulint n_chunks_new; /*!< new number of buffer pool chunks.
both n_chunks{,new} are protected under
mutex */
chunk_t* chunks; /*!< buffer pool chunks */
chunk_t* chunks_old; /*!< old buffer pool chunks to be freed
after resizing buffer pool */
/** current pool size in pages */
Atomic_counter<ulint> curr_size;
/** previous pool size in pages */
Atomic_counter<ulint> old_size;
/** read-ahead request size in pages */
Atomic_counter<uint32_t> read_ahead_area;