MDEV-34166 Server could hang with BP < 80M under stress

BUF_LRU_MIN_LEN (256) is too high a value for small buffer pool (BP) sizes.
For example, for a BP size lower than 80M with a 16K page size, the limit is
more than 5% of the total BP, and for the smallest BP of 5M it is 80% of the BP.
Non-data objects like explicit locks could occupy part of the buffer pool,
reducing the pages available for LRU. If the LRU reaches the minimum limit and
no free pages are available, the server would hang, with the page cleaner not
able to free any more pages.

Fix: To avoid such a hang, we adjust the LRU limit to be lower than the limit
for data objects as checked in buf_LRU_check_size_of_non_data_objects(),
i.e. one page less than 5% of the BP.
This commit is contained in:
mariadb-DebarunBanerjee 2024-05-15 17:30:52 +05:30
parent 0907df3d89
commit b2944adb76
5 changed files with 51 additions and 1 deletions

View file

@ -0,0 +1,13 @@
#
# MDEV-34166 Server could hang with BP < 80M under stress
#
call mtr.add_suppression("\\[Warning\\] InnoDB: Over 67 percent of the buffer pool");
CREATE TABLE t1 (col1 INT) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1),(2),(3),(4),(5);
SET STATEMENT debug_dbug='+d,innodb_skip_lock_bitmap' FOR
INSERT INTO t1 SELECT a.* FROM t1 a, t1 b, t1 c, t1 d, t1 e, t1 f, t1 g LIMIT 45000;
ERROR HY000: The total number of locks exceeds the lock table size
SELECT COUNT(*) FROM t1;
COUNT(*)
5
DROP TABLE t1;

View file

@ -0,0 +1 @@
--innodb_buffer_pool_size=5M

View file

@ -0,0 +1,19 @@
--source include/have_innodb.inc
--source include/have_debug.inc
--echo #
--echo # MDEV-34166 Server could hang with BP < 80M under stress
--echo #
# This test is run with --innodb_buffer_pool_size=5M (see the accompanying
# .opt file), so the "Over 67 percent of the buffer pool" warning is suppressed.
call mtr.add_suppression("\\[Warning\\] InnoDB: Over 67 percent of the buffer pool");
CREATE TABLE t1 (col1 INT) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1),(2),(3),(4),(5);
# The innodb_skip_lock_bitmap debug point makes lock_rec_find_similar_on_page()
# return nullptr unless the transaction is rolling back. Presumably this forces
# a separate lock object for each record lock so that locks fill the small
# buffer pool (TODO confirm). The statement is expected to fail with
# ER_LOCK_TABLE_FULL instead of hanging the server.
--error ER_LOCK_TABLE_FULL
SET STATEMENT debug_dbug='+d,innodb_skip_lock_bitmap' FOR
INSERT INTO t1 SELECT a.* FROM t1 a, t1 b, t1 c, t1 d, t1 e, t1 f, t1 g LIMIT 45000;
# The failed INSERT ... SELECT must leave only the five original rows.
SELECT COUNT(*) FROM t1;
DROP TABLE t1;

View file

@ -1300,9 +1300,21 @@ static void buf_flush_LRU_list_batch(ulint max, bool evict,
static_assert(FIL_NULL > SRV_TMP_SPACE_ID, "consistency"); static_assert(FIL_NULL > SRV_TMP_SPACE_ID, "consistency");
static_assert(FIL_NULL > SRV_SPACE_ID_UPPER_BOUND, "consistency"); static_assert(FIL_NULL > SRV_SPACE_ID_UPPER_BOUND, "consistency");
/* BUF_LRU_MIN_LEN (256) is too high a value for small buffer pool (BP) sizes.
For example, for a BP size lower than 80M with a 16K page size, the limit is
more than 5% of the total BP, and for the smallest BP of 5M it is 80% of the
BP. Non-data objects like explicit locks could occupy part of the buffer pool,
reducing the pages available for LRU. If the LRU reaches the minimum limit and
no free pages are available, the server would hang, with the page cleaner not
able to free any more pages. To avoid such a hang, we adjust the LRU limit to
be lower than the limit for data objects as checked in
buf_LRU_check_size_of_non_data_objects(), i.e. one page less than 5% of BP. */
size_t pool_limit= buf_pool.curr_size / 20 - 1;
auto buf_lru_min_len= std::min<size_t>(pool_limit, BUF_LRU_MIN_LEN);
for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.LRU); for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.LRU);
bpage && bpage &&
((UT_LIST_GET_LEN(buf_pool.LRU) > BUF_LRU_MIN_LEN && ((UT_LIST_GET_LEN(buf_pool.LRU) > buf_lru_min_len &&
UT_LIST_GET_LEN(buf_pool.free) < free_limit) || UT_LIST_GET_LEN(buf_pool.free) < free_limit) ||
recv_recovery_is_on()); ++scanned) recv_recovery_is_on()); ++scanned)
{ {

View file

@ -1739,6 +1739,11 @@ lock_rec_find_similar_on_page(
const trx_t* trx) /*!< in: transaction */ const trx_t* trx) /*!< in: transaction */
{ {
ut_ad(lock_mutex_own()); ut_ad(lock_mutex_own());
DBUG_EXECUTE_IF("innodb_skip_lock_bitmap", {
if (!trx->in_rollback) {
return nullptr;
}
});
for (/* No op */; for (/* No op */;
lock != NULL; lock != NULL;