MDEV-20215: binlog.show_concurrent_rotate failed in buildbot with wrong result

Problem:
=======
There are two issues that are addressed in this patch:
 1) SHOW BINARY LOGS uses caching to store the binary logs that exist
in the log directory; however, if new events are written to the logs,
the caching strategy is unaware. This is okay for users, as it is
okay for SHOW to return slightly old data. The test, however, can
result in inconsistent data. It runs two connections concurrently,
where one shows the logs, and the other adds a new file. The output
of SHOW BINARY LOGS then depends on when the cache is built, with
respect to the time that the second connection rotates the logs.
 2) There is a race condition between RESET MASTER and SHOW BINARY
LOGS. More specifically, where they both need the binary log lock to
begin, SHOW BINARY LOGS only needs the lock to build its cache. If
RESET MASTER is issued after SHOW BINARY LOGS has built its cache and
before it has returned the results, the presented data may be
incorrect.

Solution:
========
 1) As it is okay for users to see stale data, to make the test
consistent, use DEBUG_SYNC to force the race condition (problem 2) to
make SHOW BINARY LOGS build a cache before RESET MASTER is called.
Then, use additional logic from the next part of the solution to
rebuild the cache.
 2) Use an Atomic_counter to keep track of the number of times RESET
MASTER has been called. If the value of the counter changes after
building the cache, the cache should be rebuilt and the analysis
should be restarted.

Reviewed By:
============
Andrei Elkin: <andrei.elkin@mariadb.com>
This commit is contained in:
Brandon Nesterenko 2021-05-21 14:53:43 -06:00
parent 38b79d7295
commit 46c3e7e353
5 changed files with 28 additions and 3 deletions

View file

@ -2,9 +2,10 @@ connect con1,localhost,root,,;
FLUSH LOGS;
FLUSH LOGS;
FLUSH LOGS;
SET DEBUG_SYNC= "at_after_lock_index WAIT_FOR con1_go";
SET DEBUG_SYNC= "at_after_lock_index SIGNAL con1_ready WAIT_FOR con1_go";
SHOW BINARY LOGS;
connect con2,localhost,root,,;
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
RESET MASTER;
FLUSH LOGS;
SET DEBUG_SYNC= "now SIGNAL con1_go";

View file

@ -8,10 +8,15 @@ FLUSH LOGS;
FLUSH LOGS;
FLUSH LOGS;
SET DEBUG_SYNC= "at_after_lock_index WAIT_FOR con1_go";
# This forced synchronization pattern ensures con1 will execute its retry
# path. More specifically, con1 should see that the cache of log files it
# creates during SHOW BINARY LOGS becomes invalidated after con2 completes
# RESET MASTER.
SET DEBUG_SYNC= "at_after_lock_index SIGNAL con1_ready WAIT_FOR con1_go";
--send SHOW BINARY LOGS
connect(con2,localhost,root,,);
SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
RESET MASTER;
FLUSH LOGS;
SET DEBUG_SYNC= "now SIGNAL con1_go";

View file

@ -3256,7 +3256,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
description_event_for_exec(0), description_event_for_queue(0),
current_binlog_id(0)
current_binlog_id(0), reset_master_count(0)
{
/*
We don't want to initialize locks here as such initialization depends on
@ -4350,6 +4350,7 @@ err:
}
mysql_cond_broadcast(&COND_xid_list);
reset_master_pending--;
reset_master_count++;
mysql_mutex_unlock(&LOCK_xid_list);
}

View file

@ -672,6 +672,11 @@ public:
my_off_t last_commit_pos_offset;
ulong current_binlog_id;
/*
Tracks the number of times that the master has been reset
*/
Atomic_counter<uint64> reset_master_count;
MYSQL_BIN_LOG(uint *sync_period);
/*
note that there's no destructor ~MYSQL_BIN_LOG() !
@ -881,6 +886,7 @@ public:
inline mysql_mutex_t* get_log_lock() { return &LOCK_log; }
inline mysql_cond_t* get_bin_log_cond() { return &COND_bin_log_updated; }
inline IO_CACHE* get_log_file() { return &log_file; }
inline uint64 get_reset_master_count() { return reset_master_count; }
inline void lock_index() { mysql_mutex_lock(&LOCK_index);}
inline void unlock_index() { mysql_mutex_unlock(&LOCK_index);}

View file

@ -4374,6 +4374,7 @@ bool show_binlogs(THD* thd)
Protocol *protocol= thd->protocol;
uint retry_count= 0;
size_t cur_dir_len;
uint64 expected_reset_masters;
DBUG_ENTER("show_binlogs");
if (!mysql_bin_log.is_open())
@ -4399,6 +4400,7 @@ retry:
mysql_mutex_lock(mysql_bin_log.get_log_lock());
mysql_bin_log.lock_index();
mysql_bin_log.raw_get_current_log(&cur);
expected_reset_masters= mysql_bin_log.get_reset_master_count();
mysql_mutex_unlock(mysql_bin_log.get_log_lock());
/* The following call unlocks lock_index */
@ -4419,6 +4421,16 @@ retry:
cur_link->name.str+= dir_len;
cur_link->name.length-= dir_len;
if (mysql_bin_log.get_reset_master_count() > expected_reset_masters)
{
/*
Reset master was called after we cached filenames.
Reinitialize the cache.
*/
free_root(&mem_root, MYF(MY_MARK_BLOCKS_FREE));
goto retry;
}
if (!(strncmp(fname+dir_len, cur.log_file_name+cur_dir_len, length)))
cur_link->size= cur.pos; /* The active log, use the active position */
else