mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 03:52:35 +01:00
MDEV-9423: cannot add new node to the cluster: Binlog..
.. file '/var/log/mysql/mariadb-bin.000001' not found in binlog index, needed for recovery. Aborting. In a Galera cluster, while preparing for rsync/xtrabackup-based SST, the donor node takes an FTWRL followed by (REFRESH_ENGINE_LOG in rsync-based state transfer and) REFRESH_BINARY_LOG. The latter rotates the binary log and logs the Binlog_checkpoint_log_event corresponding to the penultimate binary log file into the new file. The checkpoint event for the current file is later logged synchronously by binlog_background_thread. Now, since in rsync/xtrabackup-based snapshot state transfer methods only the last binary log file is transferred to the joiner node, the file could get transferred even before the checkpoint event for that same file gets written to it. As a result, the joiner node would fail to start, complaining about the missing binlog file needed for recovery. In order to fix this, a mechanism has been put in place to make the REFRESH_BINARY_LOG operation wait for the Binlog_checkpoint_log_event to be logged for the current binary log file if the node is part of a Galera cluster. As further safety, during rsync-based state transfer the donor node now acquires and owns LOCK_log for the duration of the file transfer during SST.
This commit is contained in:
parent
33492ec8d4
commit
3fd214c8be
4 changed files with 53 additions and 3 deletions
29
sql/log.cc
29
sql/log.cc
|
@ -3690,7 +3690,10 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
|
|||
new_xid_list_entry->binlog_id= current_binlog_id;
|
||||
/* Remove any initial entries with no pending XIDs. */
|
||||
while ((b= binlog_xid_count_list.head()) && b->xid_count == 0)
|
||||
{
|
||||
my_free(binlog_xid_count_list.get());
|
||||
}
|
||||
mysql_cond_broadcast(&COND_xid_list);
|
||||
binlog_xid_count_list.push_back(new_xid_list_entry);
|
||||
mysql_mutex_unlock(&LOCK_xid_list);
|
||||
|
||||
|
@ -4227,6 +4230,7 @@ err:
|
|||
DBUG_ASSERT(b->xid_count == 0);
|
||||
my_free(binlog_xid_count_list.get());
|
||||
}
|
||||
mysql_cond_broadcast(&COND_xid_list);
|
||||
reset_master_pending--;
|
||||
mysql_mutex_unlock(&LOCK_xid_list);
|
||||
}
|
||||
|
@ -4237,6 +4241,26 @@ err:
|
|||
}
|
||||
|
||||
|
||||
void MYSQL_BIN_LOG::wait_for_last_checkpoint_event()
|
||||
{
|
||||
mysql_mutex_lock(&LOCK_xid_list);
|
||||
for (;;)
|
||||
{
|
||||
if (binlog_xid_count_list.is_last(binlog_xid_count_list.head()))
|
||||
break;
|
||||
mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
|
||||
}
|
||||
mysql_mutex_unlock(&LOCK_xid_list);
|
||||
|
||||
/*
|
||||
LOCK_xid_list and LOCK_log are chained, so the LOCK_log will only be
|
||||
obtained after mark_xid_done() has written the last checkpoint event.
|
||||
*/
|
||||
mysql_mutex_lock(&LOCK_log);
|
||||
mysql_mutex_unlock(&LOCK_log);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Delete relay log files prior to rli->group_relay_log_name
|
||||
(i.e. all logs which are not involved in a non-finished group
|
||||
|
@ -9394,7 +9418,7 @@ TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint)
|
|||
*/
|
||||
if (unlikely(reset_master_pending))
|
||||
{
|
||||
mysql_cond_signal(&COND_xid_list);
|
||||
mysql_cond_broadcast(&COND_xid_list);
|
||||
mysql_mutex_unlock(&LOCK_xid_list);
|
||||
DBUG_VOID_RETURN;
|
||||
}
|
||||
|
@ -9432,8 +9456,7 @@ TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint)
|
|||
mysql_mutex_lock(&LOCK_log);
|
||||
mysql_mutex_lock(&LOCK_xid_list);
|
||||
--mark_xid_done_waiting;
|
||||
if (unlikely(reset_master_pending))
|
||||
mysql_cond_signal(&COND_xid_list);
|
||||
mysql_cond_broadcast(&COND_xid_list);
|
||||
/* We need to reload current_binlog_id due to release/re-take of lock. */
|
||||
current= current_binlog_id;
|
||||
|
||||
|
|
|
@ -788,6 +788,7 @@ public:
|
|||
bool reset_logs(THD* thd, bool create_new_log,
|
||||
rpl_gtid *init_state, uint32 init_state_len,
|
||||
ulong next_log_number);
|
||||
void wait_for_last_checkpoint_event();
|
||||
void close(uint exiting);
|
||||
void clear_inuse_flag_when_closing(File file);
|
||||
|
||||
|
|
|
@ -155,6 +155,12 @@ bool reload_acl_and_cache(THD *thd, unsigned long long options,
|
|||
{
|
||||
if (mysql_bin_log.rotate_and_purge(true))
|
||||
*write_to_binlog= -1;
|
||||
|
||||
if (WSREP_ON)
|
||||
{
|
||||
/* Wait for last binlog checkpoint event to be logged. */
|
||||
mysql_bin_log.wait_for_last_checkpoint_event();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (options & REFRESH_RELAY_LOG)
|
||||
|
|
|
@ -1120,6 +1120,16 @@ wait_signal:
|
|||
if (!err)
|
||||
{
|
||||
sst_disallow_writes (thd.ptr, true);
|
||||
/*
|
||||
Lets also keep statements that modify binary logs (like RESET LOGS,
|
||||
RESET MASTER) from proceeding until the files have been transferred
|
||||
to the joiner node.
|
||||
*/
|
||||
if (mysql_bin_log.is_open())
|
||||
{
|
||||
mysql_mutex_lock(mysql_bin_log.get_log_lock());
|
||||
}
|
||||
|
||||
locked= true;
|
||||
goto wait_signal;
|
||||
}
|
||||
|
@ -1128,6 +1138,11 @@ wait_signal:
|
|||
{
|
||||
if (locked)
|
||||
{
|
||||
if (mysql_bin_log.is_open())
|
||||
{
|
||||
mysql_mutex_assert_owner(mysql_bin_log.get_log_lock());
|
||||
mysql_mutex_unlock(mysql_bin_log.get_log_lock());
|
||||
}
|
||||
sst_disallow_writes (thd.ptr, false);
|
||||
thd.ptr->global_read_lock.unlock_global_read_lock (thd.ptr);
|
||||
locked= false;
|
||||
|
@ -1160,6 +1175,11 @@ wait_signal:
|
|||
|
||||
if (locked) // don't forget to unlock server before return
|
||||
{
|
||||
if (mysql_bin_log.is_open())
|
||||
{
|
||||
mysql_mutex_assert_owner(mysql_bin_log.get_log_lock());
|
||||
mysql_mutex_unlock(mysql_bin_log.get_log_lock());
|
||||
}
|
||||
sst_disallow_writes (thd.ptr, false);
|
||||
thd.ptr->global_read_lock.unlock_global_read_lock (thd.ptr);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue