mirror of
https://github.com/MariaDB/server.git
synced 2025-03-22 15:08:38 +01:00
Fix for MDEV-9670 server_id mysteriously set to 0
Problem was that in a circular replication setup the master remembers position to events it has generated itself when reading from a slave. If there are no new events in the queue from the slave, a Gtid_list_log_event is generated to remember the last skipped event. The problem happens if there is a network delay and we generate a Gtid_list_log_event in the middle of the transaction, in which case there will be an implicit comment and a new transaction with serverid=0 will be logged. The fix was to not generate any Gtid_list_log_events in the middle of a transaction.
This commit is contained in:
parent
46d6f74c48
commit
946a07e8a8
4 changed files with 193 additions and 3 deletions
mysql-test/suite/rpl
sql
30
mysql-test/suite/rpl/r/circular_serverid0.result
Normal file
30
mysql-test/suite/rpl/r/circular_serverid0.result
Normal file
|
@ -0,0 +1,30 @@
|
|||
include/rpl_init.inc [topology=1->2->1]
|
||||
include/rpl_connect.inc [creating M4]
|
||||
include/rpl_connect.inc [creating M2]
|
||||
SET @old_debug= @@global.debug;
|
||||
STOP SLAVE;
|
||||
SET GLOBAL debug_dbug= "+d,dbug.rows_events_to_delay_relay_logging";
|
||||
START SLAVE IO_THREAD;
|
||||
include/wait_for_slave_io_to_start.inc
|
||||
CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY, b VARCHAR(30000)) ENGINE=innodb;
|
||||
INSERT INTO `t1` VALUES (null, repeat('a', 1024)), (null, repeat('b', 1024));
|
||||
SET debug_sync='now WAIT_FOR start_sql_thread';
|
||||
START SLAVE SQL_THREAD;
|
||||
include/show_binlog_events.inc
|
||||
Log_name Pos Event_type Server_id End_log_pos Info
|
||||
slave-bin.000001 # Gtid # # GTID #-#-#
|
||||
slave-bin.000001 # Query # # use `test`; CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY, b VARCHAR(30000)) ENGINE=innodb
|
||||
SET debug_sync='now SIGNAL go_on_relay_logging';
|
||||
include/show_binlog_events.inc
|
||||
Log_name Pos Event_type Server_id End_log_pos Info
|
||||
slave-bin.000001 # Gtid # # GTID #-#-#
|
||||
slave-bin.000001 # Query # # use `test`; CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY, b VARCHAR(30000)) ENGINE=innodb
|
||||
slave-bin.000001 # Gtid # # BEGIN GTID #-#-#
|
||||
slave-bin.000001 # Table_map # # table_id: # (test.t1)
|
||||
slave-bin.000001 # Write_rows_v1 # # table_id: #
|
||||
slave-bin.000001 # Write_rows_v1 # # table_id: # flags: STMT_END_F
|
||||
slave-bin.000001 # Xid # # COMMIT /* XID */
|
||||
drop table t1;
|
||||
SET GLOBAL debug_dbug= @old_debug;
|
||||
SET debug_sync='RESET';
|
||||
include/rpl_end.inc
|
30
mysql-test/suite/rpl/t/circular_serverid0.cnf
Normal file
30
mysql-test/suite/rpl/t/circular_serverid0.cnf
Normal file
|
@ -0,0 +1,30 @@
|
|||
!include ../my.cnf
|
||||
|
||||
[mysqld.1]
|
||||
gtid-domain-id=4
|
||||
server-id=4
|
||||
#
|
||||
log-slave-updates
|
||||
slave-parallel-threads=0
|
||||
gtid-strict-mode=1
|
||||
gtid-ignore-duplicates=1
|
||||
|
||||
#
|
||||
# Max-size row events to minimum with the idea to create
|
||||
# a number of Rows_log_event per Query.
|
||||
#
|
||||
binlog-row-event-max-size=1024
|
||||
|
||||
[mysqld.2]
|
||||
gtid-domain-id=2
|
||||
server-id=2
|
||||
#
|
||||
log-slave-updates
|
||||
slave-parallel-threads=0
|
||||
gtid-strict-mode=1
|
||||
gtid-ignore-duplicates=1
|
||||
binlog-row-event-max-size=1024
|
||||
# The slave will be initialized with a @@global.dbug-var value
|
||||
skip-slave-start=1
|
||||
|
||||
|
104
mysql-test/suite/rpl/t/circular_serverid0.test
Normal file
104
mysql-test/suite/rpl/t/circular_serverid0.test
Normal file
|
@ -0,0 +1,104 @@
|
|||
#
|
||||
# Testing chain/circular replication scenario of MDEV-9670
|
||||
# The effect of the bug was that we got a commit with a GTID with server_id
|
||||
#
|
||||
|
||||
--source include/have_binlog_format_row.inc
|
||||
--source include/have_innodb.inc
|
||||
--source include/have_debug.inc
|
||||
--source include/have_debug_sync.inc
|
||||
|
||||
--let $rpl_topology= 1->2->1
|
||||
--source include/rpl_init.inc
|
||||
|
||||
--let $rpl_connection_name= M4
|
||||
--let $rpl_server_number= 1
|
||||
--source include/rpl_connect.inc
|
||||
|
||||
--let $rpl_connection_name= M2
|
||||
--let $rpl_server_number= 2
|
||||
--source include/rpl_connect.inc
|
||||
|
||||
# The parameter reflects binlog-row-event-max-size @cnf.
|
||||
--let $row_size=1024
|
||||
|
||||
SET @old_debug= @@global.debug;
|
||||
|
||||
--connection M2
|
||||
STOP SLAVE;
|
||||
SET GLOBAL debug_dbug= "+d,dbug.rows_events_to_delay_relay_logging";
|
||||
START SLAVE IO_THREAD;
|
||||
--source include/wait_for_slave_io_to_start.inc
|
||||
|
||||
--connection M2
|
||||
# This query also creates a Gtid event group whose Gtid will remain in
|
||||
# ignored status for too long causing a following group split.
|
||||
|
||||
CREATE TABLE t1 (a INT AUTO_INCREMENT PRIMARY KEY, b VARCHAR(30000)) ENGINE=innodb;
|
||||
--sync_slave_with_master M4
|
||||
|
||||
# This INSERT will be logged as two Write_log events which the buggy
|
||||
# slave applier would split.
|
||||
|
||||
--connection M4
|
||||
eval INSERT INTO `t1` VALUES (null, repeat('a', $row_size)), (null, repeat('b', $row_size));
|
||||
|
||||
# START M2 IO thread and wait for its signal to follow with the SQL
|
||||
# thread start. At this moment the SQL thread shall be having 2 and
|
||||
# "half" groups to execute. The "hafl" one would be committed by the
|
||||
# buggy applier after which the IO is released to queue the rest of
|
||||
# the 3rd group which the SQL thread commits separately to complete
|
||||
# the split.
|
||||
|
||||
--connection M2
|
||||
|
||||
# wait for IO signal to start the SQL thread. IO will be hanging upon that.
|
||||
SET debug_sync='now WAIT_FOR start_sql_thread';
|
||||
|
||||
# Now the slave server has relay log whose last group is incomplete.
|
||||
# An unfixed slave server would go to "insert" a "fake"
|
||||
# Gtid_list_log_event event which actually would commit the incomplete
|
||||
# group. However before to actual commit do_apply_event() hits some assert.
|
||||
# In the fixed server the fake Gtid_list_log_event is *not* inserted
|
||||
# in the middle of a group.
|
||||
START SLAVE SQL_THREAD;
|
||||
|
||||
# Sleep for a little time to give SQL thread a chance to commit while
|
||||
# the IO thread is hanging (see
|
||||
# DBUG_EXECUTE_IF("dbug.rows_events_to_delay_relay_logging"...) in
|
||||
# queue_event). Alternatively to reproduce the case when buggy slave
|
||||
# wait for the 1st group commit
|
||||
|
||||
#--let $count= 1
|
||||
#--let $table= t1
|
||||
#--source include/wait_until_rows_count.inc
|
||||
|
||||
--sleep 2
|
||||
|
||||
# Demonstrate either no split group in the correct slave or the 1nd
|
||||
# group in the buggy one
|
||||
--source include/show_binlog_events.inc
|
||||
|
||||
# Release the IO thread
|
||||
SET debug_sync='now SIGNAL go_on_relay_logging';
|
||||
|
||||
# Sync servers
|
||||
--sync_slave_with_master M4
|
||||
--connection M4
|
||||
--sync_slave_with_master M2
|
||||
--connection M2
|
||||
|
||||
# Demonstrate replication goes correctly not to create any split, or
|
||||
# the 2nd group in the buggy slave
|
||||
--source include/show_binlog_events.inc
|
||||
|
||||
#
|
||||
# Cleanup
|
||||
#
|
||||
--connection M4
|
||||
drop table t1;
|
||||
|
||||
--connection M2
|
||||
SET GLOBAL debug_dbug= @old_debug;
|
||||
SET debug_sync='RESET';
|
||||
--source include/rpl_end.inc
|
32
sql/slave.cc
32
sql/slave.cc
|
@ -2363,6 +2363,7 @@ static void write_ignored_events_info_to_relay_log(THD *thd, Master_info *mi)
|
|||
}
|
||||
if (rli->ign_gtids.count())
|
||||
{
|
||||
DBUG_ASSERT(!rli->is_in_group()); // Ensure no active transaction
|
||||
glev= new Gtid_list_log_event(&rli->ign_gtids,
|
||||
Gtid_list_log_event::FLAG_IGN_GTIDS);
|
||||
rli->ign_gtids.reset();
|
||||
|
@ -5317,7 +5318,9 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
|
|||
bool gtid_skip_enqueue= false;
|
||||
bool got_gtid_event= false;
|
||||
rpl_gtid event_gtid;
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
static uint dbug_rows_event_count= 0;
|
||||
#endif
|
||||
/*
|
||||
FD_q must have been prepared for the first R_a event
|
||||
inside get_master_version_and_clock()
|
||||
|
@ -5384,6 +5387,26 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
|
|||
(uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */)
|
||||
DBUG_RETURN(queue_old_event(mi,buf,event_len));
|
||||
|
||||
#ifdef ENABLED_DEBUG_SYNC
|
||||
/*
|
||||
A (+d,dbug.rows_events_to_delay_relay_logging)-test is supposed to
|
||||
create a few Write_log_events and after receiving the 1st of them
|
||||
the IO thread signals to launch the SQL thread, and sets itself to
|
||||
wait for a release signal.
|
||||
*/
|
||||
DBUG_EXECUTE_IF("dbug.rows_events_to_delay_relay_logging",
|
||||
if ((buf[EVENT_TYPE_OFFSET] == WRITE_ROWS_EVENT_V1 ||
|
||||
buf[EVENT_TYPE_OFFSET] == WRITE_ROWS_EVENT) &&
|
||||
++dbug_rows_event_count == 2)
|
||||
{
|
||||
const char act[]=
|
||||
"now SIGNAL start_sql_thread "
|
||||
"WAIT_FOR go_on_relay_logging";
|
||||
DBUG_ASSERT(debug_sync_service);
|
||||
DBUG_ASSERT(!debug_sync_set_action(current_thd,
|
||||
STRING_WITH_LEN(act)));
|
||||
};);
|
||||
#endif
|
||||
mysql_mutex_lock(&mi->data_lock);
|
||||
|
||||
switch ((uchar)buf[EVENT_TYPE_OFFSET]) {
|
||||
|
@ -6588,9 +6611,12 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size)
|
|||
DBUG_RETURN(ev);
|
||||
}
|
||||
|
||||
if (rli->ign_gtids.count())
|
||||
if (rli->ign_gtids.count() && !rli->is_in_group())
|
||||
{
|
||||
/* We generate and return a Gtid_list, to update gtid_slave_pos. */
|
||||
/*
|
||||
We generate and return a Gtid_list, to update gtid_slave_pos,
|
||||
unless being in the middle of a group.
|
||||
*/
|
||||
DBUG_PRINT("info",("seeing ignored end gtids"));
|
||||
ev= new Gtid_list_log_event(&rli->ign_gtids,
|
||||
Gtid_list_log_event::FLAG_IGN_GTIDS);
|
||||
|
|
Loading…
Add table
Reference in a new issue