mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 03:52:35 +01:00
MDEV-27760 event may non stop replicate in circular semisync setup
MDEV-21117 had to relax the acceptance condition for a server's own events to handle the case when a former semisync master server recovers after a crash as a semisync slave. That, however, admitted the possibility of endless event "orbiting" in the non-strict slave gtid mode of a semisync circular setup. The same-server-id event termination is now restored for the non-strict gtid mode so that it follows the regular rules (that is, such an event is ignored unless @@global.replicate_same_server_id allows it in). To address the MDEV-21117 recovery agenda, in the strict gtid mode, and with the transaction's gtid ordered strictly greater than the current slave gtid state, the same-server-id transaction is accepted. In the gtid strict mode it is safe to accept such transactions even if the slave state was not set correctly by the user, e.g. at the former master. An added test shows the typical out-of-order error raised at execution in such a case, so it is guaranteed that no data corruption occurs.
This commit is contained in:
parent
35725df6e2
commit
5ccd845d51
4 changed files with 217 additions and 9 deletions
76
mysql-test/suite/rpl/r/rpl_circular_semi_sync.result
Normal file
76
mysql-test/suite/rpl/r/rpl_circular_semi_sync.result
Normal file
|
@ -0,0 +1,76 @@
|
|||
include/master-slave.inc
|
||||
[connection master]
|
||||
# Master server_1 and Slave server_2 initialiation ...
|
||||
connection server_2;
|
||||
include/stop_slave.inc
|
||||
connection server_1;
|
||||
set @@sql_log_bin = off;
|
||||
call mtr.add_suppression("Slave: An attempt was made to binlog GTID 10-1-1 which would create an out-of-order sequence number with existing GTID");
|
||||
set @@sql_log_bin = on;
|
||||
RESET MASTER;
|
||||
set @@session.gtid_domain_id=10;
|
||||
set @@global.rpl_semi_sync_master_enabled = 1;
|
||||
set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC;
|
||||
connection server_2;
|
||||
RESET MASTER;
|
||||
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
|
||||
set @@session.gtid_domain_id=20;
|
||||
set @@global.rpl_semi_sync_slave_enabled = 1;
|
||||
# a 1948 warning is expected
|
||||
set @@global.gtid_slave_pos = "";
|
||||
Warnings:
|
||||
Warning 1948 Specified value for @@gtid_slave_pos contains no value for replication domain 0. This conflicts with the binary log which contains GTID 0-2-1. If MASTER_GTID_POS=CURRENT_POS is used, the binlog position will override the new value of @@gtid_slave_pos
|
||||
CHANGE MASTER TO master_use_gtid= slave_pos;
|
||||
include/start_slave.inc
|
||||
# ... server_1 -> server_2 is set up
|
||||
connection server_1;
|
||||
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=Innodb;
|
||||
INSERT INTO t1 VALUES (1);
|
||||
connection server_2;
|
||||
# Circular configuration server_2 -> server_1 initialiation ...
|
||||
connection server_1;
|
||||
# A. ... first when server_1 is in gtid strict mode...
|
||||
set @@global.gtid_strict_mode = true;
|
||||
set @@global.rpl_semi_sync_slave_enabled = 1;
|
||||
CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_2, master_user='root', master_use_gtid=SLAVE_POS;
|
||||
# ... only for it to fail 'cos if its inconsistent (empty) slave's gtid state:
|
||||
SELECT @@global.gtid_slave_pos;
|
||||
@@global.gtid_slave_pos
|
||||
|
||||
START SLAVE;
|
||||
include/wait_for_slave_sql_error.inc [errno=1950]
|
||||
# B. ... Resume on the circular setup with the server_id now in the non-strict mode ...
|
||||
set @@global.gtid_strict_mode = false;
|
||||
include/start_slave.inc
|
||||
# ... to have succeeded.
|
||||
connection server_2;
|
||||
INSERT INTO t1 VALUES (2);
|
||||
connection server_1;
|
||||
INSERT INTO t1 VALUES (3);
|
||||
connection server_2;
|
||||
# The gtid states on server_2 must be equal to ...
|
||||
SHOW VARIABLES LIKE 'gtid_binlog_pos';
|
||||
Variable_name Value
|
||||
gtid_binlog_pos 0-2-1,10-1-3,20-2-1
|
||||
SHOW VARIABLES LIKE 'gtid_slave_pos';
|
||||
Variable_name Value
|
||||
gtid_slave_pos 0-2-1,10-1-3,20-2-1
|
||||
connection server_1;
|
||||
# ... the gtid states on server_1
|
||||
SHOW VARIABLES LIKE 'gtid_slave_pos';
|
||||
Variable_name Value
|
||||
gtid_slave_pos 0-2-1,10-1-3,20-2-1
|
||||
SHOW VARIABLES LIKE 'gtid_binlog_pos';
|
||||
Variable_name Value
|
||||
gtid_binlog_pos 0-2-1,10-1-3,20-2-1
|
||||
# Cleanup
|
||||
connection server_1;
|
||||
include/stop_slave.inc
|
||||
set @@global.rpl_semi_sync_master_enabled = default;
|
||||
set @@global.rpl_semi_sync_slave_enabled = default;
|
||||
set @@global.rpl_semi_sync_master_wait_point=default;
|
||||
DROP TABLE t1;
|
||||
connection server_2;
|
||||
set @@global.rpl_semi_sync_master_enabled = default;
|
||||
set @@global.rpl_semi_sync_slave_enabled = default;
|
||||
include/rpl_end.inc
|
11
mysql-test/suite/rpl/t/rpl_circular_semi_sync.cnf
Normal file
11
mysql-test/suite/rpl/t/rpl_circular_semi_sync.cnf
Normal file
|
@ -0,0 +1,11 @@
|
|||
!include suite/rpl/rpl_1slave_base.cnf
|
||||
!include include/default_client.cnf
|
||||
|
||||
|
||||
[mysqld.1]
|
||||
log-slave-updates
|
||||
sync-binlog=1
|
||||
|
||||
[mysqld.2]
|
||||
log-slave-updates
|
||||
sync-binlog=1
|
115
mysql-test/suite/rpl/t/rpl_circular_semi_sync.test
Normal file
115
mysql-test/suite/rpl/t/rpl_circular_semi_sync.test
Normal file
|
@ -0,0 +1,115 @@
|
|||
# ==== References ====
|
||||
#
|
||||
# MDEV-27760 event may non stop replicate in circular semisync setup
|
||||
#
|
||||
--source include/have_innodb.inc
|
||||
--source include/have_binlog_format_row.inc
|
||||
--source include/master-slave.inc
|
||||
|
||||
# The following tests prove
|
||||
# A. out-of-order gtid error when the strict gtid mode semisync slave
|
||||
# receives the same server-id gtid event inconsistent
|
||||
# (rpl_semi_sync_fail_over tests the consistent case) with its state;
|
||||
# B. in the non-strict mode the same server-id events remain ignored
|
||||
# by default as usual.
|
||||
#
|
||||
--echo # Master server_1 and Slave server_2 initialiation ...
|
||||
--connection server_2
|
||||
--source include/stop_slave.inc
|
||||
|
||||
# Initial master
|
||||
--connection server_1
|
||||
set @@sql_log_bin = off;
|
||||
call mtr.add_suppression("Slave: An attempt was made to binlog GTID 10-1-1 which would create an out-of-order sequence number with existing GTID");
|
||||
set @@sql_log_bin = on;
|
||||
|
||||
RESET MASTER;
|
||||
|
||||
set @@session.gtid_domain_id=10;
|
||||
|
||||
set @@global.rpl_semi_sync_master_enabled = 1;
|
||||
set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC;
|
||||
|
||||
--connection server_2
|
||||
RESET MASTER;
|
||||
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
|
||||
|
||||
set @@session.gtid_domain_id=20;
|
||||
|
||||
set @@global.rpl_semi_sync_slave_enabled = 1;
|
||||
--echo # a 1948 warning is expected
|
||||
set @@global.gtid_slave_pos = "";
|
||||
CHANGE MASTER TO master_use_gtid= slave_pos;
|
||||
--source include/start_slave.inc
|
||||
--echo # ... server_1 -> server_2 is set up
|
||||
|
||||
--connection server_1
|
||||
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=Innodb;
|
||||
INSERT INTO t1 VALUES (1);
|
||||
--save_master_pos
|
||||
|
||||
--connection server_2
|
||||
--sync_with_master
|
||||
|
||||
--echo # Circular configuration server_2 -> server_1 initialiation ...
|
||||
--connection server_1
|
||||
--echo # A. ... first when server_1 is in gtid strict mode...
|
||||
set @@global.gtid_strict_mode = true;
|
||||
set @@global.rpl_semi_sync_slave_enabled = 1;
|
||||
|
||||
evalp CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_2, master_user='root', master_use_gtid=SLAVE_POS;
|
||||
|
||||
--echo # ... only for it to fail 'cos if its inconsistent (empty) slave's gtid state:
|
||||
SELECT @@global.gtid_slave_pos;
|
||||
START SLAVE;
|
||||
# ER_GTID_STRICT_OUT_OF_ORDER
|
||||
--let $slave_sql_errno = 1950
|
||||
--source include/wait_for_slave_sql_error.inc
|
||||
|
||||
--echo # B. ... Resume on the circular setup with the server_id now in the non-strict mode ...
|
||||
set @@global.gtid_strict_mode = false;
|
||||
--source include/start_slave.inc
|
||||
|
||||
--echo # ... to have succeeded.
|
||||
|
||||
--connection server_2
|
||||
INSERT INTO t1 VALUES (2);
|
||||
--save_master_pos
|
||||
|
||||
--connection server_1
|
||||
--sync_with_master
|
||||
|
||||
INSERT INTO t1 VALUES (3);
|
||||
--save_master_pos
|
||||
|
||||
--connection server_2
|
||||
--sync_with_master
|
||||
--echo # The gtid states on server_2 must be equal to ...
|
||||
--let $wait_condition=select @@gtid_slave_pos=@@gtid_binlog_pos
|
||||
--source include/wait_condition.inc
|
||||
SHOW VARIABLES LIKE 'gtid_binlog_pos';
|
||||
SHOW VARIABLES LIKE 'gtid_slave_pos';
|
||||
|
||||
--connection server_1
|
||||
--echo # ... the gtid states on server_1
|
||||
--let $wait_condition=select @@gtid_slave_pos=@@gtid_binlog_pos
|
||||
--source include/wait_condition.inc
|
||||
SHOW VARIABLES LIKE 'gtid_slave_pos';
|
||||
SHOW VARIABLES LIKE 'gtid_binlog_pos';
|
||||
|
||||
--echo # Cleanup
|
||||
--connection server_1
|
||||
--source include/stop_slave.inc
|
||||
set @@global.rpl_semi_sync_master_enabled = default;
|
||||
set @@global.rpl_semi_sync_slave_enabled = default;
|
||||
set @@global.rpl_semi_sync_master_wait_point=default;
|
||||
|
||||
DROP TABLE t1;
|
||||
--save_master_pos
|
||||
|
||||
--connection server_2
|
||||
--sync_with_master
|
||||
set @@global.rpl_semi_sync_master_enabled = default;
|
||||
set @@global.rpl_semi_sync_slave_enabled = default;
|
||||
|
||||
--source include/rpl_end.inc
|
24
sql/slave.cc
24
sql/slave.cc
|
@ -6186,13 +6186,13 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
|
|||
bool is_rows_event= false;
|
||||
/*
|
||||
The flag has replicate_same_server_id semantics and is raised to accept
|
||||
a same-server-id event on the semisync slave, for both the gtid and legacy
|
||||
connection modes.
|
||||
Such events can appear as result of this server recovery so the event
|
||||
was created there and replicated elsewhere right before the crash. At recovery
|
||||
it could be evicted from the server's binlog.
|
||||
*/
|
||||
bool do_accept_own_server_id= false;
|
||||
a same-server-id event group by the gtid strict mode semisync slave.
|
||||
Own server-id events can appear as result of this server crash-recovery:
|
||||
the transaction was created on this server while it was the master, got replicated
|
||||
elsewhere right before the crash before commit;
|
||||
finally at recovery the transaction gets evicted from the server's binlog.
|
||||
*/
|
||||
bool do_accept_own_server_id;
|
||||
/*
|
||||
FD_q must have been prepared for the first R_a event
|
||||
inside get_master_version_and_clock()
|
||||
|
@ -6281,6 +6281,8 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
|
|||
dbug_rows_event_count = 0;
|
||||
};);
|
||||
#endif
|
||||
s_id= uint4korr(buf + SERVER_ID_OFFSET);
|
||||
|
||||
mysql_mutex_lock(&mi->data_lock);
|
||||
|
||||
switch (buf[EVENT_TYPE_OFFSET]) {
|
||||
|
@ -6722,6 +6724,7 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
|
|||
|
||||
++mi->events_queued_since_last_gtid;
|
||||
inc_pos= event_len;
|
||||
|
||||
}
|
||||
break;
|
||||
/*
|
||||
|
@ -6864,6 +6867,10 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
|
|||
break;
|
||||
}
|
||||
|
||||
do_accept_own_server_id= (s_id == global_system_variables.server_id
|
||||
&& rpl_semi_sync_slave_enabled && opt_gtid_strict_mode
|
||||
&& mi->using_gtid != Master_info::USE_GTID_NO);
|
||||
|
||||
/*
|
||||
Integrity of Rows- event group check.
|
||||
A sequence of Rows- events must end with STMT_END_F flagged one.
|
||||
|
@ -6909,7 +6916,6 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
|
|||
*/
|
||||
|
||||
mysql_mutex_lock(log_lock);
|
||||
s_id= uint4korr(buf + SERVER_ID_OFFSET);
|
||||
/*
|
||||
Write the event to the relay log, unless we reconnected in the middle
|
||||
of an event group and now need to skip the initial part of the group that
|
||||
|
@ -6955,7 +6961,7 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
|
|||
else
|
||||
if ((s_id == global_system_variables.server_id &&
|
||||
!(mi->rli.replicate_same_server_id ||
|
||||
(do_accept_own_server_id= rpl_semi_sync_slave_enabled))) ||
|
||||
do_accept_own_server_id)) ||
|
||||
event_that_should_be_ignored(buf) ||
|
||||
/*
|
||||
the following conjunction deals with IGNORE_SERVER_IDS, if set
|
||||
|
|
Loading…
Reference in a new issue