MDEV-27760 event may non stop replicate in circular semisync setup

MDEV-21117 had to relax the acceptance condition for own (same
server-id) events for the case when a former semisync master server
recovers after a crash as a semisync slave. That, however, opened up
the possibility of endless event "orbiting" in the non-strict slave
gtid mode of a circular semisync setup.

Termination of same server-id events is now restored for
the non-strict gtid mode so that it follows the regular rules (that is,
such an event is ignored unless @@global.replicate_same_server_id allows it in).

To still address the MDEV-21117 recovery agenda,
a same server-id transaction is accepted when the slave is in
the strict gtid mode and the transaction's gtid is ordered strictly
greater than the current slave gtid state.

In the gtid strict mode it is safe to accept such transactions even if
the slave state was not set correctly by the user, e.g.
at the former master.
An added test shows the typical out-of-order error raised at execution,
so data corruption is ruled out in such a case.
This commit is contained in:
Andrei 2022-02-10 19:17:06 +02:00
parent 35725df6e2
commit 5ccd845d51
4 changed files with 217 additions and 9 deletions

View file

@ -0,0 +1,76 @@
include/master-slave.inc
[connection master]
# Master server_1 and Slave server_2 initialiation ...
connection server_2;
include/stop_slave.inc
connection server_1;
set @@sql_log_bin = off;
call mtr.add_suppression("Slave: An attempt was made to binlog GTID 10-1-1 which would create an out-of-order sequence number with existing GTID");
set @@sql_log_bin = on;
RESET MASTER;
set @@session.gtid_domain_id=10;
set @@global.rpl_semi_sync_master_enabled = 1;
set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC;
connection server_2;
RESET MASTER;
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
set @@session.gtid_domain_id=20;
set @@global.rpl_semi_sync_slave_enabled = 1;
# a 1948 warning is expected
set @@global.gtid_slave_pos = "";
Warnings:
Warning 1948 Specified value for @@gtid_slave_pos contains no value for replication domain 0. This conflicts with the binary log which contains GTID 0-2-1. If MASTER_GTID_POS=CURRENT_POS is used, the binlog position will override the new value of @@gtid_slave_pos
CHANGE MASTER TO master_use_gtid= slave_pos;
include/start_slave.inc
# ... server_1 -> server_2 is set up
connection server_1;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=Innodb;
INSERT INTO t1 VALUES (1);
connection server_2;
# Circular configuration server_2 -> server_1 initialiation ...
connection server_1;
# A. ... first when server_1 is in gtid strict mode...
set @@global.gtid_strict_mode = true;
set @@global.rpl_semi_sync_slave_enabled = 1;
CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_2, master_user='root', master_use_gtid=SLAVE_POS;
# ... only for it to fail 'cos if its inconsistent (empty) slave's gtid state:
SELECT @@global.gtid_slave_pos;
@@global.gtid_slave_pos
START SLAVE;
include/wait_for_slave_sql_error.inc [errno=1950]
# B. ... Resume on the circular setup with the server_id now in the non-strict mode ...
set @@global.gtid_strict_mode = false;
include/start_slave.inc
# ... to have succeeded.
connection server_2;
INSERT INTO t1 VALUES (2);
connection server_1;
INSERT INTO t1 VALUES (3);
connection server_2;
# The gtid states on server_2 must be equal to ...
SHOW VARIABLES LIKE 'gtid_binlog_pos';
Variable_name Value
gtid_binlog_pos 0-2-1,10-1-3,20-2-1
SHOW VARIABLES LIKE 'gtid_slave_pos';
Variable_name Value
gtid_slave_pos 0-2-1,10-1-3,20-2-1
connection server_1;
# ... the gtid states on server_1
SHOW VARIABLES LIKE 'gtid_slave_pos';
Variable_name Value
gtid_slave_pos 0-2-1,10-1-3,20-2-1
SHOW VARIABLES LIKE 'gtid_binlog_pos';
Variable_name Value
gtid_binlog_pos 0-2-1,10-1-3,20-2-1
# Cleanup
connection server_1;
include/stop_slave.inc
set @@global.rpl_semi_sync_master_enabled = default;
set @@global.rpl_semi_sync_slave_enabled = default;
set @@global.rpl_semi_sync_master_wait_point=default;
DROP TABLE t1;
connection server_2;
set @@global.rpl_semi_sync_master_enabled = default;
set @@global.rpl_semi_sync_slave_enabled = default;
include/rpl_end.inc

View file

@ -0,0 +1,11 @@
# Test-specific server configuration: the two servers get identical
# binlog-related settings on top of the stock one-slave replication setup.
!include suite/rpl/rpl_1slave_base.cnf
!include include/default_client.cnf
[mysqld.1]
# Record events applied by the slave threads in this server's own binlog
# (presumably required so that either server can act as master of the other
# in the circular setup built by the test - confirm against the test).
log-slave-updates
# Flush the binlog to disk at every commit.
sync-binlog=1
[mysqld.2]
# Same settings as for mysqld.1.
log-slave-updates
sync-binlog=1

View file

@ -0,0 +1,115 @@
# ==== References ====
#
# MDEV-27760 event may non stop replicate in circular semisync setup
#
--source include/have_innodb.inc
--source include/have_binlog_format_row.inc
--source include/master-slave.inc
# ==== Purpose ====
#
# The following test cases prove that
# A. a semisync slave in the strict gtid mode fails with an out-of-order
#    gtid error when it receives a same server-id gtid event that is
#    inconsistent with its gtid state
#    (rpl_semi_sync_fail_over tests the consistent case);
# B. in the non-strict mode same server-id events remain ignored
#    by default, as usual.
#
--echo # Master server_1 and Slave server_2 initialiation ...
--connection server_2
--source include/stop_slave.inc
# Initial master
--connection server_1
# The suppression is registered with binlogging switched off so that the
# call itself does not end up in the binlog of server_1.
set @@sql_log_bin = off;
call mtr.add_suppression("Slave: An attempt was made to binlog GTID 10-1-1 which would create an out-of-order sequence number with existing GTID");
set @@sql_log_bin = on;
RESET MASTER;
# Transactions originated on server_1 are binlogged in gtid domain 10.
set @@session.gtid_domain_id=10;
set @@global.rpl_semi_sync_master_enabled = 1;
set @@global.rpl_semi_sync_master_wait_point=AFTER_SYNC;
--connection server_2
RESET MASTER;
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
# Transactions originated on server_2 are binlogged in gtid domain 20.
set @@session.gtid_domain_id=20;
set @@global.rpl_semi_sync_slave_enabled = 1;
# NOTE(review): the warning is presumably caused by the ALTER TABLE above,
# which runs after RESET MASTER and before the domain id is changed, so the
# binlog already holds a domain 0 gtid (0-2-1 in the result file).
--echo # a 1948 warning is expected
set @@global.gtid_slave_pos = "";
CHANGE MASTER TO master_use_gtid= slave_pos;
--source include/start_slave.inc
--echo # ... server_1 -> server_2 is set up
--connection server_1
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=Innodb;
INSERT INTO t1 VALUES (1);
# Wait until server_2 has applied the table creation and the first row.
--save_master_pos
--connection server_2
--sync_with_master
--echo # Circular configuration server_2 -> server_1 initialiation ...
--connection server_1
--echo # A. ... first when server_1 is in gtid strict mode...
set @@global.gtid_strict_mode = true;
set @@global.rpl_semi_sync_slave_enabled = 1;
# evalp records the statement with the port variable unexpanded, which keeps
# the result file independent of the actual port number.
evalp CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_2, master_user='root', master_use_gtid=SLAVE_POS;
--echo # ... only for it to fail 'cos if its inconsistent (empty) slave's gtid state:
# The slave gtid state of server_1 is shown to be empty at this point, while
# its own transactions in domain 10 come back from server_2.
SELECT @@global.gtid_slave_pos;
# Plain START SLAVE is used instead of include/start_slave.inc because the
# SQL thread is expected to stop with an error.
START SLAVE;
# ER_GTID_STRICT_OUT_OF_ORDER
--let $slave_sql_errno = 1950
--source include/wait_for_slave_sql_error.inc
--echo # B. ... Resume on the circular setup with the server_id now in the non-strict mode ...
set @@global.gtid_strict_mode = false;
--source include/start_slave.inc
--echo # ... to have succeeded.
# One more transaction is generated on each server, and each one is synced
# to the opposite server.
--connection server_2
INSERT INTO t1 VALUES (2);
--save_master_pos
--connection server_1
--sync_with_master
INSERT INTO t1 VALUES (3);
--save_master_pos
--connection server_2
--sync_with_master
--echo # The gtid states on server_2 must be equal to ...
# Wait for the slave and binlog gtid states to converge before printing them.
--let $wait_condition=select @@gtid_slave_pos=@@gtid_binlog_pos
--source include/wait_condition.inc
SHOW VARIABLES LIKE 'gtid_binlog_pos';
SHOW VARIABLES LIKE 'gtid_slave_pos';
--connection server_1
--echo # ... the gtid states on server_1
--let $wait_condition=select @@gtid_slave_pos=@@gtid_binlog_pos
--source include/wait_condition.inc
SHOW VARIABLES LIKE 'gtid_slave_pos';
SHOW VARIABLES LIKE 'gtid_binlog_pos';
--echo # Cleanup
--connection server_1
# The server_2 -> server_1 direction is stopped first, then the semisync
# settings are restored to their defaults on both servers.
--source include/stop_slave.inc
set @@global.rpl_semi_sync_master_enabled = default;
set @@global.rpl_semi_sync_slave_enabled = default;
set @@global.rpl_semi_sync_master_wait_point=default;
DROP TABLE t1;
--save_master_pos
--connection server_2
--sync_with_master
set @@global.rpl_semi_sync_master_enabled = default;
set @@global.rpl_semi_sync_slave_enabled = default;
--source include/rpl_end.inc

View file

@ -6186,13 +6186,13 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
bool is_rows_event= false;
/*
The flag has replicate_same_server_id semantics and is raised to accept
a same-server-id event on the semisync slave, for both the gtid and legacy
connection modes.
Such events can appear as result of this server recovery so the event
was created there and replicated elsewhere right before the crash. At recovery
it could be evicted from the server's binlog.
*/
bool do_accept_own_server_id= false;
a same-server-id event group by the gtid strict mode semisync slave.
Own server-id events can appear as result of this server crash-recovery:
the transaction was created on this server then being master, got replicated
elsewhere right before the crash before commit;
finally at recovery the transaction gets evicted from the server's binlog.
*/
bool do_accept_own_server_id;
/*
FD_q must have been prepared for the first R_a event
inside get_master_version_and_clock()
@ -6281,6 +6281,8 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
dbug_rows_event_count = 0;
};);
#endif
s_id= uint4korr(buf + SERVER_ID_OFFSET);
mysql_mutex_lock(&mi->data_lock);
switch (buf[EVENT_TYPE_OFFSET]) {
@ -6722,6 +6724,7 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
++mi->events_queued_since_last_gtid;
inc_pos= event_len;
}
break;
/*
@ -6864,6 +6867,10 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
break;
}
do_accept_own_server_id= (s_id == global_system_variables.server_id
&& rpl_semi_sync_slave_enabled && opt_gtid_strict_mode
&& mi->using_gtid != Master_info::USE_GTID_NO);
/*
Integrity of Rows- event group check.
A sequence of Rows- events must end with STMT_END_F flagged one.
@ -6909,7 +6916,6 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
*/
mysql_mutex_lock(log_lock);
s_id= uint4korr(buf + SERVER_ID_OFFSET);
/*
Write the event to the relay log, unless we reconnected in the middle
of an event group and now need to skip the initial part of the group that
@ -6955,7 +6961,7 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
else
if ((s_id == global_system_variables.server_id &&
!(mi->rli.replicate_same_server_id ||
(do_accept_own_server_id= rpl_semi_sync_slave_enabled))) ||
do_accept_own_server_id)) ||
event_that_should_be_ignored(buf) ||
/*
the following conjunction deals with IGNORE_SERVER_IDS, if set