mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 12:02:42 +01:00
MDEV-8354: out-of-order error with --gtid-ignore-duplicates and row-based replication
The --gtid-ignore-duplicates option was not working correctly with row-based replication. When a row event was completed, but before committing, there was a small window where another multi-source SQL thread could wrongly try to re-execute the same transaction, without properly ignoring the duplicate GTID. This would lead to a duplicate key error, an out-of-order GTID error, or a similar failure. Thanks to Matt Neth for reporting this and giving an easy way to reproduce the issue.
This commit is contained in:
parent
93c039dd3c
commit
b89de2b2ce
4 changed files with 263 additions and 7 deletions
|
@ -3,21 +3,25 @@
|
|||
[mysqld.1]
|
||||
log-slave-updates
|
||||
loose-innodb
|
||||
binlog-format=mixed
|
||||
|
||||
[mysqld.2]
|
||||
log-slave-updates
|
||||
loose-innodb
|
||||
binlog-format=mixed
|
||||
|
||||
[mysqld.3]
|
||||
log-bin=server3-bin
|
||||
log-slave-updates
|
||||
loose-innodb
|
||||
binlog-format=mixed
|
||||
|
||||
[mysqld.4]
|
||||
server-id=4
|
||||
log-bin=server4-bin
|
||||
log-slave-updates
|
||||
loose-innodb
|
||||
binlog-format=mixed
|
||||
|
||||
[ENV]
|
||||
SERVER_MYPORT_4= @mysqld.4.port
|
||||
|
|
|
@ -242,6 +242,145 @@ a
|
|||
24
|
||||
25
|
||||
26
|
||||
*** MDEV-8354: out-of-order error with --gtid-ignore-duplicates and row-based replication ***
|
||||
SET default_master_connection = "b2a";
|
||||
STOP SLAVE;
|
||||
include/wait_for_slave_to_stop.inc
|
||||
SET default_master_connection = "c2a";
|
||||
STOP SLAVE;
|
||||
include/wait_for_slave_to_stop.inc
|
||||
SET default_master_connection = "c2b";
|
||||
STOP SLAVE;
|
||||
include/wait_for_slave_to_stop.inc
|
||||
SET default_master_connection = "b2c";
|
||||
STOP SLAVE;
|
||||
include/wait_for_slave_to_stop.inc
|
||||
SET @old_slave_mode=@@GLOBAL.slave_exec_mode;
|
||||
SET GLOBAL slave_exec_mode=IDEMPOTENT;
|
||||
SET @old_strict=@@GLOBAL.gtid_strict_mode;
|
||||
SET GLOBAL gtid_strict_mode=1;
|
||||
SET @old_dbug=@@GLOBAL.debug_dbug;
|
||||
SET GLOBAL debug_dbug="+d,inject_sleep_gtid_100_x_x";
|
||||
SET @old_domain=@@SESSION.gtid_domain_id;
|
||||
SET @old_format=@@SESSION.binlog_format;
|
||||
SET SESSION gtid_domain_id=100;
|
||||
SET SESSION binlog_format='row';
|
||||
INSERT INTO t1 VALUES (30);
|
||||
INSERT INTO t1 VALUES (31);
|
||||
INSERT INTO t1 VALUES (32);
|
||||
INSERT INTO t1 VALUES (33);
|
||||
INSERT INTO t1 VALUES (34);
|
||||
INSERT INTO t1 VALUES (35);
|
||||
INSERT INTO t1 VALUES (36);
|
||||
INSERT INTO t1 VALUES (37);
|
||||
INSERT INTO t1 VALUES (38);
|
||||
INSERT INTO t1 VALUES (39);
|
||||
INSERT INTO t1 VALUES (40);
|
||||
INSERT INTO t1 VALUES (41);
|
||||
INSERT INTO t1 VALUES (42);
|
||||
INSERT INTO t1 VALUES (43);
|
||||
INSERT INTO t1 VALUES (44);
|
||||
INSERT INTO t1 VALUES (45);
|
||||
INSERT INTO t1 VALUES (46);
|
||||
INSERT INTO t1 VALUES (47);
|
||||
INSERT INTO t1 VALUES (48);
|
||||
INSERT INTO t1 VALUES (49);
|
||||
SET SESSION gtid_domain_id=@old_domain;
|
||||
SET SESSION binlog_format=@old_format;
|
||||
include/save_master_gtid.inc
|
||||
include/sync_with_master_gtid.inc
|
||||
INSERT INTO t1 VALUES (50);
|
||||
include/save_master_gtid.inc
|
||||
SET default_master_connection = "b2c";
|
||||
START SLAVE;
|
||||
include/wait_for_slave_to_start.inc
|
||||
SELECT MASTER_GTID_WAIT("GTID", 30);
|
||||
MASTER_GTID_WAIT("GTID", 30)
|
||||
0
|
||||
SET default_master_connection = "b2a";
|
||||
START SLAVE;
|
||||
include/wait_for_slave_to_start.inc
|
||||
SET default_master_connection = "c2a";
|
||||
START SLAVE;
|
||||
include/wait_for_slave_to_start.inc
|
||||
include/sync_with_master_gtid.inc
|
||||
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
|
||||
a
|
||||
30
|
||||
31
|
||||
32
|
||||
33
|
||||
34
|
||||
35
|
||||
36
|
||||
37
|
||||
38
|
||||
39
|
||||
40
|
||||
41
|
||||
42
|
||||
43
|
||||
44
|
||||
45
|
||||
46
|
||||
47
|
||||
48
|
||||
49
|
||||
50
|
||||
SET default_master_connection = "c2b";
|
||||
START SLAVE;
|
||||
include/wait_for_slave_to_start.inc
|
||||
include/sync_with_master_gtid.inc
|
||||
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
|
||||
a
|
||||
30
|
||||
31
|
||||
32
|
||||
33
|
||||
34
|
||||
35
|
||||
36
|
||||
37
|
||||
38
|
||||
39
|
||||
40
|
||||
41
|
||||
42
|
||||
43
|
||||
44
|
||||
45
|
||||
46
|
||||
47
|
||||
48
|
||||
49
|
||||
50
|
||||
include/sync_with_master_gtid.inc
|
||||
SET GLOBAL debug_dbug=@old_dbug;
|
||||
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
|
||||
a
|
||||
30
|
||||
31
|
||||
32
|
||||
33
|
||||
34
|
||||
35
|
||||
36
|
||||
37
|
||||
38
|
||||
39
|
||||
40
|
||||
41
|
||||
42
|
||||
43
|
||||
44
|
||||
45
|
||||
46
|
||||
47
|
||||
48
|
||||
49
|
||||
50
|
||||
SET GLOBAL slave_exec_mode=@old_slave_mode;
|
||||
SET GLOBAL gtid_strict_mode=@old_strict;
|
||||
SET GLOBAL gtid_domain_id=0;
|
||||
STOP ALL SLAVES;
|
||||
Warnings:
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
--source include/not_embedded.inc
|
||||
--source include/have_innodb.inc
|
||||
--source include/have_debug.inc
|
||||
|
||||
|
||||
--echo *** Test all-to-all replication with --gtid-ignore-duplicates ***
|
||||
|
||||
|
@ -258,6 +260,112 @@ SELECT * FROM t1 WHERE a >= 20 ORDER BY a;
|
|||
SELECT * FROM t1 WHERE a >= 20 ORDER BY a;
|
||||
|
||||
|
||||
--echo *** MDEV-8354: out-of-order error with --gtid-ignore-duplicates and row-based replication ***
|
||||
|
||||
# Have only A->C and A->B initially.
|
||||
--connection server_1
|
||||
SET default_master_connection = "b2a";
|
||||
STOP SLAVE;
|
||||
--source include/wait_for_slave_to_stop.inc
|
||||
SET default_master_connection = "c2a";
|
||||
STOP SLAVE;
|
||||
--source include/wait_for_slave_to_stop.inc
|
||||
|
||||
--connection server_2
|
||||
SET default_master_connection = "c2b";
|
||||
STOP SLAVE;
|
||||
--source include/wait_for_slave_to_stop.inc
|
||||
|
||||
--connection server_3
|
||||
SET default_master_connection = "b2c";
|
||||
STOP SLAVE;
|
||||
--source include/wait_for_slave_to_stop.inc
|
||||
SET @old_slave_mode=@@GLOBAL.slave_exec_mode;
|
||||
SET GLOBAL slave_exec_mode=IDEMPOTENT;
|
||||
SET @old_strict=@@GLOBAL.gtid_strict_mode;
|
||||
SET GLOBAL gtid_strict_mode=1;
|
||||
|
||||
SET @old_dbug=@@GLOBAL.debug_dbug;
|
||||
# This will inject a small sleep that helps trigger the race. I did not manage
|
||||
# to create a non-sleeping version with debug_sync for this; the problem is
|
||||
# that once the bug is fixed, the race becomes impossible, so even with
|
||||
# debug_sync at best we can check that the debug_sync times out. Which is
|
||||
# just another way of adding a sleep.
|
||||
#
|
||||
# The bug was a race at this point where another multi-source connection
|
||||
# could incorrectly re-apply the same GTID, in case of row-based replication.
|
||||
SET GLOBAL debug_dbug="+d,inject_sleep_gtid_100_x_x";
|
||||
|
||||
--connection server_1
|
||||
SET @old_domain=@@SESSION.gtid_domain_id;
|
||||
SET @old_format=@@SESSION.binlog_format;
|
||||
SET SESSION gtid_domain_id=100;
|
||||
SET SESSION binlog_format='row';
|
||||
INSERT INTO t1 VALUES (30);
|
||||
INSERT INTO t1 VALUES (31);
|
||||
INSERT INTO t1 VALUES (32);
|
||||
INSERT INTO t1 VALUES (33);
|
||||
INSERT INTO t1 VALUES (34);
|
||||
INSERT INTO t1 VALUES (35);
|
||||
INSERT INTO t1 VALUES (36);
|
||||
INSERT INTO t1 VALUES (37);
|
||||
INSERT INTO t1 VALUES (38);
|
||||
INSERT INTO t1 VALUES (39);
|
||||
INSERT INTO t1 VALUES (40);
|
||||
INSERT INTO t1 VALUES (41);
|
||||
INSERT INTO t1 VALUES (42);
|
||||
INSERT INTO t1 VALUES (43);
|
||||
INSERT INTO t1 VALUES (44);
|
||||
INSERT INTO t1 VALUES (45);
|
||||
INSERT INTO t1 VALUES (46);
|
||||
INSERT INTO t1 VALUES (47);
|
||||
INSERT INTO t1 VALUES (48);
|
||||
INSERT INTO t1 VALUES (49);
|
||||
SET SESSION gtid_domain_id=@old_domain;
|
||||
SET SESSION binlog_format=@old_format;
|
||||
--source include/save_master_gtid.inc
|
||||
|
||||
--connection server_2
|
||||
--source include/sync_with_master_gtid.inc
|
||||
INSERT INTO t1 VALUES (50);
|
||||
--let $gtid=`SELECT @@last_gtid`
|
||||
--source include/save_master_gtid.inc
|
||||
|
||||
--connection server_3
|
||||
SET default_master_connection = "b2c";
|
||||
START SLAVE;
|
||||
--source include/wait_for_slave_to_start.inc
|
||||
--replace_result $gtid GTID
|
||||
eval SELECT MASTER_GTID_WAIT("$gtid", 30);
|
||||
# The bug occurred here; the slave would get an out-of-order binlog error
|
||||
# due to trying to re-apply the 100-x-x transaction.
|
||||
|
||||
# Restart stopped multi-source connections, and sync up.
|
||||
--connection server_1
|
||||
SET default_master_connection = "b2a";
|
||||
START SLAVE;
|
||||
--source include/wait_for_slave_to_start.inc
|
||||
SET default_master_connection = "c2a";
|
||||
START SLAVE;
|
||||
--source include/wait_for_slave_to_start.inc
|
||||
--source include/sync_with_master_gtid.inc
|
||||
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
|
||||
|
||||
--connection server_2
|
||||
SET default_master_connection = "c2b";
|
||||
START SLAVE;
|
||||
--source include/wait_for_slave_to_start.inc
|
||||
--source include/sync_with_master_gtid.inc
|
||||
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
|
||||
|
||||
--connection server_3
|
||||
--source include/sync_with_master_gtid.inc
|
||||
SET GLOBAL debug_dbug=@old_dbug;
|
||||
SELECT * FROM t1 WHERE a >= 30 ORDER BY a;
|
||||
SET GLOBAL slave_exec_mode=@old_slave_mode;
|
||||
SET GLOBAL gtid_strict_mode=@old_strict;
|
||||
|
||||
|
||||
# Clean up.
|
||||
--connection server_1
|
||||
SET GLOBAL gtid_domain_id=0;
|
||||
|
|
|
@ -1788,6 +1788,13 @@ void rpl_group_info::cleanup_context(THD *thd, bool error)
|
|||
rli->clear_flag(Relay_log_info::IN_STMT);
|
||||
rli->clear_flag(Relay_log_info::IN_TRANSACTION);
|
||||
}
|
||||
|
||||
/*
|
||||
Ensure we always release the domain for others to process, when using
|
||||
--gtid-ignore-duplicates.
|
||||
*/
|
||||
if (gtid_ignore_duplicate_state != GTID_DUPLICATE_NULL)
|
||||
rpl_global_gtid_slave_state.release_domain_owner(this);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1796,13 +1803,6 @@ void rpl_group_info::cleanup_context(THD *thd, bool error)
|
|||
thd->variables.option_bits&= ~OPTION_NO_FOREIGN_KEY_CHECKS;
|
||||
thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS;
|
||||
|
||||
/*
|
||||
Ensure we always release the domain for others to process, when using
|
||||
--gtid-ignore-duplicates.
|
||||
*/
|
||||
if (gtid_ignore_duplicate_state != GTID_DUPLICATE_NULL)
|
||||
rpl_global_gtid_slave_state.release_domain_owner(this);
|
||||
|
||||
/*
|
||||
Reset state related to long_find_row notes in the error log:
|
||||
- timestamp
|
||||
|
@ -1811,6 +1811,11 @@ void rpl_group_info::cleanup_context(THD *thd, bool error)
|
|||
reset_row_stmt_start_timestamp();
|
||||
unset_long_find_row_note_printed();
|
||||
|
||||
DBUG_EXECUTE_IF("inject_sleep_gtid_100_x_x", {
|
||||
if (current_gtid.domain_id == 100)
|
||||
my_sleep(50000);
|
||||
};);
|
||||
|
||||
DBUG_VOID_RETURN;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue