mariadb/mysql-test/suite/rpl/r/rpl_parallel_retry.result

include/rpl_init.inc [topology=1->2]
*** Test retry of transactions that fail to replicate due to deadlock or similar temporary error. ***
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1,1);
SET sql_log_bin=0;
CREATE FUNCTION foo(x INT, d1 VARCHAR(500), d2 VARCHAR(500))
RETURNS INT DETERMINISTIC
BEGIN
RETURN x;
END
||
SET sql_log_bin=1;
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
include/stop_slave.inc
SET GLOBAL slave_parallel_threads=5;
include/start_slave.inc
SET sql_log_bin=0;
CREATE FUNCTION foo(x INT, d1 VARCHAR(500), d2 VARCHAR(500))
RETURNS INT DETERMINISTIC
BEGIN
IF d1 != '' THEN
SET debug_sync = d1;
END IF;
IF d2 != '' THEN
SET debug_sync = d2;
END IF;
RETURN x;
END
||
SET sql_log_bin=1;
include/stop_slave.inc
SET gtid_seq_no = 100;
BEGIN;
INSERT INTO t1 VALUES (2,1);
UPDATE t1 SET b=b+1 WHERE a=1;
INSERT INTO t1 VALUES (3,1);
COMMIT;
SELECT * FROM t1 ORDER BY a;
a	b
1	2
2	1
3	1
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100";
include/start_slave.inc
SET GLOBAL debug_dbug=@old_dbug;
retries
1
SELECT * FROM t1 ORDER BY a;
a	b
1	2
2	1
3	1
*** Test that double retry works when the first retry also fails with temp error ***
include/stop_slave.inc
SET gtid_seq_no = 100;
SET @old_server_id = @@server_id;
SET server_id = 10;
BEGIN;
INSERT INTO t1 VALUES (4,1);
UPDATE t1 SET b=b+1 WHERE a=1;
INSERT INTO t1 VALUES (5,1);
INSERT INTO t1 VALUES (6,1);
COMMIT;
SET server_id = @old_server_id;
SELECT * FROM t1 ORDER BY a;
a	b
1	3
2	1
3	1
4	1
5	1
6	1
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100,rpl_parallel_simulate_double_temp_err_gtid_0_x_100";
include/start_slave.inc
SET GLOBAL debug_dbug=@old_dbug;
retries
2
SELECT * FROM t1 ORDER BY a;
a	b
1	3
2	1
3	1
4	1
5	1
6	1
*** Test too many retries, eventually causing failure. ***
include/stop_slave.inc
SET gtid_seq_no = 100;
SET @old_server_id = @@server_id;
SET server_id = 11;
BEGIN;
INSERT INTO t1 VALUES (7,1);
UPDATE t1 SET b=b+1 WHERE a=1;
INSERT INTO t1 VALUES (8,1);
INSERT INTO t1 VALUES (9,1);
COMMIT;
SET server_id = @old_server_id;
SELECT * FROM t1 ORDER BY a;
a	b
1	4
2	1
3	1
4	1
5	1
6	1
7	1
8	1
9	1
SET sql_log_bin=0;
CALL mtr.add_suppression("Slave worker thread retried transaction 10 time\\(s\\) in vain, giving up");
CALL mtr.add_suppression("Slave: Deadlock found when trying to get lock; try restarting transaction");
SET sql_log_bin=1;
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100,rpl_parallel_simulate_infinite_temp_err_gtid_0_x_100";
START SLAVE;
include/wait_for_slave_sql_error.inc [errno=1213]
SET GLOBAL debug_dbug=@old_dbug;
retries
10
SELECT * FROM t1 ORDER BY a;
a	b
1	3
2	1
3	1
4	1
5	1
6	1
STOP SLAVE IO_THREAD;
include/start_slave.inc
SELECT * FROM t1 ORDER BY a;
a	b
1	4
2	1
3	1
4	1
5	1
6	1
7	1
8	1
9	1
*** Test retry of event group that spans multiple relay log files. ***
CREATE TABLE t2 (a int PRIMARY KEY, b BLOB) ENGINE=InnoDB;
INSERT INTO t2 VALUES (1,"Hulubullu");
include/stop_slave.inc
SET @old_max= @@GLOBAL.max_relay_log_size;
SET GLOBAL max_relay_log_size=4096;
SET gtid_seq_no = 100;
SET @old_server_id = @@server_id;
SET server_id = 12;
BEGIN;
INSERT INTO t1 VALUES (10, 4);
COMMIT;
SET server_id = @old_server_id;
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
a	b
10	4
SELECT a, LENGTH(b) FROM t2 ORDER BY a;
a	LENGTH(b)
1	9
2	5006
3	5012
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100";
include/start_slave.inc
SET GLOBAL debug_dbug=@old_dbug;
retries
1
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
a	b
10	4
SELECT a, LENGTH(b) FROM t2 ORDER BY a;
a	LENGTH(b)
1	9
2	5006
3	5012
INSERT INTO t1 VALUES (11,11);
SELECT * FROM t1 WHERE a >= 10 ORDER BY a;
a	b
10	4
11	11
SELECT a, LENGTH(b) FROM t2 ORDER BY a;
a	LENGTH(b)
1	9
2	5006
3	5012
4	5000
SET GLOBAL max_relay_log_size=@old_max;
*** MDEV-7065: Incorrect relay log position in parallel replication after retry of transaction ***
include/stop_slave.inc
BEGIN;
INSERT INTO t1 VALUES (100, 0);
INSERT INTO t1 VALUES (101, 0);
INSERT INTO t1 VALUES (102, 0);
INSERT INTO t1 VALUES (103, 0);
COMMIT;
SELECT * FROM t1 WHERE a >= 100 ORDER BY a;
a	b
100	0
101	0
102	0
103	0
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_xid";
include/start_slave.inc
SET GLOBAL debug_dbug=@old_dbug;
retries
1
SELECT * FROM t1 WHERE a >= 100 ORDER BY a;
a	b
100	0
101	0
102	0
103	0
include/stop_slave_sql.inc
INSERT INTO t1 VALUES (104, 1);
INSERT INTO t1 VALUES (105, 1);
INSERT INTO t1 VALUES (106, 1);
INSERT INTO t1 VALUES (107, 1);
INSERT INTO t1 VALUES (108, 1);
INSERT INTO t1 VALUES (109, 1);
include/start_slave.inc
SELECT * FROM t1 WHERE a >= 100 ORDER BY a;
a	b
100	0
101	0
102	0
103	0
104	1
105	1
106	1
107	1
108	1
109	1
*** MDEV-6917: Parallel replication: "Commit failed due to failure of an earlier commit on which this one depends", but no prior failure seen **
CREATE TABLE t3 (a INT PRIMARY KEY, b INT, KEY b_idx(b)) ENGINE=InnoDB;
INSERT INTO t3 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6);
CREATE TABLE t4 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
SET @old_format= @@SESSION.binlog_format;
SET binlog_format='statement';
include/stop_slave.inc
CHANGE MASTER TO master_use_gtid=no;
SET @old_format= @@SESSION.binlog_format;
SET binlog_format='statement';
BEGIN;
INSERT INTO t4 VALUES (10, foo(1, 'before_execute_sql_command WAIT_FOR t1_start', ''));
UPDATE t3 SET b=NULL WHERE a=6;
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1';
COMMIT;
SET debug_sync='now WAIT_FOR master_queued1';
SET @old_format= @@SESSION.binlog_format;
SET binlog_format='statement';
BEGIN;
INSERT INTO t4 VALUES (20, foo(2, 'group_commit_waiting_for_prior SIGNAL t2_waiting', ''));
DELETE FROM t3 WHERE b <= 3;
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2';
COMMIT;
SET debug_sync='now WAIT_FOR master_queued2';
SET @old_format= @@SESSION.binlog_format;
SET binlog_format='statement';
BEGIN;
INSERT INTO t4 VALUES (30, foo(3, 'before_execute_sql_command WAIT_FOR t3_start', 'group_commit_waiting_for_prior SIGNAL t3_waiting'));
INSERT INTO t3 VALUES (7,7);
SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued3';
COMMIT;
SET debug_sync='now WAIT_FOR master_queued3';
SET debug_sync='now SIGNAL master_cont1';
SET binlog_format=@old_format;
SET binlog_format=@old_format;
SET debug_sync='RESET';
SET binlog_format=@old_format;
SELECT * FROM t3 ORDER BY a;
a	b
1	NULL
3	NULL
4	4
5	NULL
6	NULL
7	7
SET @old_dbug=@@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,thd_need_ordering_with_force";
include/start_slave.inc
SET debug_sync='now WAIT_FOR t2_waiting';
SET debug_sync='now SIGNAL t3_start';
SET debug_sync='now WAIT_FOR t3_waiting';
SET debug_sync='now SIGNAL t1_start';
SET GLOBAL debug_dbug=@old_dbug;
SET debug_sync='RESET';
retries
1
SELECT * FROM t3 ORDER BY a;
a	b
1	NULL
3	NULL
4	4
5	NULL
6	NULL
7	7
SET binlog_format=@old_format;
include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
include/start_slave.inc
DROP TABLE t1, t2, t3, t4;
DROP function foo;
include/rpl_end.inc
MDEV-5262: Missing retry after temp error in parallel replication Start implementing that an event group can be re-tried in parallel replication if it fails with a temporary error (like deadlock). Patch is very incomplete, just some very basic retry works. Stuff still missing (not complete list): - Handle moving to the next relay log file, if event group to be retried spans multiple relay log files. - Handle refcounting of relay log files, to ensure that we do not purge a relay log file and then later attempt to re-execute events out of it. - Handle description_event_for_exec - we need to save this somehow for the possible retry - and use the correct one in case it differs between relay logs. - Do another retry attempt in case the first retry also fails. - Limit the max number of retries. - Lots of testing will be needed for the various edge cases. 2014-05-08 14:20:18 +02:00			`include/rpl_init.inc [topology=1->2]`
			`* Test retry of transactions that fail to replicate due to deadlock or similar temporary error. *`
			`ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;`
			`CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB;`
			`INSERT INTO t1 VALUES (1,1);`
			`SET sql_log_bin=0;`
			`CREATE FUNCTION foo(x INT, d1 VARCHAR(500), d2 VARCHAR(500))`
			`RETURNS INT DETERMINISTIC`
			`BEGIN`
			`RETURN x;`
			`END`
			`\|\|`
			`SET sql_log_bin=1;`
			`SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;`
			`include/stop_slave.inc`
			`SET GLOBAL slave_parallel_threads=5;`
			`include/start_slave.inc`
			`SET sql_log_bin=0;`
			`CREATE FUNCTION foo(x INT, d1 VARCHAR(500), d2 VARCHAR(500))`
			`RETURNS INT DETERMINISTIC`
			`BEGIN`
			`IF d1 != '' THEN`
			`SET debug_sync = d1;`
			`END IF;`
			`IF d2 != '' THEN`
			`SET debug_sync = d2;`
			`END IF;`
			`RETURN x;`
			`END`
			`\|\|`
MDEV-5262: Missing retry after temp error in parallel replication Implement that if first retry fails, we can do another attempt. Add testcases to test multi-retry that succeeds in second attempt, and multi-retry that eventually fails due to exceeding slave_trans_retries. 2014-05-13 13:42:06 +02:00			`SET sql_log_bin=1;`
MDEV-5262: Missing retry after temp error in parallel replication Start implementing that an event group can be re-tried in parallel replication if it fails with a temporary error (like deadlock). Patch is very incomplete, just some very basic retry works. Stuff still missing (not complete list): - Handle moving to the next relay log file, if event group to be retried spans multiple relay log files. - Handle refcounting of relay log files, to ensure that we do not purge a relay log file and then later attempt to re-execute events out of it. - Handle description_event_for_exec - we need to save this somehow for the possible retry - and use the correct one in case it differs between relay logs. - Do another retry attempt in case the first retry also fails. - Limit the max number of retries. - Lots of testing will be needed for the various edge cases. 2014-05-08 14:20:18 +02:00			`include/stop_slave.inc`
			`SET gtid_seq_no = 100;`
			`BEGIN;`
			`INSERT INTO t1 VALUES (2,1);`
			`UPDATE t1 SET b=b+1 WHERE a=1;`
			`INSERT INTO t1 VALUES (3,1);`
			`COMMIT;`
			`SELECT * FROM t1 ORDER BY a;`
			`a b`
			`1 2`
			`2 1`
			`3 1`
			`SET @old_dbug= @@GLOBAL.debug_dbug;`
MDEV-5262: Missing retry after temp error in parallel replication Implement that if first retry fails, we can do another attempt. Add testcases to test multi-retry that succeeds in second attempt, and multi-retry that eventually fails due to exceeding slave_trans_retries. 2014-05-13 13:42:06 +02:00			`SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100";`
MDEV-5262: Missing retry after temp error in parallel replication Start implementing that an event group can be re-tried in parallel replication if it fails with a temporary error (like deadlock). Patch is very incomplete, just some very basic retry works. Stuff still missing (not complete list): - Handle moving to the next relay log file, if event group to be retried spans multiple relay log files. - Handle refcounting of relay log files, to ensure that we do not purge a relay log file and then later attempt to re-execute events out of it. - Handle description_event_for_exec - we need to save this somehow for the possible retry - and use the correct one in case it differs between relay logs. - Do another retry attempt in case the first retry also fails. - Limit the max number of retries. - Lots of testing will be needed for the various edge cases. 2014-05-08 14:20:18 +02:00			`include/start_slave.inc`
			`SET GLOBAL debug_dbug=@old_dbug;`
			`retries`
			`1`
			`SELECT * FROM t1 ORDER BY a;`
			`a b`
			`1 2`
			`2 1`
			`3 1`
MDEV-5262: Missing retry after temp error in parallel replication Implement that if first retry fails, we can do another attempt. Add testcases to test multi-retry that succeeds in second attempt, and multi-retry that eventually fails due to exceeding slave_trans_retries. 2014-05-13 13:42:06 +02:00			`* Test that double retry works when the first retry also fails with temp error *`
			`include/stop_slave.inc`
			`SET gtid_seq_no = 100;`
			`SET @old_server_id = @@server_id;`
			`SET server_id = 10;`
			`BEGIN;`
			`INSERT INTO t1 VALUES (4,1);`
			`UPDATE t1 SET b=b+1 WHERE a=1;`
			`INSERT INTO t1 VALUES (5,1);`
			`INSERT INTO t1 VALUES (6,1);`
			`COMMIT;`
			`SET server_id = @old_server_id;`
			`SELECT * FROM t1 ORDER BY a;`
			`a b`
			`1 3`
			`2 1`
			`3 1`
			`4 1`
			`5 1`
			`6 1`
			`SET @old_dbug= @@GLOBAL.debug_dbug;`
			`SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100,rpl_parallel_simulate_double_temp_err_gtid_0_x_100";`
			`include/start_slave.inc`
			`SET GLOBAL debug_dbug=@old_dbug;`
			`retries`
			`2`
			`SELECT * FROM t1 ORDER BY a;`
			`a b`
			`1 3`
			`2 1`
			`3 1`
			`4 1`
			`5 1`
			`6 1`
			`* Test too many retries, eventually causing failure. *`
			`include/stop_slave.inc`
			`SET gtid_seq_no = 100;`
			`SET @old_server_id = @@server_id;`
			`SET server_id = 11;`
			`BEGIN;`
			`INSERT INTO t1 VALUES (7,1);`
			`UPDATE t1 SET b=b+1 WHERE a=1;`
			`INSERT INTO t1 VALUES (8,1);`
			`INSERT INTO t1 VALUES (9,1);`
			`COMMIT;`
			`SET server_id = @old_server_id;`
			`SELECT * FROM t1 ORDER BY a;`
			`a b`
			`1 4`
			`2 1`
			`3 1`
			`4 1`
			`5 1`
			`6 1`
			`7 1`
			`8 1`
			`9 1`
			`SET sql_log_bin=0;`
			`CALL mtr.add_suppression("Slave worker thread retried transaction 10 time\\(s\\) in vain, giving up");`
			`CALL mtr.add_suppression("Slave: Deadlock found when trying to get lock; try restarting transaction");`
			`SET sql_log_bin=1;`
			`SET @old_dbug= @@GLOBAL.debug_dbug;`
			`SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100,rpl_parallel_simulate_infinite_temp_err_gtid_0_x_100";`
			`START SLAVE;`
			`include/wait_for_slave_sql_error.inc [errno=1213]`
			`SET GLOBAL debug_dbug=@old_dbug;`
			`retries`
			`10`
			`SELECT * FROM t1 ORDER BY a;`
			`a b`
			`1 3`
			`2 1`
			`3 1`
			`4 1`
			`5 1`
			`6 1`
			`STOP SLAVE IO_THREAD;`
			`include/start_slave.inc`
			`SELECT * FROM t1 ORDER BY a;`
			`a b`
			`1 4`
			`2 1`
			`3 1`
			`4 1`
			`5 1`
			`6 1`
			`7 1`
			`8 1`
			`9 1`
MDEV-5262: Missing retry after temp error in parallel replication Handle retry of event groups that span multiple relay log files. - If retry reaches the end of one relay log file, move on to the next. - Handle refcounting of relay log files, and avoid purging relay log files until all event groups have completed that might have needed them for transaction retry. 2014-05-15 15:52:08 +02:00			`* Test retry of event group that spans multiple relay log files. *`
			`CREATE TABLE t2 (a int PRIMARY KEY, b BLOB) ENGINE=InnoDB;`
			`INSERT INTO t2 VALUES (1,"Hulubullu");`
			`include/stop_slave.inc`
			`SET @old_max= @@GLOBAL.max_relay_log_size;`
			`SET GLOBAL max_relay_log_size=4096;`
			`SET gtid_seq_no = 100;`
			`SET @old_server_id = @@server_id;`
			`SET server_id = 12;`
			`BEGIN;`
			`INSERT INTO t1 VALUES (10, 4);`
			`COMMIT;`
			`SET server_id = @old_server_id;`
			`SELECT * FROM t1 WHERE a >= 10 ORDER BY a;`
			`a b`
			`10 4`
			`SELECT a, LENGTH(b) FROM t2 ORDER BY a;`
			`a LENGTH(b)`
			`1 9`
			`2 5006`
			`3 5012`
			`SET @old_dbug= @@GLOBAL.debug_dbug;`
			`SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_gtid_0_x_100";`
			`include/start_slave.inc`
			`SET GLOBAL debug_dbug=@old_dbug;`
			`retries`
			`1`
			`SELECT * FROM t1 WHERE a >= 10 ORDER BY a;`
			`a b`
			`10 4`
			`SELECT a, LENGTH(b) FROM t2 ORDER BY a;`
			`a LENGTH(b)`
			`1 9`
			`2 5006`
			`3 5012`
			`INSERT INTO t1 VALUES (11,11);`
			`SELECT * FROM t1 WHERE a >= 10 ORDER BY a;`
			`a b`
			`10 4`
			`11 11`
			`SELECT a, LENGTH(b) FROM t2 ORDER BY a;`
			`a LENGTH(b)`
			`1 9`
			`2 5006`
			`3 5012`
			`4 5000`
			`SET GLOBAL max_relay_log_size=@old_max;`
MDEV-7065: Incorrect relay log position in parallel replication after retry of transaction The retry of an event group in parallel replication set the wrong value for the end log position of the event that was retried (qev->future_event_relay_log_pos). It was too large by the size of the event, so it pointed into the middle of the following event. If the retry happened in the very last event of the event group, _and_ the SQL thread was stopped just after successfully retrying that event, then the SQL thread's relay log position would be left incorrect. Restarting the SQL thread could then try to read events from a garbage offset in the relay log, usually leading to an error about not being able to read the event. 2014-11-13 10:46:09 +01:00			`* MDEV-7065: Incorrect relay log position in parallel replication after retry of transaction *`
			`include/stop_slave.inc`
			`BEGIN;`
			`INSERT INTO t1 VALUES (100, 0);`
			`INSERT INTO t1 VALUES (101, 0);`
			`INSERT INTO t1 VALUES (102, 0);`
			`INSERT INTO t1 VALUES (103, 0);`
			`COMMIT;`
			`SELECT * FROM t1 WHERE a >= 100 ORDER BY a;`
			`a b`
			`100 0`
			`101 0`
			`102 0`
			`103 0`
			`SET @old_dbug= @@GLOBAL.debug_dbug;`
			`SET GLOBAL debug_dbug="+d,rpl_parallel_simulate_temp_err_xid";`
			`include/start_slave.inc`
			`SET GLOBAL debug_dbug=@old_dbug;`
			`retries`
			`1`
			`SELECT * FROM t1 WHERE a >= 100 ORDER BY a;`
			`a b`
			`100 0`
			`101 0`
			`102 0`
			`103 0`
			`include/stop_slave_sql.inc`
			`INSERT INTO t1 VALUES (104, 1);`
			`INSERT INTO t1 VALUES (105, 1);`
			`INSERT INTO t1 VALUES (106, 1);`
			`INSERT INTO t1 VALUES (107, 1);`
			`INSERT INTO t1 VALUES (108, 1);`
			`INSERT INTO t1 VALUES (109, 1);`
			`include/start_slave.inc`
			`SELECT * FROM t1 WHERE a >= 100 ORDER BY a;`
			`a b`
			`100 0`
			`101 0`
			`102 0`
			`103 0`
			`104 1`
			`105 1`
			`106 1`
			`107 1`
			`108 1`
			`109 1`
MDEV-6917: Parallel replication: "Commit failed due to failure of an earlier commit on which this one depends", but no prior failure seen This bug was seen when parallel replication experienced a deadlock between transactions T1 and T2, where T2 has reached the commit phase and is waiting for T1 to commit first. In this case, the deadlock is broken by sending a kill to T2; that kill error is then later detected and converted to a deadlock error, which causes T2 to be rolled back and retried. The problem was that the kill caused ha_commit_trans() to erroneously call wakeup_subsequent_commits() on T3, signalling it to abort because T2 failed during commit. This is incorrect, because the error in T2 is only a temporary error, which will be resolved by normal transaction retry. We should not signal error to the next transaction until we have executed the code that handles such temporary errors. So this patch just removes the calls to wakeup_subsequent_commits() from ha_commit_trans(). They are incorrect in this case, and they are not needed in general, as wakeup_subsequent_commits() must in any case be called in finish_event_group() to wake up any transactions that may have started to wait after ha_commit_trans(). And normally, wakeup will in fact have happened earlier, either from the binlog group commit code, or (in case of no binlogging) after the fast part of InnoDB/XtraDB group commit. The symptom of this bug was that replication would break on some transaction with "Commit failed due to failure of an earlier commit on which this one depends", but with no such failure of an earlier commit visible anywhere. 2014-11-13 11:01:31 +01:00			`* MDEV-6917: Parallel replication: "Commit failed due to failure of an earlier commit on which this one depends", but no prior failure seen `
			`CREATE TABLE t3 (a INT PRIMARY KEY, b INT, KEY b_idx(b)) ENGINE=InnoDB;`
			`INSERT INTO t3 VALUES (1,NULL), (2,2), (3,NULL), (4,4), (5, NULL), (6, 6);`
			`CREATE TABLE t4 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;`
			`SET @old_format= @@SESSION.binlog_format;`
			`SET binlog_format='statement';`
			`include/stop_slave.inc`
			`CHANGE MASTER TO master_use_gtid=no;`
			`SET @old_format= @@SESSION.binlog_format;`
			`SET binlog_format='statement';`
			`BEGIN;`
			`INSERT INTO t4 VALUES (10, foo(1, 'before_execute_sql_command WAIT_FOR t1_start', ''));`
			`UPDATE t3 SET b=NULL WHERE a=6;`
			`SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued1 WAIT_FOR master_cont1';`
			`COMMIT;`
			`SET debug_sync='now WAIT_FOR master_queued1';`
			`SET @old_format= @@SESSION.binlog_format;`
			`SET binlog_format='statement';`
			`BEGIN;`
			`INSERT INTO t4 VALUES (20, foo(2, 'group_commit_waiting_for_prior SIGNAL t2_waiting', ''));`
			`DELETE FROM t3 WHERE b <= 3;`
			`SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued2';`
			`COMMIT;`
			`SET debug_sync='now WAIT_FOR master_queued2';`
			`SET @old_format= @@SESSION.binlog_format;`
			`SET binlog_format='statement';`
			`BEGIN;`
			`INSERT INTO t4 VALUES (30, foo(3, 'before_execute_sql_command WAIT_FOR t3_start', 'group_commit_waiting_for_prior SIGNAL t3_waiting'));`
			`INSERT INTO t3 VALUES (7,7);`
			`SET debug_sync='commit_after_release_LOCK_prepare_ordered SIGNAL master_queued3';`
			`COMMIT;`
			`SET debug_sync='now WAIT_FOR master_queued3';`
			`SET debug_sync='now SIGNAL master_cont1';`
			`SET binlog_format=@old_format;`
			`SET binlog_format=@old_format;`
			`SET debug_sync='RESET';`
			`SET binlog_format=@old_format;`
			`SELECT * FROM t3 ORDER BY a;`
			`a b`
			`1 NULL`
			`3 NULL`
			`4 4`
			`5 NULL`
			`6 NULL`
			`7 7`
			`SET @old_dbug=@@GLOBAL.debug_dbug;`
			`SET GLOBAL debug_dbug="+d,thd_need_ordering_with_force";`
			`include/start_slave.inc`
			`SET debug_sync='now WAIT_FOR t2_waiting';`
			`SET debug_sync='now SIGNAL t3_start';`
			`SET debug_sync='now WAIT_FOR t3_waiting';`
			`SET debug_sync='now SIGNAL t1_start';`
			`SET GLOBAL debug_dbug=@old_dbug;`
			`SET debug_sync='RESET';`
			`retries`
			`1`
			`SELECT * FROM t3 ORDER BY a;`
			`a b`
			`1 NULL`
			`3 NULL`
			`4 4`
			`5 NULL`
			`6 NULL`
			`7 7`
			`SET binlog_format=@old_format;`
MDEV-5262: Missing retry after temp error in parallel replication Start implementing that an event group can be re-tried in parallel replication if it fails with a temporary error (like deadlock). Patch is very incomplete, just some very basic retry works. Stuff still missing (not complete list): - Handle moving to the next relay log file, if event group to be retried spans multiple relay log files. - Handle refcounting of relay log files, to ensure that we do not purge a relay log file and then later attempt to re-execute events out of it. - Handle description_event_for_exec - we need to save this somehow for the possible retry - and use the correct one in case it differs between relay logs. - Do another retry attempt in case the first retry also fails. - Limit the max number of retries. - Lots of testing will be needed for the various edge cases. 2014-05-08 14:20:18 +02:00			`include/stop_slave.inc`
			`SET GLOBAL slave_parallel_threads=@old_parallel_threads;`
			`include/start_slave.inc`
MDEV-6917: Parallel replication: "Commit failed due to failure of an earlier commit on which this one depends", but no prior failure seen This bug was seen when parallel replication experienced a deadlock between transactions T1 and T2, where T2 has reached the commit phase and is waiting for T1 to commit first. In this case, the deadlock is broken by sending a kill to T2; that kill error is then later detected and converted to a deadlock error, which causes T2 to be rolled back and retried. The problem was that the kill caused ha_commit_trans() to erroneously call wakeup_subsequent_commits() on T3, signalling it to abort because T2 failed during commit. This is incorrect, because the error in T2 is only a temporary error, which will be resolved by normal transaction retry. We should not signal error to the next transaction until we have executed the code that handles such temporary errors. So this patch just removes the calls to wakeup_subsequent_commits() from ha_commit_trans(). They are incorrect in this case, and they are not needed in general, as wakeup_subsequent_commits() must in any case be called in finish_event_group() to wake up any transactions that may have started to wait after ha_commit_trans(). And normally, wakeup will in fact have happened earlier, either from the binlog group commit code, or (in case of no binlogging) after the fast part of InnoDB/XtraDB group commit. The symptom of this bug was that replication would break on some transaction with "Commit failed due to failure of an earlier commit on which this one depends", but with no such failure of an earlier commit visible anywhere. 2014-11-13 11:01:31 +01:00			`DROP TABLE t1, t2, t3, t4;`
MDEV-5262: Missing retry after temp error in parallel replication Start implementing that an event group can be re-tried in parallel replication if it fails with a temporary error (like deadlock). Patch is very incomplete, just some very basic retry works. Stuff still missing (not complete list): - Handle moving to the next relay log file, if event group to be retried spans multiple relay log files. - Handle refcounting of relay log files, to ensure that we do not purge a relay log file and then later attempt to re-execute events out of it. - Handle description_event_for_exec - we need to save this somehow for the possible retry - and use the correct one in case it differs between relay logs. - Do another retry attempt in case the first retry also fails. - Limit the max number of retries. - Lots of testing will be needed for the various edge cases. 2014-05-08 14:20:18 +02:00			`DROP function foo;`
			`include/rpl_end.inc`