mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 12:02:42 +01:00
Merge MDEV-8031 into 10.1
This commit is contained in:
commit
c2dd88ac85
3 changed files with 259 additions and 18 deletions
|
@ -1591,7 +1591,7 @@ a b
|
|||
120 0
|
||||
121 0
|
||||
include/stop_slave.inc
|
||||
SET GLOBAL debug_dbug= @old_debug;
|
||||
SET GLOBAL debug_dbug= @old_dbug;
|
||||
include/start_slave.inc
|
||||
*** MDEV-7929: record_gtid() for non-transactional event group calls wakeup_subsequent_commits() too early, causing slave hang. ***
|
||||
include/stop_slave.inc
|
||||
|
@ -1620,7 +1620,88 @@ a b
|
|||
130 0
|
||||
131 0
|
||||
include/stop_slave.inc
|
||||
SET GLOBAL debug_dbug= @old_debug;
|
||||
SET GLOBAL debug_dbug= @old_dbug;
|
||||
include/start_slave.inc
|
||||
*** MDEV-8031: Parallel replication stops on "connection killed" error (probably incorrectly handled deadlock kill) ***
|
||||
INSERT INTO t3 VALUES (201,0), (202,0);
|
||||
include/save_master_gtid.inc
|
||||
include/sync_with_master_gtid.inc
|
||||
include/stop_slave.inc
|
||||
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||
SET GLOBAL debug_dbug= '+d,inject_mdev8031';
|
||||
SET @old_dbug= @@SESSION.debug_dbug;
|
||||
SET SESSION debug_dbug="+d,binlog_force_commit_id";
|
||||
SET @commit_id= 10200;
|
||||
INSERT INTO t3 VALUES (203, 1);
|
||||
INSERT INTO t3 VALUES (204, 1);
|
||||
INSERT INTO t3 VALUES (205, 1);
|
||||
UPDATE t3 SET b=b+1 WHERE a=201;
|
||||
UPDATE t3 SET b=b+1 WHERE a=201;
|
||||
UPDATE t3 SET b=b+1 WHERE a=201;
|
||||
UPDATE t3 SET b=b+1 WHERE a=202;
|
||||
UPDATE t3 SET b=b+1 WHERE a=202;
|
||||
UPDATE t3 SET b=b+1 WHERE a=202;
|
||||
UPDATE t3 SET b=b+1 WHERE a=202;
|
||||
UPDATE t3 SET b=b+1 WHERE a=203;
|
||||
UPDATE t3 SET b=b+1 WHERE a=203;
|
||||
UPDATE t3 SET b=b+1 WHERE a=204;
|
||||
UPDATE t3 SET b=b+1 WHERE a=204;
|
||||
UPDATE t3 SET b=b+1 WHERE a=204;
|
||||
UPDATE t3 SET b=b+1 WHERE a=203;
|
||||
UPDATE t3 SET b=b+1 WHERE a=205;
|
||||
UPDATE t3 SET b=b+1 WHERE a=205;
|
||||
SET SESSION debug_dbug=@old_dbug;
|
||||
SELECT * FROM t3 WHERE a>=200 ORDER BY a;
|
||||
a b
|
||||
201 3
|
||||
202 4
|
||||
203 4
|
||||
204 4
|
||||
205 3
|
||||
include/save_master_gtid.inc
|
||||
include/start_slave.inc
|
||||
include/sync_with_master_gtid.inc
|
||||
SELECT * FROM t3 WHERE a>=200 ORDER BY a;
|
||||
a b
|
||||
201 3
|
||||
202 4
|
||||
203 4
|
||||
204 4
|
||||
205 3
|
||||
include/stop_slave.inc
|
||||
SET GLOBAL debug_dbug= @old_dbug;
|
||||
include/start_slave.inc
|
||||
*** Check getting deadlock killed inside open_binlog() during retry. ***
|
||||
include/stop_slave.inc
|
||||
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||
SET GLOBAL debug_dbug= '+d,inject_retry_event_group_open_binlog_kill';
|
||||
SET @old_max= @@GLOBAL.max_relay_log_size;
|
||||
SET GLOBAL max_relay_log_size= 4096;
|
||||
SET @old_dbug= @@SESSION.debug_dbug;
|
||||
SET SESSION debug_dbug="+d,binlog_force_commit_id";
|
||||
SET @commit_id= 10210;
|
||||
Omit long queries that cause relaylog rotations and transaction retries...
|
||||
SET SESSION debug_dbug=@old_dbug;
|
||||
SELECT * FROM t3 WHERE a>=200 ORDER BY a;
|
||||
a b
|
||||
201 6
|
||||
202 8
|
||||
203 7
|
||||
204 7
|
||||
205 5
|
||||
include/save_master_gtid.inc
|
||||
include/start_slave.inc
|
||||
include/sync_with_master_gtid.inc
|
||||
SELECT * FROM t3 WHERE a>=200 ORDER BY a;
|
||||
a b
|
||||
201 6
|
||||
202 8
|
||||
203 7
|
||||
204 7
|
||||
205 5
|
||||
include/stop_slave.inc
|
||||
SET GLOBAL debug_dbug= @old_debg;
|
||||
SET GLOBAL max_relay_log_size= @old_max;
|
||||
include/start_slave.inc
|
||||
include/stop_slave.inc
|
||||
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
|
||||
|
|
|
@ -2215,7 +2215,7 @@ SELECT * FROM t3 WHERE a >= 120 ORDER BY a;
|
|||
SELECT * FROM t3 WHERE a >= 120 ORDER BY a;
|
||||
|
||||
--source include/stop_slave.inc
|
||||
SET GLOBAL debug_dbug= @old_debug;
|
||||
SET GLOBAL debug_dbug= @old_dbug;
|
||||
--source include/start_slave.inc
|
||||
|
||||
|
||||
|
@ -2262,10 +2262,119 @@ SELECT * FROM t3 WHERE a >= 130 ORDER BY a;
|
|||
SELECT * FROM t3 WHERE a >= 130 ORDER BY a;
|
||||
|
||||
--source include/stop_slave.inc
|
||||
SET GLOBAL debug_dbug= @old_debug;
|
||||
SET GLOBAL debug_dbug= @old_dbug;
|
||||
--source include/start_slave.inc
|
||||
|
||||
|
||||
--echo *** MDEV-8031: Parallel replication stops on "connection killed" error (probably incorrectly handled deadlock kill) ***
|
||||
|
||||
--connection server_1
|
||||
INSERT INTO t3 VALUES (201,0), (202,0);
|
||||
--source include/save_master_gtid.inc
|
||||
|
||||
--connection server_2
|
||||
--source include/sync_with_master_gtid.inc
|
||||
--source include/stop_slave.inc
|
||||
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||
SET GLOBAL debug_dbug= '+d,inject_mdev8031';
|
||||
|
||||
--connection server_1
|
||||
# We artificially create a situation that hopefully resembles the original
|
||||
# bug which was only seen "in the wild", and only once.
|
||||
# Set up a fake group commit with lots of conflicts that will lead to deadlock
|
||||
# kill. The slave DBUG injection causes the slave to be deadlock killed at
|
||||
# a particular point during the retry, and then later do a small sleep at
|
||||
# another critical point where the prior transaction then has a chance to
|
||||
# complete. Finally an extra KILL check catches an unhandled, lingering
|
||||
# deadlock kill. So rather artificial, but at least it exercises the
|
||||
# relevant code paths.
|
||||
SET @old_dbug= @@SESSION.debug_dbug;
|
||||
SET SESSION debug_dbug="+d,binlog_force_commit_id";
|
||||
|
||||
SET @commit_id= 10200;
|
||||
INSERT INTO t3 VALUES (203, 1);
|
||||
INSERT INTO t3 VALUES (204, 1);
|
||||
INSERT INTO t3 VALUES (205, 1);
|
||||
UPDATE t3 SET b=b+1 WHERE a=201;
|
||||
UPDATE t3 SET b=b+1 WHERE a=201;
|
||||
UPDATE t3 SET b=b+1 WHERE a=201;
|
||||
UPDATE t3 SET b=b+1 WHERE a=202;
|
||||
UPDATE t3 SET b=b+1 WHERE a=202;
|
||||
UPDATE t3 SET b=b+1 WHERE a=202;
|
||||
UPDATE t3 SET b=b+1 WHERE a=202;
|
||||
UPDATE t3 SET b=b+1 WHERE a=203;
|
||||
UPDATE t3 SET b=b+1 WHERE a=203;
|
||||
UPDATE t3 SET b=b+1 WHERE a=204;
|
||||
UPDATE t3 SET b=b+1 WHERE a=204;
|
||||
UPDATE t3 SET b=b+1 WHERE a=204;
|
||||
UPDATE t3 SET b=b+1 WHERE a=203;
|
||||
UPDATE t3 SET b=b+1 WHERE a=205;
|
||||
UPDATE t3 SET b=b+1 WHERE a=205;
|
||||
SET SESSION debug_dbug=@old_dbug;
|
||||
|
||||
SELECT * FROM t3 WHERE a>=200 ORDER BY a;
|
||||
--source include/save_master_gtid.inc
|
||||
|
||||
--connection server_2
|
||||
--source include/start_slave.inc
|
||||
--source include/sync_with_master_gtid.inc
|
||||
|
||||
SELECT * FROM t3 WHERE a>=200 ORDER BY a;
|
||||
--source include/stop_slave.inc
|
||||
SET GLOBAL debug_dbug= @old_dbug;
|
||||
--source include/start_slave.inc
|
||||
|
||||
|
||||
--echo *** Check getting deadlock killed inside open_binlog() during retry. ***
|
||||
|
||||
--connection server_2
|
||||
--source include/stop_slave.inc
|
||||
SET @old_dbug= @@GLOBAL.debug_dbug;
|
||||
SET GLOBAL debug_dbug= '+d,inject_retry_event_group_open_binlog_kill';
|
||||
SET @old_max= @@GLOBAL.max_relay_log_size;
|
||||
SET GLOBAL max_relay_log_size= 4096;
|
||||
|
||||
--connection server_1
|
||||
SET @old_dbug= @@SESSION.debug_dbug;
|
||||
SET SESSION debug_dbug="+d,binlog_force_commit_id";
|
||||
|
||||
--let $large= `SELECT REPEAT("*", 8192)`
|
||||
SET @commit_id= 10210;
|
||||
--echo Omit long queries that cause relaylog rotations and transaction retries...
|
||||
--disable_query_log
|
||||
eval UPDATE t3 SET b=b+1 WHERE a=201 /* $large */;
|
||||
eval UPDATE t3 SET b=b+1 WHERE a=201 /* $large */;
|
||||
eval UPDATE t3 SET b=b+1 WHERE a=201 /* $large */;
|
||||
eval UPDATE t3 SET b=b+1 WHERE a=202 /* $large */;
|
||||
eval UPDATE t3 SET b=b+1 WHERE a=202 /* $large */;
|
||||
eval UPDATE t3 SET b=b+1 WHERE a=202 /* $large */;
|
||||
eval UPDATE t3 SET b=b+1 WHERE a=202 /* $large */;
|
||||
eval UPDATE t3 SET b=b+1 WHERE a=203 /* $large */;
|
||||
eval UPDATE t3 SET b=b+1 WHERE a=203 /* $large */;
|
||||
eval UPDATE t3 SET b=b+1 WHERE a=204 /* $large */;
|
||||
eval UPDATE t3 SET b=b+1 WHERE a=204 /* $large */;
|
||||
eval UPDATE t3 SET b=b+1 WHERE a=204 /* $large */;
|
||||
eval UPDATE t3 SET b=b+1 WHERE a=203 /* $large */;
|
||||
eval UPDATE t3 SET b=b+1 WHERE a=205 /* $large */;
|
||||
eval UPDATE t3 SET b=b+1 WHERE a=205 /* $large */;
|
||||
--enable_query_log
|
||||
SET SESSION debug_dbug=@old_dbug;
|
||||
|
||||
SELECT * FROM t3 WHERE a>=200 ORDER BY a;
|
||||
--source include/save_master_gtid.inc
|
||||
|
||||
--connection server_2
|
||||
--source include/start_slave.inc
|
||||
--source include/sync_with_master_gtid.inc
|
||||
|
||||
SELECT * FROM t3 WHERE a>=200 ORDER BY a;
|
||||
--source include/stop_slave.inc
|
||||
SET GLOBAL debug_dbug= @old_debg;
|
||||
SET GLOBAL max_relay_log_size= @old_max;
|
||||
--source include/start_slave.inc
|
||||
|
||||
|
||||
|
||||
# Clean up.
|
||||
--connection server_2
|
||||
--source include/stop_slave.inc
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#include "rpl_parallel.h"
|
||||
#include "slave.h"
|
||||
#include "rpl_mi.h"
|
||||
#include "sql_parse.h"
|
||||
#include "debug_sync.h"
|
||||
|
||||
/*
|
||||
|
@ -333,7 +334,7 @@ retry_event_group(rpl_group_info *rgi, rpl_parallel_thread *rpt,
|
|||
IO_CACHE rlog;
|
||||
LOG_INFO linfo;
|
||||
File fd= (File)-1;
|
||||
const char *errmsg= NULL;
|
||||
const char *errmsg;
|
||||
inuse_relaylog *ir= rgi->relay_log;
|
||||
uint64 event_count;
|
||||
uint64 events_to_execute= rgi->retry_event_count;
|
||||
|
@ -349,6 +350,7 @@ retry_event_group(rpl_group_info *rgi, rpl_parallel_thread *rpt,
|
|||
do_retry:
|
||||
event_count= 0;
|
||||
err= 0;
|
||||
errmsg= NULL;
|
||||
|
||||
/*
|
||||
If we already started committing before getting the deadlock (or other
|
||||
|
@ -384,7 +386,16 @@ do_retry:
|
|||
*/
|
||||
if(thd->wait_for_commit_ptr)
|
||||
thd->wait_for_commit_ptr->unregister_wait_for_prior_commit();
|
||||
DBUG_EXECUTE_IF("inject_mdev8031", {
|
||||
/* Simulate that we get deadlock killed at this exact point. */
|
||||
rgi->killed_for_retry= true;
|
||||
mysql_mutex_lock(&thd->LOCK_thd_data);
|
||||
thd->killed= KILL_CONNECTION;
|
||||
mysql_mutex_unlock(&thd->LOCK_thd_data);
|
||||
});
|
||||
rgi->cleanup_context(thd, 1);
|
||||
thd->reset_killed();
|
||||
thd->clear_error();
|
||||
|
||||
/*
|
||||
If we retry due to a deadlock kill that occurred during the commit step, we
|
||||
|
@ -428,10 +439,22 @@ do_retry:
|
|||
complete its commit.
|
||||
*/
|
||||
thd->clear_error();
|
||||
thd->reset_killed();
|
||||
if(thd->wait_for_commit_ptr)
|
||||
thd->wait_for_commit_ptr->unregister_wait_for_prior_commit();
|
||||
DBUG_EXECUTE_IF("inject_mdev8031", {
|
||||
/* Inject a small sleep to give prior transaction a chance to commit. */
|
||||
my_sleep(100000);
|
||||
});
|
||||
}
|
||||
|
||||
/*
|
||||
Let us clear any lingering deadlock kill one more time, here after
|
||||
wait_for_prior_commit() has completed. This should rule out any
|
||||
possibility of an old deadlock kill lingering on beyond this point.
|
||||
*/
|
||||
thd->reset_killed();
|
||||
|
||||
strmake_buf(log_name, ir->name);
|
||||
if ((fd= open_binlog(&rlog, log_name, &errmsg)) <0)
|
||||
{
|
||||
|
@ -447,6 +470,14 @@ do_retry:
|
|||
err= 1;
|
||||
goto err;
|
||||
}
|
||||
DBUG_EXECUTE_IF("inject_mdev8031", {
|
||||
/* Simulate pending KILL caught in read_relay_log_description_event(). */
|
||||
if (thd->check_killed()) {
|
||||
thd->send_kill_message();
|
||||
err= 1;
|
||||
goto err;
|
||||
}
|
||||
});
|
||||
my_b_seek(&rlog, cur_offset);
|
||||
|
||||
do
|
||||
|
@ -469,7 +500,7 @@ do_retry:
|
|||
{
|
||||
errmsg= "slave SQL thread aborted because of I/O error";
|
||||
err= 1;
|
||||
goto err;
|
||||
goto check_retry;
|
||||
}
|
||||
if (rlog.error > 0)
|
||||
{
|
||||
|
@ -498,10 +529,25 @@ do_retry:
|
|||
}
|
||||
strmake_buf(log_name ,linfo.log_file_name);
|
||||
|
||||
DBUG_EXECUTE_IF("inject_retry_event_group_open_binlog_kill", {
|
||||
if (retries < 2)
|
||||
{
|
||||
/* Simulate that we get deadlock killed during open_binlog(). */
|
||||
mysql_reset_thd_for_next_command(thd);
|
||||
rgi->killed_for_retry= true;
|
||||
mysql_mutex_lock(&thd->LOCK_thd_data);
|
||||
thd->killed= KILL_CONNECTION;
|
||||
mysql_mutex_unlock(&thd->LOCK_thd_data);
|
||||
thd->send_kill_message();
|
||||
fd= (File)-1;
|
||||
err= 1;
|
||||
goto check_retry;
|
||||
}
|
||||
});
|
||||
if ((fd= open_binlog(&rlog, log_name, &errmsg)) <0)
|
||||
{
|
||||
err= 1;
|
||||
goto err;
|
||||
goto check_retry;
|
||||
}
|
||||
/* Loop to try again on the new log file. */
|
||||
}
|
||||
|
@ -544,26 +590,31 @@ do_retry:
|
|||
if (retries == 0) err= dbug_simulate_tmp_error(rgi, thd););
|
||||
DBUG_EXECUTE_IF("rpl_parallel_simulate_infinite_temp_err_gtid_0_x_100",
|
||||
err= dbug_simulate_tmp_error(rgi, thd););
|
||||
if (err)
|
||||
if (!err)
|
||||
continue;
|
||||
|
||||
check_retry:
|
||||
convert_kill_to_deadlock_error(rgi);
|
||||
if (has_temporary_error(thd))
|
||||
{
|
||||
convert_kill_to_deadlock_error(rgi);
|
||||
if (has_temporary_error(thd))
|
||||
++retries;
|
||||
if (retries < slave_trans_retries)
|
||||
{
|
||||
++retries;
|
||||
if (retries < slave_trans_retries)
|
||||
if (fd >= 0)
|
||||
{
|
||||
end_io_cache(&rlog);
|
||||
mysql_file_close(fd, MYF(MY_WME));
|
||||
fd= (File)-1;
|
||||
goto do_retry;
|
||||
}
|
||||
sql_print_error("Slave worker thread retried transaction %lu time(s) "
|
||||
"in vain, giving up. Consider raising the value of "
|
||||
"the slave_transaction_retries variable.",
|
||||
slave_trans_retries);
|
||||
goto do_retry;
|
||||
}
|
||||
goto err;
|
||||
sql_print_error("Slave worker thread retried transaction %lu time(s) "
|
||||
"in vain, giving up. Consider raising the value of "
|
||||
"the slave_transaction_retries variable.",
|
||||
slave_trans_retries);
|
||||
}
|
||||
goto err;
|
||||
|
||||
} while (event_count < events_to_execute);
|
||||
|
||||
err:
|
||||
|
|
Loading…
Reference in a new issue