mirror of
https://github.com/MariaDB/server.git
synced 2025-01-29 02:05:57 +01:00
backporting fixes of bug@45940 to 5.1-rpl+2 to cover failures rpl_heartbeat_* as well.
This commit is contained in:
parent
75a4dd3e6a
commit
124ad21d01
11 changed files with 342 additions and 170 deletions
|
@ -1,17 +0,0 @@
|
||||||
stop slave;
|
|
||||||
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
|
|
||||||
reset master;
|
|
||||||
reset slave;
|
|
||||||
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
|
|
||||||
start slave;
|
|
||||||
create table t1 (words varchar(20)) engine=myisam;
|
|
||||||
load data infile '../../std_data/words.dat' into table t1 (words);
|
|
||||||
select count(*) from t1;
|
|
||||||
count(*)
|
|
||||||
70
|
|
||||||
select count(*) from t1;
|
|
||||||
count(*)
|
|
||||||
70
|
|
||||||
drop table t1;
|
|
||||||
include/stop_slave.inc
|
|
||||||
drop table t1;
|
|
61
mysql-test/suite/rpl/r/rpl_stop_middle_group.result
Normal file
61
mysql-test/suite/rpl/r/rpl_stop_middle_group.result
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
stop slave;
|
||||||
|
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
|
||||||
|
reset master;
|
||||||
|
reset slave;
|
||||||
|
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
|
||||||
|
start slave;
|
||||||
|
create table tm (a int auto_increment primary key) engine=myisam;
|
||||||
|
create table ti (a int auto_increment primary key) engine=innodb;
|
||||||
|
set @@global.debug="+d,stop_slave_middle_group";
|
||||||
|
begin;
|
||||||
|
insert into ti set a=null;
|
||||||
|
insert into tm set a=null;
|
||||||
|
commit;
|
||||||
|
SELECT "NO" AS Last_SQL_Error, @check as `true`;
|
||||||
|
Last_SQL_Error true
|
||||||
|
NO 1
|
||||||
|
select count(*) as one from tm;
|
||||||
|
one
|
||||||
|
1
|
||||||
|
select count(*) as one from ti;
|
||||||
|
one
|
||||||
|
1
|
||||||
|
set @@global.debug="-d";
|
||||||
|
include/start_slave.inc
|
||||||
|
truncate table tm;
|
||||||
|
truncate table ti;
|
||||||
|
set @@global.debug="+d,stop_slave_middle_group";
|
||||||
|
set @@global.debug="+d,incomplete_group_in_relay_log";
|
||||||
|
begin;
|
||||||
|
insert into ti set a=null;
|
||||||
|
insert into tm set a=null;
|
||||||
|
commit;
|
||||||
|
SELECT "Fatal error: ... The slave SQL is stopped, leaving the current group of events unfinished with a non-transaction table changed. If the group consists solely of Row-based events, you can try restarting the slave with --slave-exec-mode=IDEMPOTENT, which ignores duplicate key, key not found, and similar errors (see documentation for details)." AS Last_SQL_Error, @check as `true`;
|
||||||
|
Last_SQL_Error true
|
||||||
|
Fatal error: ... The slave SQL is stopped, leaving the current group of events unfinished with a non-transaction table changed. If the group consists solely of Row-based events, you can try restarting the slave with --slave-exec-mode=IDEMPOTENT, which ignores duplicate key, key not found, and similar errors (see documentation for details). 1
|
||||||
|
select count(*) as one from tm;
|
||||||
|
one
|
||||||
|
1
|
||||||
|
select count(*) as zero from ti;
|
||||||
|
zero
|
||||||
|
0
|
||||||
|
set @@global.debug="-d";
|
||||||
|
stop slave;
|
||||||
|
truncate table tm;
|
||||||
|
include/start_slave.inc
|
||||||
|
set @@global.debug="+d,stop_slave_middle_group";
|
||||||
|
set @@global.debug="+d,incomplete_group_in_relay_log";
|
||||||
|
update tm as t1, ti as t2 set t1.a=t1.a * 2, t2.a=t2.a * 2;
|
||||||
|
SELECT "Fatal error: ... The slave SQL is stopped, leaving the current group of events unfinished with a non-transaction table changed. If the group consists solely of Row-based events, you can try restarting the slave with --slave-exec-mode=IDEMPOTENT, which ignores duplicate key, key not found, and similar errors (see documentation for details)." AS Last_SQL_Error, @check as `true`;
|
||||||
|
Last_SQL_Error true
|
||||||
|
Fatal error: ... The slave SQL is stopped, leaving the current group of events unfinished with a non-transaction table changed. If the group consists solely of Row-based events, you can try restarting the slave with --slave-exec-mode=IDEMPOTENT, which ignores duplicate key, key not found, and similar errors (see documentation for details). 1
|
||||||
|
select max(a) as two from tm;
|
||||||
|
two
|
||||||
|
2
|
||||||
|
select max(a) as one from ti;
|
||||||
|
one
|
||||||
|
1
|
||||||
|
set @@global.debug="-d";
|
||||||
|
drop table tm, ti;
|
||||||
|
include/stop_slave.inc
|
||||||
|
drop table tm, ti;
|
|
@ -1 +0,0 @@
|
||||||
--loose-binlog-row-event-max-size=256
|
|
|
@ -1 +0,0 @@
|
||||||
--loose-debug=d,STOP_SLAVE_after_first_Rows_event
|
|
|
@ -1,32 +0,0 @@
|
||||||
-- source include/have_binlog_format_row.inc
|
|
||||||
-- source include/have_debug.inc
|
|
||||||
-- source include/master-slave.inc
|
|
||||||
|
|
||||||
# master is asked to create small Rows events: if only one event is
|
|
||||||
# created, stopping slave at the end of that one will show no bug, we
|
|
||||||
# need at least two (and stop after first); in this test we use three.
|
|
||||||
|
|
||||||
connection master;
|
|
||||||
create table t1 (words varchar(20)) engine=myisam;
|
|
||||||
|
|
||||||
load data infile '../../std_data/words.dat' into table t1 (words);
|
|
||||||
select count(*) from t1;
|
|
||||||
save_master_pos;
|
|
||||||
|
|
||||||
connection slave;
|
|
||||||
|
|
||||||
# slave will automatically stop the sql thread thanks to the .opt
|
|
||||||
# file; it will initiate the stop request after the first
|
|
||||||
# Rows_log_event (out of 3) but should wait until the last one is
|
|
||||||
# executed before stopping.
|
|
||||||
|
|
||||||
source include/wait_for_slave_sql_to_stop.inc;
|
|
||||||
|
|
||||||
# check that we inserted all rows (waited until the last Rows event)
|
|
||||||
select count(*) from t1;
|
|
||||||
|
|
||||||
connection master;
|
|
||||||
drop table t1;
|
|
||||||
connection slave; # slave SQL thread is stopped
|
|
||||||
source include/stop_slave.inc;
|
|
||||||
drop table t1;
|
|
149
mysql-test/suite/rpl/t/rpl_stop_middle_group.test
Normal file
149
mysql-test/suite/rpl/t/rpl_stop_middle_group.test
Normal file
|
@ -0,0 +1,149 @@
|
||||||
|
-- source include/have_debug.inc
|
||||||
|
-- source include/master-slave.inc
|
||||||
|
-- source include/have_innodb.inc
|
||||||
|
|
||||||
|
# Proving that stopping in the middle of applying a group of events
|
||||||
|
# does not have immediate effect if a non-transaction table has been changed.
|
||||||
|
# The slave sql thread has to try to finish applying first.
|
||||||
|
# The tests rely on simulation of the killed status.
|
||||||
|
# The matter of testing correlates to some of `rpl_start_stop_slave' that does
|
||||||
|
# not require `have_debug'.
|
||||||
|
|
||||||
|
connection master;
|
||||||
|
|
||||||
|
create table tm (a int auto_increment primary key) engine=myisam;
|
||||||
|
create table ti (a int auto_increment primary key) engine=innodb;
|
||||||
|
|
||||||
|
sync_slave_with_master;
|
||||||
|
set @@global.debug="+d,stop_slave_middle_group";
|
||||||
|
|
||||||
|
connection master;
|
||||||
|
|
||||||
|
begin;
|
||||||
|
insert into ti set a=null;
|
||||||
|
insert into tm set a=null; # to simulate killed status on the slave
|
||||||
|
commit;
|
||||||
|
|
||||||
|
connection slave;
|
||||||
|
|
||||||
|
# slave will catch the killed status but won't shut down immediately
|
||||||
|
# only after the whole group has done (commit)
|
||||||
|
|
||||||
|
source include/wait_for_slave_sql_to_stop.inc;
|
||||||
|
|
||||||
|
# checking: no error and the group is finished
|
||||||
|
|
||||||
|
let $error= query_get_value("SHOW SLAVE STATUS", Last_SQL_Error, 1);
|
||||||
|
let $read = query_get_value("SHOW SLAVE STATUS", Read_Master_Log_Pos, 1);
|
||||||
|
let $exec = query_get_value("SHOW SLAVE STATUS", Exec_Master_Log_Pos, 1);
|
||||||
|
--disable_query_log
|
||||||
|
eval SELECT $read = $exec into @check;
|
||||||
|
--enable_query_log
|
||||||
|
eval SELECT "NO$error" AS Last_SQL_Error, @check as `true`;
|
||||||
|
select count(*) as one from tm;
|
||||||
|
select count(*) as one from ti;
|
||||||
|
|
||||||
|
set @@global.debug="-d";
|
||||||
|
|
||||||
|
#
|
||||||
|
# bug#45940 issues around rli->last_event_start_time
|
||||||
|
# Testing of slave stopped after it had waited (in vain) for
|
||||||
|
# the group be finished.
|
||||||
|
# It could not be finished because of simulation of failure to
|
||||||
|
# receive the terminal part
|
||||||
|
# The test relay on simulation of the incomplete group in the relay log
|
||||||
|
|
||||||
|
# Two cases are verified: a mixed transacton and a mixed multi-table update.
|
||||||
|
#
|
||||||
|
# The mixed transacton.
|
||||||
|
#
|
||||||
|
source include/start_slave.inc;
|
||||||
|
|
||||||
|
connection master;
|
||||||
|
|
||||||
|
truncate table tm; # cleanup of former tests
|
||||||
|
truncate table ti;
|
||||||
|
|
||||||
|
#connection slave;
|
||||||
|
sync_slave_with_master;
|
||||||
|
|
||||||
|
set @@global.debug="+d,stop_slave_middle_group";
|
||||||
|
set @@global.debug="+d,incomplete_group_in_relay_log";
|
||||||
|
|
||||||
|
connection master;
|
||||||
|
|
||||||
|
begin;
|
||||||
|
insert into ti set a=null;
|
||||||
|
insert into tm set a=null;
|
||||||
|
commit;
|
||||||
|
|
||||||
|
connection slave;
|
||||||
|
|
||||||
|
# slave will catch the killed status, won't shut down immediately
|
||||||
|
# but does it eventually having the whole group unfinished (not committed)
|
||||||
|
|
||||||
|
source include/wait_for_slave_sql_to_stop.inc;
|
||||||
|
|
||||||
|
# checking: the error and group unfinished
|
||||||
|
|
||||||
|
let $error= query_get_value("SHOW SLAVE STATUS", Last_SQL_Error, 1);
|
||||||
|
let $read = query_get_value("SHOW SLAVE STATUS", Read_Master_Log_Pos, 1);
|
||||||
|
let $exec = query_get_value("SHOW SLAVE STATUS", Exec_Master_Log_Pos, 1);
|
||||||
|
--disable_query_log
|
||||||
|
eval SELECT $read - $exec > 0 into @check;
|
||||||
|
--enable_query_log
|
||||||
|
eval SELECT "$error" AS Last_SQL_Error, @check as `true`;
|
||||||
|
select count(*) as one from tm;
|
||||||
|
select count(*) as zero from ti;
|
||||||
|
|
||||||
|
set @@global.debug="-d";
|
||||||
|
|
||||||
|
#
|
||||||
|
# The mixed multi-table update
|
||||||
|
#
|
||||||
|
stop slave;
|
||||||
|
truncate table tm;
|
||||||
|
source include/start_slave.inc;
|
||||||
|
|
||||||
|
connection master;
|
||||||
|
|
||||||
|
#connection slave;
|
||||||
|
sync_slave_with_master;
|
||||||
|
set @@global.debug="+d,stop_slave_middle_group";
|
||||||
|
set @@global.debug="+d,incomplete_group_in_relay_log";
|
||||||
|
|
||||||
|
connection master;
|
||||||
|
update tm as t1, ti as t2 set t1.a=t1.a * 2, t2.a=t2.a * 2;
|
||||||
|
|
||||||
|
connection slave;
|
||||||
|
|
||||||
|
# slave will catch the killed status, won't shut down immediately
|
||||||
|
# but does it eventually having the whole group unfinished (not committed)
|
||||||
|
#
|
||||||
|
|
||||||
|
source include/wait_for_slave_sql_to_stop.inc;
|
||||||
|
|
||||||
|
# checking: the error and group unfinished
|
||||||
|
|
||||||
|
let $error= query_get_value("SHOW SLAVE STATUS", Last_SQL_Error, 1);
|
||||||
|
let $read = query_get_value("SHOW SLAVE STATUS", Read_Master_Log_Pos, 1);
|
||||||
|
let $exec = query_get_value("SHOW SLAVE STATUS", Exec_Master_Log_Pos, 1);
|
||||||
|
--disable_query_log
|
||||||
|
eval SELECT $read - $exec > 0 into @check;
|
||||||
|
--enable_query_log
|
||||||
|
eval SELECT "$error" AS Last_SQL_Error, @check as `true`;
|
||||||
|
select max(a) as two from tm;
|
||||||
|
select max(a) as one from ti;
|
||||||
|
|
||||||
|
set @@global.debug="-d";
|
||||||
|
|
||||||
|
#
|
||||||
|
# clean-up
|
||||||
|
#
|
||||||
|
|
||||||
|
connection master;
|
||||||
|
drop table tm, ti;
|
||||||
|
|
||||||
|
connection slave; # slave SQL thread is stopped
|
||||||
|
source include/stop_slave.inc;
|
||||||
|
drop table tm, ti;
|
|
@ -3272,6 +3272,21 @@ Default database: '%s'. Query: '%s'",
|
||||||
*/
|
*/
|
||||||
} /* End of if (db_ok(... */
|
} /* End of if (db_ok(... */
|
||||||
|
|
||||||
|
{/**
|
||||||
|
The following failure injecion works in cooperation with tests
|
||||||
|
setting @@global.debug= 'd,stop_slave_middle_group'.
|
||||||
|
The sql thread receives the killed status and will proceed
|
||||||
|
to shutdown trying to finish incomplete events group.
|
||||||
|
*/
|
||||||
|
DBUG_EXECUTE_IF("stop_slave_middle_group",
|
||||||
|
if (strcmp("COMMIT", query) != 0 &&
|
||||||
|
strcmp("BEGIN", query) != 0)
|
||||||
|
{
|
||||||
|
if (thd->transaction.all.modified_non_trans_table)
|
||||||
|
const_cast<Relay_log_info*>(rli)->abort_slave= 1;
|
||||||
|
};);
|
||||||
|
}
|
||||||
|
|
||||||
end:
|
end:
|
||||||
/*
|
/*
|
||||||
Probably we have set thd->query, thd->db, thd->catalog to point to places
|
Probably we have set thd->query, thd->db, thd->catalog to point to places
|
||||||
|
@ -7475,8 +7490,16 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
|
||||||
thd->transaction.stmt.modified_non_trans_table= TRUE;
|
thd->transaction.stmt.modified_non_trans_table= TRUE;
|
||||||
} // row processing loop
|
} // row processing loop
|
||||||
|
|
||||||
DBUG_EXECUTE_IF("STOP_SLAVE_after_first_Rows_event",
|
{/**
|
||||||
const_cast<Relay_log_info*>(rli)->abort_slave= 1;);
|
The following failure injecion works in cooperation with tests
|
||||||
|
setting @@global.debug= 'd,stop_slave_middle_group'.
|
||||||
|
The sql thread receives the killed status and will proceed
|
||||||
|
to shutdown trying to finish incomplete events group.
|
||||||
|
*/
|
||||||
|
DBUG_EXECUTE_IF("stop_slave_middle_group",
|
||||||
|
if (thd->transaction.all.modified_non_trans_table)
|
||||||
|
const_cast<Relay_log_info*>(rli)->abort_slave= 1;);
|
||||||
|
}
|
||||||
|
|
||||||
if ((error= do_after_row_operations(rli, error)) &&
|
if ((error= do_after_row_operations(rli, error)) &&
|
||||||
ignored_error_code(convert_handler_error(error, thd, table)))
|
ignored_error_code(convert_handler_error(error, thd, table)))
|
||||||
|
@ -7513,32 +7536,6 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
|
||||||
const_cast<Relay_log_info*>(rli)->cleanup_context(thd, error);
|
const_cast<Relay_log_info*>(rli)->cleanup_context(thd, error);
|
||||||
thd->is_slave_error= 1;
|
thd->is_slave_error= 1;
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
This code would ideally be placed in do_update_pos() instead, but
|
|
||||||
since we have no access to table there, we do the setting of
|
|
||||||
last_event_start_time here instead.
|
|
||||||
*/
|
|
||||||
else if (table && (table->s->primary_key == MAX_KEY) &&
|
|
||||||
!cache_stmt && get_flags(STMT_END_F) == RLE_NO_FLAGS)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
------------ Temporary fix until WL#2975 is implemented ---------
|
|
||||||
|
|
||||||
This event is not the last one (no STMT_END_F). If we stop now
|
|
||||||
(in case of terminate_slave_thread()), how will we restart? We
|
|
||||||
have to restart from Table_map_log_event, but as this table is
|
|
||||||
not transactional, the rows already inserted will still be
|
|
||||||
present, and idempotency is not guaranteed (no PK) so we risk
|
|
||||||
that repeating leads to double insert. So we desperately try to
|
|
||||||
continue, hope we'll eventually leave this buggy situation (by
|
|
||||||
executing the final Rows_log_event). If we are in a hopeless
|
|
||||||
wait (reached end of last relay log and nothing gets appended
|
|
||||||
there), we timeout after one minute, and notify DBA about the
|
|
||||||
problem. When WL#2975 is implemented, just remove the member
|
|
||||||
Relay_log_info::last_event_start_time and all its occurrences.
|
|
||||||
*/
|
|
||||||
const_cast<Relay_log_info*>(rli)->last_event_start_time= my_time(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
DBUG_RETURN(error);
|
DBUG_RETURN(error);
|
||||||
}
|
}
|
||||||
|
|
|
@ -226,7 +226,7 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, const Relay_log_info
|
||||||
|
|
||||||
row_start= row_end;
|
row_start= row_end;
|
||||||
}
|
}
|
||||||
DBUG_EXECUTE_IF("STOP_SLAVE_after_first_Rows_event",
|
DBUG_EXECUTE_IF("stop_slave_middle_group",
|
||||||
const_cast<Relay_log_info*>(rli)->abort_slave= 1;);
|
const_cast<Relay_log_info*>(rli)->abort_slave= 1;);
|
||||||
error= do_after_row_operations(table, error);
|
error= do_after_row_operations(table, error);
|
||||||
if (!ev->cache_stmt)
|
if (!ev->cache_stmt)
|
||||||
|
@ -269,34 +269,6 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, const Relay_log_info
|
||||||
DBUG_RETURN(error);
|
DBUG_RETURN(error);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
This code would ideally be placed in do_update_pos() instead, but
|
|
||||||
since we have no access to table there, we do the setting of
|
|
||||||
last_event_start_time here instead.
|
|
||||||
*/
|
|
||||||
if (table && (table->s->primary_key == MAX_KEY) &&
|
|
||||||
!ev->cache_stmt &&
|
|
||||||
ev->get_flags(Old_rows_log_event::STMT_END_F) == Old_rows_log_event::RLE_NO_FLAGS)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
------------ Temporary fix until WL#2975 is implemented ---------
|
|
||||||
|
|
||||||
This event is not the last one (no STMT_END_F). If we stop now
|
|
||||||
(in case of terminate_slave_thread()), how will we restart? We
|
|
||||||
have to restart from Table_map_log_event, but as this table is
|
|
||||||
not transactional, the rows already inserted will still be
|
|
||||||
present, and idempotency is not guaranteed (no PK) so we risk
|
|
||||||
that repeating leads to double insert. So we desperately try to
|
|
||||||
continue, hope we'll eventually leave this buggy situation (by
|
|
||||||
executing the final Old_rows_log_event). If we are in a hopeless
|
|
||||||
wait (reached end of last relay log and nothing gets appended
|
|
||||||
there), we timeout after one minute, and notify DBA about the
|
|
||||||
problem. When WL#2975 is implemented, just remove the member
|
|
||||||
st_relay_log_info::last_event_start_time and all its occurences.
|
|
||||||
*/
|
|
||||||
const_cast<Relay_log_info*>(rli)->last_event_start_time= my_time(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
DBUG_RETURN(0);
|
DBUG_RETURN(0);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -1744,7 +1716,7 @@ int Old_rows_log_event::do_apply_event(Relay_log_info const *rli)
|
||||||
|
|
||||||
} // row processing loop
|
} // row processing loop
|
||||||
|
|
||||||
DBUG_EXECUTE_IF("STOP_SLAVE_after_first_Rows_event",
|
DBUG_EXECUTE_IF("stop_slave_middle_group",
|
||||||
const_cast<Relay_log_info*>(rli)->abort_slave= 1;);
|
const_cast<Relay_log_info*>(rli)->abort_slave= 1;);
|
||||||
error= do_after_row_operations(rli, error);
|
error= do_after_row_operations(rli, error);
|
||||||
if (!cache_stmt)
|
if (!cache_stmt)
|
||||||
|
@ -1787,33 +1759,6 @@ int Old_rows_log_event::do_apply_event(Relay_log_info const *rli)
|
||||||
DBUG_RETURN(error);
|
DBUG_RETURN(error);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
This code would ideally be placed in do_update_pos() instead, but
|
|
||||||
since we have no access to table there, we do the setting of
|
|
||||||
last_event_start_time here instead.
|
|
||||||
*/
|
|
||||||
if (table && (table->s->primary_key == MAX_KEY) &&
|
|
||||||
!cache_stmt && get_flags(STMT_END_F) == RLE_NO_FLAGS)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
------------ Temporary fix until WL#2975 is implemented ---------
|
|
||||||
|
|
||||||
This event is not the last one (no STMT_END_F). If we stop now
|
|
||||||
(in case of terminate_slave_thread()), how will we restart? We
|
|
||||||
have to restart from Table_map_log_event, but as this table is
|
|
||||||
not transactional, the rows already inserted will still be
|
|
||||||
present, and idempotency is not guaranteed (no PK) so we risk
|
|
||||||
that repeating leads to double insert. So we desperately try to
|
|
||||||
continue, hope we'll eventually leave this buggy situation (by
|
|
||||||
executing the final Old_rows_log_event). If we are in a hopeless
|
|
||||||
wait (reached end of last relay log and nothing gets appended
|
|
||||||
there), we timeout after one minute, and notify DBA about the
|
|
||||||
problem. When WL#2975 is implemented, just remove the member
|
|
||||||
Relay_log_info::last_event_start_time and all its occurrences.
|
|
||||||
*/
|
|
||||||
const_cast<Relay_log_info*>(rli)->last_event_start_time= my_time(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
DBUG_RETURN(0);
|
DBUG_RETURN(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1195,7 +1195,6 @@ void Relay_log_info::cleanup_context(THD *thd, bool error)
|
||||||
*/
|
*/
|
||||||
thd->options&= ~OPTION_NO_FOREIGN_KEY_CHECKS;
|
thd->options&= ~OPTION_NO_FOREIGN_KEY_CHECKS;
|
||||||
thd->options&= ~OPTION_RELAXED_UNIQUE_CHECKS;
|
thd->options&= ~OPTION_RELAXED_UNIQUE_CHECKS;
|
||||||
last_event_start_time= 0;
|
|
||||||
DBUG_VOID_RETURN;
|
DBUG_VOID_RETURN;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -349,12 +349,10 @@ public:
|
||||||
void clear_tables_to_lock();
|
void clear_tables_to_lock();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Used by row-based replication to detect that it should not stop at
|
Used to defer stopping the SQL thread to give it a chance
|
||||||
this event, but give it a chance to send more events. The time
|
to finish up the current group of events.
|
||||||
where the last event inside a group started is stored here. If the
|
The timestamp is set and reset in @c sql_slave_killed().
|
||||||
variable is zero, we are not in a group (but may be in a
|
*/
|
||||||
transaction).
|
|
||||||
*/
|
|
||||||
time_t last_event_start_time;
|
time_t last_event_start_time;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
128
sql/slave.cc
128
sql/slave.cc
|
@ -49,6 +49,10 @@
|
||||||
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
|
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
|
||||||
|
|
||||||
#define MAX_SLAVE_RETRY_PAUSE 5
|
#define MAX_SLAVE_RETRY_PAUSE 5
|
||||||
|
/*
|
||||||
|
a parameter of sql_slave_killed() to defer the killed status
|
||||||
|
*/
|
||||||
|
#define SLAVE_WAIT_GROUP_DONE 60
|
||||||
bool use_slave_mask = 0;
|
bool use_slave_mask = 0;
|
||||||
MY_BITMAP slave_error_mask;
|
MY_BITMAP slave_error_mask;
|
||||||
char slave_skip_error_names[SHOW_VAR_FUNC_BUFF_SIZE];
|
char slave_skip_error_names[SHOW_VAR_FUNC_BUFF_SIZE];
|
||||||
|
@ -790,46 +794,95 @@ static bool io_slave_killed(THD* thd, Master_info* mi)
|
||||||
DBUG_RETURN(mi->abort_slave || abort_loop || thd->killed);
|
DBUG_RETURN(mi->abort_slave || abort_loop || thd->killed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
The function analyzes a possible killed status and makes
|
||||||
|
a decision whether to accept it or not.
|
||||||
|
Normally upon accepting the sql thread goes to shutdown.
|
||||||
|
In the event of deffering decision @rli->last_event_start_time waiting
|
||||||
|
timer is set to force the killed status be accepted upon its expiration.
|
||||||
|
|
||||||
|
@param thd pointer to a THD instance
|
||||||
|
@param rli pointer to Relay_log_info instance
|
||||||
|
|
||||||
|
@return TRUE the killed status is recognized, FALSE a possible killed
|
||||||
|
status is deferred.
|
||||||
|
*/
|
||||||
static bool sql_slave_killed(THD* thd, Relay_log_info* rli)
|
static bool sql_slave_killed(THD* thd, Relay_log_info* rli)
|
||||||
{
|
{
|
||||||
|
bool ret= FALSE;
|
||||||
DBUG_ENTER("sql_slave_killed");
|
DBUG_ENTER("sql_slave_killed");
|
||||||
|
|
||||||
DBUG_ASSERT(rli->sql_thd == thd);
|
DBUG_ASSERT(rli->sql_thd == thd);
|
||||||
DBUG_ASSERT(rli->slave_running == 1);// tracking buffer overrun
|
DBUG_ASSERT(rli->slave_running == 1);// tracking buffer overrun
|
||||||
if (abort_loop || thd->killed || rli->abort_slave)
|
if (abort_loop || thd->killed || rli->abort_slave)
|
||||||
{
|
{
|
||||||
if (rli->abort_slave && rli->is_in_group() &&
|
if (thd->transaction.all.modified_non_trans_table && rli->is_in_group())
|
||||||
thd->transaction.all.modified_non_trans_table)
|
|
||||||
DBUG_RETURN(0);
|
|
||||||
/*
|
|
||||||
If we are in an unsafe situation (stopping could corrupt replication),
|
|
||||||
we give one minute to the slave SQL thread of grace before really
|
|
||||||
terminating, in the hope that it will be able to read more events and
|
|
||||||
the unsafe situation will soon be left. Note that this one minute starts
|
|
||||||
from the last time anything happened in the slave SQL thread. So it's
|
|
||||||
really one minute of idleness, we don't timeout if the slave SQL thread
|
|
||||||
is actively working.
|
|
||||||
*/
|
|
||||||
if (rli->last_event_start_time == 0)
|
|
||||||
DBUG_RETURN(1);
|
|
||||||
DBUG_PRINT("info", ("Slave SQL thread is in an unsafe situation, giving "
|
|
||||||
"it some grace period"));
|
|
||||||
if (difftime(time(0), rli->last_event_start_time) > 60)
|
|
||||||
{
|
{
|
||||||
rli->report(ERROR_LEVEL, 0,
|
char msg_stopped[]=
|
||||||
"SQL thread had to stop in an unsafe situation, in "
|
"... The slave SQL is stopped, leaving the current group "
|
||||||
"the middle of applying updates to a "
|
"of events unfinished with a non-transaction table changed. "
|
||||||
"non-transactional table without any primary key. "
|
"If the group consists solely of Row-based events, you can try "
|
||||||
"There is a risk of duplicate updates when the slave "
|
"restarting the slave with --slave-exec-mode=IDEMPOTENT, which "
|
||||||
"SQL thread is restarted. Please check your tables' "
|
"ignores duplicate key, key not found, and similar errors (see "
|
||||||
"contents after restart.");
|
"documentation for details).";
|
||||||
DBUG_RETURN(1);
|
|
||||||
|
if (rli->abort_slave)
|
||||||
|
{
|
||||||
|
DBUG_PRINT("info", ("Slave SQL thread is being stopped in the middle of"
|
||||||
|
" a group having updated a non-trans table, giving"
|
||||||
|
" it some grace period"));
|
||||||
|
|
||||||
|
/*
|
||||||
|
Slave sql thread shutdown in face of unfinished group modified
|
||||||
|
Non-trans table is handled via a timer. The slave may eventually
|
||||||
|
give out to complete the current group and in that case there
|
||||||
|
might be issues at consequent slave restart, see the error message.
|
||||||
|
WL#2975 offers a robust solution requiring to store the last exectuted
|
||||||
|
event's coordinates along with the group's coordianates
|
||||||
|
instead of waiting with @c last_event_start_time the timer.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (rli->last_event_start_time == 0)
|
||||||
|
rli->last_event_start_time= my_time(0);
|
||||||
|
ret= difftime(my_time(0), rli->last_event_start_time) <=
|
||||||
|
SLAVE_WAIT_GROUP_DONE ? FALSE : TRUE;
|
||||||
|
|
||||||
|
DBUG_EXECUTE_IF("stop_slave_middle_group",
|
||||||
|
DBUG_EXECUTE_IF("incomplete_group_in_relay_log",
|
||||||
|
ret= TRUE;);); // time is over
|
||||||
|
|
||||||
|
if (ret == 0)
|
||||||
|
{
|
||||||
|
rli->report(WARNING_LEVEL, 0,
|
||||||
|
"slave SQL thread is being stopped in the middle "
|
||||||
|
"of applying of a group having updated a non-transaction "
|
||||||
|
"table; waiting for the group completion ... ");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
|
||||||
|
ER(ER_SLAVE_FATAL_ERROR), msg_stopped);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ret= TRUE;
|
||||||
|
rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, ER(ER_SLAVE_FATAL_ERROR),
|
||||||
|
msg_stopped);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ret= TRUE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
DBUG_RETURN(0);
|
if (ret)
|
||||||
|
rli->last_event_start_time= 0;
|
||||||
|
|
||||||
|
DBUG_RETURN(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
skip_load_data_infile()
|
skip_load_data_infile()
|
||||||
|
|
||||||
|
@ -2525,6 +2578,27 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
|
||||||
delete ev;
|
delete ev;
|
||||||
DBUG_RETURN(1);
|
DBUG_RETURN(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{ /**
|
||||||
|
The following failure injecion works in cooperation with tests
|
||||||
|
setting @@global.debug= 'd,incomplete_group_in_relay_log'.
|
||||||
|
Xid or Commit events are not executed to force the slave sql
|
||||||
|
read hanging if the realy log does not have any more events.
|
||||||
|
*/
|
||||||
|
DBUG_EXECUTE_IF("incomplete_group_in_relay_log",
|
||||||
|
if ((ev->get_type_code() == XID_EVENT) ||
|
||||||
|
((ev->get_type_code() == QUERY_EVENT) &&
|
||||||
|
strcmp("COMMIT", ((Query_log_event *) ev)->query) == 0))
|
||||||
|
{
|
||||||
|
DBUG_ASSERT(thd->transaction.all.modified_non_trans_table);
|
||||||
|
rli->abort_slave= 1;
|
||||||
|
pthread_mutex_unlock(&rli->data_lock);
|
||||||
|
delete ev;
|
||||||
|
rli->inc_event_relay_log_pos();
|
||||||
|
DBUG_RETURN(0);
|
||||||
|
};);
|
||||||
|
}
|
||||||
|
|
||||||
exec_res= apply_event_and_update_pos(ev, thd, rli, TRUE);
|
exec_res= apply_event_and_update_pos(ev, thd, rli, TRUE);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -3953,7 +4027,7 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
|
||||||
goto skip_relay_logging;
|
goto skip_relay_logging;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
inc_pos= event_len;
|
inc_pos= event_len;
|
||||||
break;
|
break;
|
||||||
|
|
Loading…
Add table
Reference in a new issue