2005-03-02 11:29:48 +01:00
|
|
|
# See if slave restarts the transaction after failing on an InnoDB deadlock error.
|
|
|
|
|
|
|
|
# Note: testing what happens when too many retries is possible, but
|
|
|
|
# needs large waits when running with --debug, so we don't do it.
|
|
|
|
# The same way, this test may not test what is expected when run
|
|
|
|
# under Valgrind, timings are too short then (with --valgrind I
|
|
|
|
# (Guilhem) have seen the test manage to provoke lock wait timeout
|
|
|
|
# error but not deadlock error; that is ok as code deals with the two
|
|
|
|
# errors in exactly the same way.
|
"After Monty's review" changes to the fix for BUG#8325 "Deadlock in replication thread stops replication":
s/sleep/safe_sleep (thread-safe); sleep 0/1/2/3/4/5/5/5 seconds between retries (so that the slave falls behind less);
no message in the error log (deadlocks are sometimes too common), a global counter
instead (SHOW STATUS LIKE 'slave_retried_transactions').
Plus a fix for libmysql/Makefile.shared
libmysql/Makefile.shared:
When we "make clean" in libmysql/ we remove the symlinks there, so we
need to mark that they have to be recreated later: this is done by removing
../linked_libmysql_sources. If we don't do this, 'make' will fail after 'cd libmysql;make clean'.
This Makefile.shared is used by libmysql_r too.
No reason to remove linked_client_sources as we don't remove the links in client/.
mysql-test/r/rpl_deadlock.result:
result fix
mysql-test/t/rpl_deadlock.test:
small test addition
sql/mysqld.cc:
if active_mi could not be alloced, die. New SHOW STATUS LIKE "slave_retried_transactions".
sql/slave.cc:
If the slave automatically retries a transaction, no message is written to the error log
(the situation is too common); sleep 0 secs at the first retry, then 1, 2, 3, 4,
5, 5, 5... Sleeping 0 secs first keeps the slave from falling behind more than necessary, as deadlocks
are usually resolved at the first try. New global counter rli->retried_trans
(for SHOW STATUS: total number of times the slave had to retry
any transaction). safe_sleep() is thread-safe, sleep() was not.
I changed the rli->trans_retries counter to go from 0 to max instead
of the other way round (better for the new sleep()).
sql/slave.h:
new global counter rli->retried_trans
sql/sql_show.cc:
SHOW STATUS LIKE "slave_retried_transactions"; needs replication mutexes.
Can't be a simple SHOW_LONG, because active_mi is unset (not alloced yet)
when the static global status_vars is created (active_mi is set
in init_slave()).
sql/structs.h:
new SHOW_SLAVE_RETRIED_TRANS
BitKeeper/etc/logging_ok:
Logging to logging@openlogging.org accepted
2005-03-23 19:19:36 +01:00
|
|
|
# We don't 'show status like 'slave_retried_transactions'' because this
|
|
|
|
# is not repeatable (depends on sleeps).
|
2005-03-02 11:29:48 +01:00
|
|
|
|
|
|
|
# Setup: load the InnoDB and master-slave include files (their contents are
# not shown here), then create the four InnoDB test tables on the master.
source include/have_innodb.inc;
|
|
|
|
source include/master-slave.inc;
|
|
|
|
|
|
|
|
connection master;
|
|
|
|
# t1 and t2 are the tables on which the conflicting locks are taken below.
create table t1 (a int not null, key(a)) engine=innodb;
|
|
|
|
create table t2 (a int not null, key(a)) engine=innodb;
|
|
|
|
# t3 receives the master-side filler rows and the rows read from t2.
create table t3 (a int) engine=innodb;
|
|
|
|
# t4 receives the filler rows inserted by the slave-side client transaction.
create table t4 (a int) engine=innodb;
|
"After Monty's review" changes to the fix for BUG#8325 "Deadlock in replication thread stops replication":
s/sleep/safe_sleep (thread-safe); sleep 0/1/2/3/4/5/5/5 seconds between retries (so that the slave falls behind less);
no message in the error log (deadlocks are sometimes too common), a global counter
instead (SHOW STATUS LIKE 'slave_retried_transactions').
Plus a fix for libmysql/Makefile.shared
libmysql/Makefile.shared:
When we "make clean" in libmysql/ we remove the symlinks there, so we
need to mark that they have to be recreated later: this is done by removing
../linked_libmysql_sources. If we don't do this, 'make' will fail after 'cd libmysql;make clean'.
This Makefile.shared is used by libmysql_r too.
No reason to remove linked_client_sources as we don't remove the links in client/.
mysql-test/r/rpl_deadlock.result:
result fix
mysql-test/t/rpl_deadlock.test:
small test addition
sql/mysqld.cc:
if active_mi could not be alloced, die. New SHOW STATUS LIKE "slave_retried_transactions".
sql/slave.cc:
If the slave automatically retries a transaction, no message is written to the error log
(the situation is too common); sleep 0 secs at the first retry, then 1, 2, 3, 4,
5, 5, 5... Sleeping 0 secs first keeps the slave from falling behind more than necessary, as deadlocks
are usually resolved at the first try. New global counter rli->retried_trans
(for SHOW STATUS: total number of times the slave had to retry
any transaction). safe_sleep() is thread-safe, sleep() was not.
I changed the rli->trans_retries counter to go from 0 to max instead
of the other way round (better for the new sleep()).
sql/slave.h:
new global counter rli->retried_trans
sql/sql_show.cc:
SHOW STATUS LIKE "slave_retried_transactions"; needs replication mutexes.
Can't be a simple SHOW_LONG, because active_mi is unset (not alloced yet)
when the static global status_vars is created (active_mi is set
in init_slave()).
sql/structs.h:
new SHOW_SLAVE_RETRIED_TRANS
BitKeeper/etc/logging_ok:
Logging to logging@openlogging.org accepted
2005-03-23 19:19:36 +01:00
|
|
|
# Print the slave_transaction_retries setting on the master connection
# selected during setup.
show variables like 'slave_transaction_retries';
|
2005-03-02 11:29:48 +01:00
|
|
|
# Wait for the slave to catch up with the master.
# NOTE(review): per the mysqltest manual this command also leaves the slave
# connection current, so the statements below presumably run on the slave -
# confirm.
sync_slave_with_master;
|
|
|
|
|
|
|
|
# Show the definitions of t1 and t2 as seen after the sync.
show create table t1;
|
|
|
|
show create table t2;
|
"After Monty's review" changes to the fix for BUG#8325 "Deadlock in replication thread stops replication":
s/sleep/safe_sleep (thread-safe); sleep 0/1/2/3/4/5/5/5 seconds between retries (so that the slave falls behind less);
no message in the error log (deadlocks are sometimes too common), a global counter
instead (SHOW STATUS LIKE 'slave_retried_transactions').
Plus a fix for libmysql/Makefile.shared
libmysql/Makefile.shared:
When we "make clean" in libmysql/ we remove the symlinks there, so we
need to mark that they have to be recreated later: this is done by removing
../linked_libmysql_sources. If we don't do this, 'make' will fail after 'cd libmysql;make clean'.
This Makefile.shared is used by libmysql_r too.
No reason to remove linked_client_sources as we don't remove the links in client/.
mysql-test/r/rpl_deadlock.result:
result fix
mysql-test/t/rpl_deadlock.test:
small test addition
sql/mysqld.cc:
if active_mi could not be alloced, die. New SHOW STATUS LIKE "slave_retried_transactions".
sql/slave.cc:
If the slave automatically retries a transaction, no message is written to the error log
(the situation is too common); sleep 0 secs at the first retry, then 1, 2, 3, 4,
5, 5, 5... Sleeping 0 secs first keeps the slave from falling behind more than necessary, as deadlocks
are usually resolved at the first try. New global counter rli->retried_trans
(for SHOW STATUS: total number of times the slave had to retry
any transaction). safe_sleep() is thread-safe, sleep() was not.
I changed the rli->trans_retries counter to go from 0 to max instead
of the other way round (better for the new sleep()).
sql/slave.h:
new global counter rli->retried_trans
sql/sql_show.cc:
SHOW STATUS LIKE "slave_retried_transactions"; needs replication mutexes.
Can't be a simple SHOW_LONG, because active_mi is unset (not alloced yet)
when the static global status_vars is created (active_mi is set
in init_slave()).
sql/structs.h:
new SHOW_SLAVE_RETRIED_TRANS
BitKeeper/etc/logging_ok:
Logging to logging@openlogging.org accepted
2005-03-23 19:19:36 +01:00
|
|
|
# Print slave_transaction_retries a second time, now after
# sync_slave_with_master (presumably on the slave connection - confirm).
show variables like 'slave_transaction_retries';
|
2005-03-02 11:29:48 +01:00
|
|
|
# Stop replication so that the slave-side session in part 1) can take its
# locks before 'start slave' is issued.
stop slave;
|
|
|
|
|
|
|
|
# 1) Test deadlock
# Master side: a single transaction made of 200 filler inserts into t3,
# a locking read of t2 copied into t3, and one insert into t1.
|
|
|
|
|
|
|
|
connection master;
|
|
|
|
begin;
|
|
|
|
# Let's keep BEGIN and the locked statement in two different relay logs.
|
|
|
|
# $1 is the loop counter: 200 filler rows are inserted into t3 while query
# logging is disabled.
let $1=200;
|
|
|
|
disable_query_log;
|
|
|
|
while ($1)
|
|
|
|
{
|
|
|
|
eval insert into t3 values( $1 );
|
|
|
|
dec $1;
|
|
|
|
}
|
|
|
|
enable_query_log;
|
|
|
|
# The two statements the slave-side session conflicts with when this
# transaction is replayed: a locking read of t2, then an insert into t1.
insert into t3 select * from t2 for update;
|
|
|
|
insert into t1 values(1);
|
|
|
|
commit;
|
|
|
|
# Remember the master position; the sync_with_master calls issued later on
# the slave wait for it.
save_master_pos;
|
|
|
|
|
|
|
|
# Slave side of 1): replication was stopped earlier, so this session first
# opens a large transaction that locks t1, then starts the slave, and finally
# touches t2 to close the lock cycle.
connection slave;
|
|
|
|
begin;
|
|
|
|
# Let's make our transaction large (1000 rows) so that it is the slave, not this
|
|
|
|
# session, that is chosen as the deadlock victim
|
|
|
|
let $1=1000;
|
|
|
|
disable_query_log;
|
|
|
|
while ($1)
|
|
|
|
{
|
|
|
|
eval insert into t4 values( $1 );
|
|
|
|
dec $1;
|
|
|
|
}
|
|
|
|
enable_query_log;
|
|
|
|
# Take the lock on t1 that the replicated 'insert into t1 values(1)' is
# expected to wait for.
select * from t1 for update;
|
|
|
|
start slave;
|
|
|
|
# NOTE(review): a fixed sleep makes this step timing-dependent; the outcome
# may be a lock wait timeout instead of a deadlock on slow runs.
--sleep 3; # hope that slave is blocked now
|
|
|
|
insert into t2 values(22); # provoke deadlock, slave should be victim
|
|
|
|
commit;
|
|
|
|
# Wait until the slave has reached the position saved on the master.
sync_with_master;
|
|
|
|
select * from t1; # check that slave succeeded finally
|
|
|
|
select * from t2;
|
|
|
|
# check that no error is reported
# Columns 1, 8, 9, 23 and 33 of the status output are replaced with '#' and
# the master port with a fixed token before the result is recorded.
|
|
|
|
--replace_column 1 # 8 # 9 # 23 # 33 #
|
|
|
|
--replace_result $MASTER_MYPORT MASTER_MYPORT
|
|
|
|
show slave status;
|
|
|
|
|
|
|
|
# 2) Test lock wait timeout
# The slave is rewound to the BEGIN event of the master transaction from 1)
# and replays it while this session holds a lock on t2.
|
|
|
|
|
|
|
|
stop slave;
|
2005-03-25 14:51:17 +01:00
|
|
|
# NOTE(review): 532 is a hard-coded binlog offset; it has to be updated
# whenever the statements above (or the binlog event sizes) change.
change master to master_log_pos=532; # the BEGIN log event
|
2005-03-02 11:29:48 +01:00
|
|
|
begin;
|
|
|
|
select * from t2 for update; # hold lock
|
|
|
|
start slave;
|
|
|
|
# NOTE(review): fixed 10 second wait; presumably chosen to exceed the lock
# wait timeout in effect on the slave - confirm.
--sleep 10; # slave should have blocked, and be retrying
|
|
|
|
# Release the lock so that a retry of the replicated transaction can succeed.
commit;
|
|
|
|
sync_with_master;
|
|
|
|
select * from t1; # check that slave succeeded finally
|
|
|
|
select * from t2;
|
|
|
|
# check that no error is reported
|
|
|
|
--replace_column 1 # 8 # 9 # 23 # 33 #
|
|
|
|
--replace_result $MASTER_MYPORT MASTER_MYPORT
|
|
|
|
show slave status;
|
|
|
|
|
|
|
|
# Now we repeat 2), but with BEGIN in the same relay log as
|
|
|
|
# COMMIT (to see if seeking into hot log is ok).
|
|
|
|
|
|
|
|
|
|
# NOTE(review): this global setting is not restored anywhere in the visible
# script.
set global max_relay_log_size=0;
|
|
|
|
|
|
|
|
|
|
# This is really a copy-paste of 2) above
|
|
|
|
stop slave;
|
2005-03-25 14:51:17 +01:00
|
|
|
# Same hard-coded offset as in 2): the BEGIN log event of the master
# transaction.
change master to master_log_pos=532;
|
2005-03-02 11:29:48 +01:00
|
|
|
begin;
|
|
|
|
# Hold the lock on t2 while the slave replays the transaction.
select * from t2 for update;
|
|
|
|
start slave;
|
|
|
|
--sleep 10;
|
|
|
|
commit;
|
|
|
|
sync_with_master;
|
|
|
|
select * from t1;
|
|
|
|
select * from t2;
|
|
|
|
--replace_column 1 # 8 # 9 # 23 # 33 #
|
|
|
|
--replace_result $MASTER_MYPORT MASTER_MYPORT
|
|
|
|
show slave status;
|
|
|
|
|
|
|
|
# Cleanup: drop every table created at the top of the test (t3 and t4 used to
# be left behind), then wait for the slave to replay the drop.
connection master;
|
|
|
|
drop table t1,t2,t3,t4;
|
|
|
|
sync_slave_with_master;
|