mariadb/mysql-test/t/rpl_deadlock.test

# See if slave restarts the transaction after failing on an InnoDB deadlock error.

# Note: testing what happens when there are too many retries is possible,
# but needs large waits when running with --debug, so we don't do it.
# In the same way, this test may not test what is expected when run
# under Valgrind, as timings are too short then (with --valgrind I
# (Guilhem) have seen the test manage to provoke a lock wait timeout
# error but not a deadlock error; that is ok as the code deals with the
# two errors in exactly the same way).
# We don't 'show status like 'slave_retried_transactions'' because this
# is not repeatable (depends on sleeps).

source include/have_innodb.inc;
source include/master-slave.inc;

# Setup: create the four InnoDB tables used by this test on the master and
# record the master's slave_transaction_retries setting in the result file.
connection master;
create table t1 (a int not null, key(a)) engine=innodb;
create table t2 (a int not null, key(a)) engine=innodb;
create table t3 (a int) engine=innodb;
create table t4 (a int) engine=innodb;
show variables like 'slave_transaction_retries';
# Wait until the slave has applied the CREATE TABLEs. The statements that
# follow (up to the next "connection master") are issued on the slave.
sync_slave_with_master;

# Record the slave's view of the table definitions and its retry setting.
show create table t1;
show create table t2;
show variables like 'slave_transaction_retries';
# Stop replication: the master transaction built in 1) must not be applied
# until the slave-side session has taken its own lock and issues "start slave".
stop slave;

# 1) Test deadlock
#
# Intended sequence: the master commits a transaction that touches t2 and
# then t1. Meanwhile a session on the slave locks t1 first and later writes
# to t2. When the slave SQL thread replays the master's transaction, the two
# are expected to end up waiting on each other; the SQL thread should be
# rolled back as the deadlock victim and then retry the transaction.

connection master;
begin;
# Let's keep BEGIN and the locked statement in two different relay logs.
# NOTE(review): 200 filler rows only force a relay log switch if the slave
# runs with a small relay log size limit; that setting is not visible in
# this file -- confirm in the test's option file.
let $1=200;
# The filler inserts are executed with query logging off so that they are
# not echoed into the result file.
disable_query_log;
while ($1)
{
 eval insert into t3 values( $1 );
 dec $1;
}
enable_query_log;
# This statement reads t2 with locking; the next one writes to t1.
insert into t3 select * from t2 for update;
insert into t1 values(1);
commit;
# Remember the master's binlog position; every sync_with_master below waits
# for the slave to reach it.
save_master_pos;

connection slave;
begin;
# Let's make our transaction large so that it's slave who is chosen as
# victim
# (1000 rows here versus 200 rows in the replicated transaction; presumably
# InnoDB rolls back the transaction that has modified fewer rows.)
let $1=1000;
disable_query_log;
while ($1)
{
 eval insert into t4 values( $1 );
 dec $1;
}
enable_query_log;
# Take the lock on t1 that the replicated "insert into t1" will wait for,
# and only then let the SQL thread start applying the master's transaction.
select * from t1 for update;
start slave;
--sleep 3; # hope that slave is blocked now
insert into t2 values(22); # provoke deadlock, slave should be victim
commit;
# Wait until the slave SQL thread has reached the saved master position,
# i.e. until the retried transaction has been applied.
sync_with_master;
select * from t1; # check that slave succeeded finally
select * from t2;
# check that no error is reported
# (columns 1, 8, 9, 23 and 33 of the status output are replaced by '#' and
# the master port by a fixed string, presumably because they vary from run
# to run)
--replace_column 1 # 8 # 9 # 23 # 33 #
--replace_result $MASTER_MYPORT MASTER_MYPORT
show slave status;

# 2) Test lock wait timeout
#
# Still on the slave connection. The slave is rewound to the start of the
# master's transaction so that it is applied a second time, this time while
# a local session simply holds a lock on t2 (no lock cycle, so the SQL
# thread can only wait).

stop slave;
# NOTE(review): 401 is a hard-coded binlog offset; it has to be adjusted
# whenever the statements logged before the master's BEGIN change.
change master to master_log_pos=401; # the BEGIN log event
begin;
select * from t2 for update; # hold lock
start slave;
# NOTE(review): this assumes the slave's innodb_lock_wait_timeout is shorter
# than the 10 second sleep, so that at least one timeout and retry happens
# before the lock is released -- the setting is not visible in this file.
--sleep 10; # slave should have blocked, and be retrying
commit;
# Once the lock is released, wait for the SQL thread to reach the master
# position saved in 1).
sync_with_master;
select * from t1; # check that slave succeeded finally
select * from t2;
# check that no error is reported
--replace_column 1 # 8 # 9 # 23 # 33 #
--replace_result $MASTER_MYPORT MASTER_MYPORT
show slave status;

# Now we repeat 2), but with BEGIN in the same relay log as
# COMMIT (to see if seeking into hot log is ok).

# max_relay_log_size is a global server variable, so remember the slave's
# current value in order to restore it once the test is done. Setting it to
# 0 removes the relay-log-specific size limit (the server then falls back to
# max_binlog_size), which is what keeps the whole transaction in one relay
# log this time.
set @my_max_relay_log_size= @@global.max_relay_log_size;
set global max_relay_log_size=0;

# This is really copy-paste of 2) of above
stop slave;
# 401 is the hard-coded binlog offset of the master's BEGIN event (see 2)).
change master to master_log_pos=401;
begin;
# Hold a lock on t2 while the SQL thread replays the transaction.
select * from t2 for update;
start slave;
--sleep 10;
commit;
sync_with_master;
select * from t1;
select * from t2;
--replace_column 1 # 8 # 9 # 23 # 33 #
--replace_result $MASTER_MYPORT MASTER_MYPORT
show slave status;

# Cleanup: drop every table this test created (t3 and t4 included, so that
# nothing is left behind for later tests), wait for the drop to reach the
# slave, and then, on the slave connection that sync_slave_with_master
# leaves us on, put the relay log size limit back to its saved value.
connection master;
drop table t1,t2,t3,t4;
sync_slave_with_master;
set global max_relay_log_size= @my_max_relay_log_size;
Fix for BUG#8325 "Deadlock in replication thread stops replication": in slave SQL thread: if a transaction fails because of InnoDB deadlock or innodb_lock_wait_timeout exceeded, optionally retry the transaction a certain number of times (new variable --slave_transaction_retries). sql/mysql_priv.h: new var slave_transaction_retries sql/mysqld.cc: new variable slave_transaction_retries. Plus fixing a typo. sql/set_var.cc: new global variable slave_transaction_retries (will be one per subslave, when we have multimaster). sql/slave.cc: Slave SQL thread: if a transaction fails because of InnoDB deadlock or innodb_lock_wait_timeout exceeded, optionally retry the transaction a certain number of times (--slave_transaction_retries). sql/slave.h: new RELAY_LOG_INFO::trans_retries. 2005-03-02 11:29:48 +01:00			`# See if slave restarts the transaction after failing on an InnoDB deadlock error.`

			`# Note: testing what happens when too many retries is possible, but`
			`# needs large waits when running with --debug, so we don't do it.`
			`# The same way, this test may not test what is expected when run`
			`# under Valgrind, timings are too short then (with --valgrind I`
			`# (Guilhem) have seen the test manage to provoke lock wait timeout`
			`# error but not deadlock error; that is ok as code deals with the two`
			`# errors in exactly the same way.`
"After Monty's review" changes to the fix for BUG#8325 "Deadlock in replication thread stops replication": s/sleep/safe_sleep (thread safe); sleep 0/1/2/3/4/5/5/5 (get slave less late); no message on error log (deadlock is too common sometimes), a global counter instead (SHOW STATUS LIKE 'slave_retried_transactions'). Plus a fix for libmysql/Makefile.shared libmysql/Makefile.shared: When we "make clean" in libmysql/ we remove the symlinks there, so we need to mark that they have to be recreated later: this is done by removing ../linked_libmysql_sources. If we don't do this, 'make' will fail after 'cd libmysql;make clean'. This Makefile.shared is used by libmysql_r too. No reason to remove linked_client_sources as we don't remove the links in client/. mysql-test/r/rpl_deadlock.result: result fix mysql-test/t/rpl_deadlock.test: small test addition sql/mysqld.cc: if active_mi could not be alloced, die. New SHOW STATUS LIKE "slave_retried_transactions". sql/slave.cc: If slave retries automatically a transaction, no message on error log (too common situation); sleep 0 secs at first retry, then 1, 2, 3, 4, 5, 5, 5... Sleeping 0 is to get the least possible late, as deadlocks are usually resolved at first try. New global counter rli->retried_trans (for SHOW STATUS: total number of times the slave had to retry any transaction). safe_sleep() is thread-safe, sleep() was not. I change the rli->trans_retries counter to go from 0 to max instead of the other way (better for new sleep()). sql/slave.h: new global counter rli->retried_trans sql/sql_show.cc: SHOW STATUS LIKE "slave_retried_transactions"; needs replication mutexes. Can't be a simple SHOW_LONG, because active_mi is unset (not alloced yet) when the static global status_vars is created (active_mi is set in init_slave()). 
sql/structs.h: new SHOW_SLAVE_RETRIED_TRANS BitKeeper/etc/logging_ok: Logging to logging@openlogging.org accepted 2005-03-23 19:19:36 +01:00			`# We don't 'show status like 'slave_retried_transactions'' because this`
			`# is not repeatable (depends on sleeps).`
Fix for BUG#8325 "Deadlock in replication thread stops replication": in slave SQL thread: if a transaction fails because of InnoDB deadlock or innodb_lock_wait_timeout exceeded, optionally retry the transaction a certain number of times (new variable --slave_transaction_retries). sql/mysql_priv.h: new var slave_transaction_retries sql/mysqld.cc: new variable slave_transaction_retries. Plus fixing a typo. sql/set_var.cc: new global variable slave_transaction_retries (will be one per subslave, when we have multimaster). sql/slave.cc: Slave SQL thread: if a transaction fails because of InnoDB deadlock or innodb_lock_wait_timeout exceeded, optionally retry the transaction a certain number of times (--slave_transaction_retries). sql/slave.h: new RELAY_LOG_INFO::trans_retries. 2005-03-02 11:29:48 +01:00
			`source include/have_innodb.inc;`
			`source include/master-slave.inc;`

			`connection master;`
			`create table t1 (a int not null, key(a)) engine=innodb;`
			`create table t2 (a int not null, key(a)) engine=innodb;`
			`create table t3 (a int) engine=innodb;`
			`create table t4 (a int) engine=innodb;`
"After Monty's review" changes to the fix for BUG#8325 "Deadlock in replication thread stops replication": s/sleep/safe_sleep (thread safe); sleep 0/1/2/3/4/5/5/5 (get slave less late); no message on error log (deadlock is too common sometimes), a global counter instead (SHOW STATUS LIKE 'slave_retried_transactions'). Plus a fix for libmysql/Makefile.shared libmysql/Makefile.shared: When we "make clean" in libmysql/ we remove the symlinks there, so we need to mark that they have to be recreated later: this is done by removing ../linked_libmysql_sources. If we don't do this, 'make' will fail after 'cd libmysql;make clean'. This Makefile.shared is used by libmysql_r too. No reason to remove linked_client_sources as we don't remove the links in client/. mysql-test/r/rpl_deadlock.result: result fix mysql-test/t/rpl_deadlock.test: small test addition sql/mysqld.cc: if active_mi could not be alloced, die. New SHOW STATUS LIKE "slave_retried_transactions". sql/slave.cc: If slave retries automatically a transaction, no message on error log (too common situation); sleep 0 secs at first retry, then 1, 2, 3, 4, 5, 5, 5... Sleeping 0 is to get the least possible late, as deadlocks are usually resolved at first try. New global counter rli->retried_trans (for SHOW STATUS: total number of times the slave had to retry any transaction). safe_sleep() is thread-safe, sleep() was not. I change the rli->trans_retries counter to go from 0 to max instead of the other way (better for new sleep()). sql/slave.h: new global counter rli->retried_trans sql/sql_show.cc: SHOW STATUS LIKE "slave_retried_transactions"; needs replication mutexes. Can't be a simple SHOW_LONG, because active_mi is unset (not alloced yet) when the static global status_vars is created (active_mi is set in init_slave()). sql/structs.h: new SHOW_SLAVE_RETRIED_TRANS BitKeeper/etc/logging_ok: Logging to logging@openlogging.org accepted 2005-03-23 19:19:36 +01:00			`show variables like 'slave_transaction_retries';`
Fix for BUG#8325 "Deadlock in replication thread stops replication": in slave SQL thread: if a transaction fails because of InnoDB deadlock or innodb_lock_wait_timeout exceeded, optionally retry the transaction a certain number of times (new variable --slave_transaction_retries). sql/mysql_priv.h: new var slave_transaction_retries sql/mysqld.cc: new variable slave_transaction_retries. Plus fixing a typo. sql/set_var.cc: new global variable slave_transaction_retries (will be one per subslave, when we have multimaster). sql/slave.cc: Slave SQL thread: if a transaction fails because of InnoDB deadlock or innodb_lock_wait_timeout exceeded, optionally retry the transaction a certain number of times (--slave_transaction_retries). sql/slave.h: new RELAY_LOG_INFO::trans_retries. 2005-03-02 11:29:48 +01:00			`sync_slave_with_master;`

			`show create table t1;`
			`show create table t2;`
"After Monty's review" changes to the fix for BUG#8325 "Deadlock in replication thread stops replication": s/sleep/safe_sleep (thread safe); sleep 0/1/2/3/4/5/5/5 (get slave less late); no message on error log (deadlock is too common sometimes), a global counter instead (SHOW STATUS LIKE 'slave_retried_transactions'). Plus a fix for libmysql/Makefile.shared libmysql/Makefile.shared: When we "make clean" in libmysql/ we remove the symlinks there, so we need to mark that they have to be recreated later: this is done by removing ../linked_libmysql_sources. If we don't do this, 'make' will fail after 'cd libmysql;make clean'. This Makefile.shared is used by libmysql_r too. No reason to remove linked_client_sources as we don't remove the links in client/. mysql-test/r/rpl_deadlock.result: result fix mysql-test/t/rpl_deadlock.test: small test addition sql/mysqld.cc: if active_mi could not be alloced, die. New SHOW STATUS LIKE "slave_retried_transactions". sql/slave.cc: If slave retries automatically a transaction, no message on error log (too common situation); sleep 0 secs at first retry, then 1, 2, 3, 4, 5, 5, 5... Sleeping 0 is to get the least possible late, as deadlocks are usually resolved at first try. New global counter rli->retried_trans (for SHOW STATUS: total number of times the slave had to retry any transaction). safe_sleep() is thread-safe, sleep() was not. I change the rli->trans_retries counter to go from 0 to max instead of the other way (better for new sleep()). sql/slave.h: new global counter rli->retried_trans sql/sql_show.cc: SHOW STATUS LIKE "slave_retried_transactions"; needs replication mutexes. Can't be a simple SHOW_LONG, because active_mi is unset (not alloced yet) when the static global status_vars is created (active_mi is set in init_slave()). sql/structs.h: new SHOW_SLAVE_RETRIED_TRANS BitKeeper/etc/logging_ok: Logging to logging@openlogging.org accepted 2005-03-23 19:19:36 +01:00			`show variables like 'slave_transaction_retries';`
Fix for BUG#8325 "Deadlock in replication thread stops replication": in slave SQL thread: if a transaction fails because of InnoDB deadlock or innodb_lock_wait_timeout exceeded, optionally retry the transaction a certain number of times (new variable --slave_transaction_retries). sql/mysql_priv.h: new var slave_transaction_retries sql/mysqld.cc: new variable slave_transaction_retries. Plus fixing a typo. sql/set_var.cc: new global variable slave_transaction_retries (will be one per subslave, when we have multimaster). sql/slave.cc: Slave SQL thread: if a transaction fails because of InnoDB deadlock or innodb_lock_wait_timeout exceeded, optionally retry the transaction a certain number of times (--slave_transaction_retries). sql/slave.h: new RELAY_LOG_INFO::trans_retries. 2005-03-02 11:29:48 +01:00			`stop slave;`

			`# 1) Test deadlock`

			`connection master;`
			`begin;`
			`# Let's keep BEGIN and the locked statement in two different relay logs.`
			`let $1=200;`
			`disable_query_log;`
			`while ($1)`
			`{`
			`eval insert into t3 values( $1 );`
			`dec $1;`
			`}`
			`enable_query_log;`
			`insert into t3 select * from t2 for update;`
			`insert into t1 values(1);`
			`commit;`
			`save_master_pos;`

			`connection slave;`
			`begin;`
			`# Let's make our transaction large so that it's slave who is chosen as`
			`# victim`
			`let $1=1000;`
			`disable_query_log;`
			`while ($1)`
			`{`
			`eval insert into t4 values( $1 );`
			`dec $1;`
			`}`
			`enable_query_log;`
			`select * from t1 for update;`
			`start slave;`
			`--sleep 3; # hope that slave is blocked now`
			`insert into t2 values(22); # provoke deadlock, slave should be victim`
			`commit;`
			`sync_with_master;`
			`select * from t1; # check that slave succeeded finally`
			`select * from t2;`
			`# check that no error is reported`
			`--replace_column 1 # 8 # 9 # 23 # 33 #`
			`--replace_result $MASTER_MYPORT MASTER_MYPORT`
			`show slave status;`

			`# 2) Test lock wait timeout`

			`stop slave;`
			`change master to master_log_pos=401; # the BEGIN log event`
			`begin;`
			`select * from t2 for update; # hold lock`
			`start slave;`
			`--sleep 10; # slave should have blocked, and be retrying`
			`commit;`
			`sync_with_master;`
			`select * from t1; # check that slave succeeded finally`
			`select * from t2;`
			`# check that no error is reported`
			`--replace_column 1 # 8 # 9 # 23 # 33 #`
			`--replace_result $MASTER_MYPORT MASTER_MYPORT`
			`show slave status;`

			`# Now we repeat 2), but with BEGIN in the same relay log as`
			`# COMMIT (to see if seeking into hot log is ok).`

			`set global max_relay_log_size=0;`

			`# This is really copy-paste of 2) of above`
			`stop slave;`
			`change master to master_log_pos=401;`
			`begin;`
			`select * from t2 for update;`
			`start slave;`
			`--sleep 10;`
			`commit;`
			`sync_with_master;`
			`select * from t1;`
			`select * from t2;`
			`--replace_column 1 # 8 # 9 # 23 # 33 #`
			`--replace_result $MASTER_MYPORT MASTER_MYPORT`
			`show slave status;`

			`connection master;`
			`drop table t1,t2;`
			`sync_slave_with_master;`