mirror of
https://github.com/MariaDB/server.git
synced 2025-01-27 09:14:17 +01:00
MDEV-26499 Fix error "mysql_shutdown failed" during MTR tests
- Fix to avoid the mysqltest client getting killed abruptly during mysql_shutdown(). When Galera replication is shut down, wait for THDs with `thd->get_stmt_da()->is_eof()` to disconnect (these are about to disconnect anyway). - Extract duplicate code from `wsrep_stop_replication()` and `wsrep_shutdown_replication()` into a new function. - No need to use a custom `shutdown_mysqld.inc` in the galera suite. Delete it, so that the one in `mysql-test/include/` is used. Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
This commit is contained in:
parent
db0b9ec37b
commit
c71dc39529
5 changed files with 64 additions and 51 deletions
|
@ -1,18 +0,0 @@
|
|||
# This is the first half of include/restart_mysqld.inc.
|
||||
if ($rpl_inited)
|
||||
{
|
||||
if (!$allow_rpl_inited)
|
||||
{
|
||||
--die ERROR IN TEST: When using the replication test framework (master-slave.inc, rpl_init.inc etc), use rpl_restart_server.inc instead of restart_mysqld.inc. If you know what you are doing and you really have to use restart_mysqld.inc, set allow_rpl_inited=1 before you source restart_mysqld.inc
|
||||
}
|
||||
}
|
||||
|
||||
# Write file to make mysql-test-run.pl expect the "crash", but don't start it
|
||||
--let $_expect_file_name= `select regexp_replace(@@tmpdir, '^.*/','')`
|
||||
--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/$_expect_file_name.expect
|
||||
--exec echo "wait" > $_expect_file_name
|
||||
|
||||
# Send shutdown to the connected server
|
||||
--shutdown_server
|
||||
--source include/wait_until_disconnected.inc
|
||||
|
6
mysql-test/suite/galera/r/MDEV-26499.result
Normal file
6
mysql-test/suite/galera/r/MDEV-26499.result
Normal file
|
@ -0,0 +1,6 @@
|
|||
connection node_2;
|
||||
connection node_1;
|
||||
connection node_1;
|
||||
connection node_2;
|
||||
connection node_2;
|
||||
SET GLOBAL debug_dbug="+d,simulate_slow_client_at_shutdown";
|
20
mysql-test/suite/galera/t/MDEV-26499.test
Normal file
20
mysql-test/suite/galera/t/MDEV-26499.test
Normal file
|
@ -0,0 +1,20 @@
|
|||
#
|
||||
# MDEV-26499
|
||||
#
|
||||
# This test reproduces some failure on mysql_shutdown() call
|
||||
# which manifests sporadically in some galera MTR tests during
|
||||
# restart of a node.
|
||||
#
|
||||
|
||||
--source include/galera_cluster.inc
|
||||
--source include/have_debug_sync.inc
|
||||
|
||||
--let $node_1=node_1
|
||||
--let $node_2=node_2
|
||||
--source include/auto_increment_offset_save.inc
|
||||
|
||||
--connection node_2
|
||||
SET GLOBAL debug_dbug="+d,simulate_slow_client_at_shutdown";
|
||||
--source include/restart_mysqld.inc
|
||||
|
||||
--source include/auto_increment_offset_restore.inc
|
|
@ -2208,6 +2208,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
|
|||
my_eof(thd);
|
||||
kill_mysql(thd);
|
||||
error=TRUE;
|
||||
DBUG_EXECUTE_IF("simulate_slow_client_at_shutdown", my_sleep(2000000););
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -1014,10 +1014,8 @@ void wsrep_recover()
|
|||
WSREP_INFO("Recovered position: %s", oss.str().c_str());
|
||||
}
|
||||
|
||||
|
||||
void wsrep_stop_replication(THD *thd)
|
||||
static void wsrep_stop_replication_common(THD *thd)
|
||||
{
|
||||
WSREP_INFO("Stop replication by %llu", (thd) ? thd->thread_id : 0);
|
||||
if (Wsrep_server_state::instance().state() !=
|
||||
Wsrep_server_state::s_disconnected)
|
||||
{
|
||||
|
@ -1030,40 +1028,30 @@ void wsrep_stop_replication(THD *thd)
|
|||
}
|
||||
}
|
||||
|
||||
/* my connection, should not terminate with wsrep_close_client_connection(),
|
||||
make transaction to rollback
|
||||
*/
|
||||
if (thd && !thd->wsrep_applier) trans_rollback(thd);
|
||||
/* my connection, should not terminate with
|
||||
wsrep_close_client_connections(), make transaction to rollback */
|
||||
if (thd && !thd->wsrep_applier)
|
||||
trans_rollback(thd);
|
||||
wsrep_close_client_connections(TRUE, thd);
|
||||
|
||||
|
||||
/* wait until appliers have stopped */
|
||||
wsrep_wait_appliers_close(thd);
|
||||
|
||||
node_uuid= WSREP_UUID_UNDEFINED;
|
||||
}
|
||||
|
||||
void wsrep_stop_replication(THD *thd)
|
||||
{
|
||||
WSREP_INFO("Stop replication by %llu", (thd) ? thd->thread_id : 0);
|
||||
wsrep_stop_replication_common(thd);
|
||||
}
|
||||
|
||||
void wsrep_shutdown_replication()
|
||||
{
|
||||
WSREP_INFO("Shutdown replication");
|
||||
if (Wsrep_server_state::instance().state() != wsrep::server_state::s_disconnected)
|
||||
{
|
||||
WSREP_DEBUG("Disconnect provider");
|
||||
Wsrep_server_state::instance().disconnect();
|
||||
if (Wsrep_server_state::instance().wait_until_state(
|
||||
Wsrep_server_state::s_disconnected))
|
||||
{
|
||||
WSREP_WARN("Wsrep interrupted while waiting for disconnected state");
|
||||
}
|
||||
}
|
||||
|
||||
wsrep_close_client_connections(TRUE);
|
||||
|
||||
/* wait until appliers have stopped */
|
||||
wsrep_wait_appliers_close(NULL);
|
||||
node_uuid= WSREP_UUID_UNDEFINED;
|
||||
|
||||
wsrep_stop_replication_common(nullptr);
|
||||
/* Undocking the thread specific data. */
|
||||
my_pthread_setspecific_ptr(THR_THD, NULL);
|
||||
my_pthread_setspecific_ptr(THR_THD, nullptr);
|
||||
}
|
||||
|
||||
bool wsrep_start_replication(const char *wsrep_cluster_address)
|
||||
|
@ -2644,14 +2632,19 @@ static my_bool have_client_connections(THD *thd, void*)
|
|||
{
|
||||
DBUG_PRINT("quit",("Informing thread %lld that it's time to die",
|
||||
(longlong) thd->thread_id));
|
||||
if (is_client_connection(thd) && thd->killed == KILL_CONNECTION)
|
||||
if (is_client_connection(thd))
|
||||
{
|
||||
WSREP_DEBUG("Informing thread %lld that it's time to die",
|
||||
thd->thread_id);
|
||||
(void)abort_replicated(thd);
|
||||
return true;
|
||||
if (thd->killed == KILL_CONNECTION)
|
||||
{
|
||||
(void)abort_replicated(thd);
|
||||
return true;
|
||||
}
|
||||
if (thd->get_stmt_da()->is_eof())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
static void wsrep_close_thread(THD *thd)
|
||||
|
@ -2691,14 +2684,24 @@ static my_bool kill_all_threads(THD *thd, THD *caller_thd)
|
|||
/* We skip slave threads & scheduler on this first loop through. */
|
||||
if (is_client_connection(thd) && thd != caller_thd)
|
||||
{
|
||||
if (thd->get_stmt_da()->is_eof())
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (is_replaying_connection(thd))
|
||||
{
|
||||
thd->set_killed(KILL_CONNECTION);
|
||||
else if (!abort_replicated(thd))
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!abort_replicated(thd))
|
||||
{
|
||||
/* replicated transactions must be skipped */
|
||||
WSREP_DEBUG("closing connection %lld", (longlong) thd->thread_id);
|
||||
/* instead of wsrep_close_thread() we do now soft kill by THD::awake */
|
||||
thd->awake(KILL_CONNECTION);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
@ -2710,6 +2713,7 @@ static my_bool kill_remaining_threads(THD *thd, THD *caller_thd)
|
|||
if (is_client_connection(thd) &&
|
||||
!abort_replicated(thd) &&
|
||||
!is_replaying_connection(thd) &&
|
||||
!thd->get_stmt_da()->is_eof() &&
|
||||
thd_is_connection_alive(thd) &&
|
||||
thd != caller_thd)
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue