mirror of
https://github.com/MariaDB/server.git
synced 2025-01-27 09:14:17 +01:00
MDEV-26499 Fix error "mysql_shutdown failed" during MTR tests
- Fix to avoid the mysqltest client getting killed abruptly during mysql_shutdown(). When Galera replication is shut down, wait for THDs with `thd->get_stmt_da()->is_eof()` to disconnect (these are about to disconnect anyway).
- Extract duplicate code from `wsrep_stop_replication()` and `wsrep_shutdown_replication()` into a new function.
- No need to use a custom `shutdown_mysqld.inc` in the galera suite. Delete it, so that the one in `mysql-test/include/` is used.
Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
This commit is contained in:
parent
db0b9ec37b
commit
c71dc39529
5 changed files with 64 additions and 51 deletions
|
@ -1,18 +0,0 @@
|
||||||
# This is the first half of include/restart_mysqld.inc.
|
|
||||||
if ($rpl_inited)
|
|
||||||
{
|
|
||||||
if (!$allow_rpl_inited)
|
|
||||||
{
|
|
||||||
--die ERROR IN TEST: When using the replication test framework (master-slave.inc, rpl_init.inc etc), use rpl_restart_server.inc instead of restart_mysqld.inc. If you know what you are doing and you really have to use restart_mysqld.inc, set allow_rpl_inited=1 before you source restart_mysqld.inc
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Write file to make mysql-test-run.pl expect the "crash", but don't start it
|
|
||||||
--let $_expect_file_name= `select regexp_replace(@@tmpdir, '^.*/','')`
|
|
||||||
--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/$_expect_file_name.expect
|
|
||||||
--exec echo "wait" > $_expect_file_name
|
|
||||||
|
|
||||||
# Send shutdown to the connected server
|
|
||||||
--shutdown_server
|
|
||||||
--source include/wait_until_disconnected.inc
|
|
||||||
|
|
6
mysql-test/suite/galera/r/MDEV-26499.result
Normal file
6
mysql-test/suite/galera/r/MDEV-26499.result
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
connection node_2;
|
||||||
|
connection node_1;
|
||||||
|
connection node_1;
|
||||||
|
connection node_2;
|
||||||
|
connection node_2;
|
||||||
|
SET GLOBAL debug_dbug="+d,simulate_slow_client_at_shutdown";
|
20
mysql-test/suite/galera/t/MDEV-26499.test
Normal file
20
mysql-test/suite/galera/t/MDEV-26499.test
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
#
|
||||||
|
# MDEV-26499
|
||||||
|
#
|
||||||
|
# This test reproduces some failure on mysql_shutdown() call
|
||||||
|
# which manifests sporadically in some galera MTR tests during
|
||||||
|
# restart of a node.
|
||||||
|
#
|
||||||
|
|
||||||
|
--source include/galera_cluster.inc
|
||||||
|
--source include/have_debug_sync.inc
|
||||||
|
|
||||||
|
--let $node_1=node_1
|
||||||
|
--let $node_2=node_2
|
||||||
|
--source include/auto_increment_offset_save.inc
|
||||||
|
|
||||||
|
--connection node_2
|
||||||
|
SET GLOBAL debug_dbug="+d,simulate_slow_client_at_shutdown";
|
||||||
|
--source include/restart_mysqld.inc
|
||||||
|
|
||||||
|
--source include/auto_increment_offset_restore.inc
|
|
@ -2208,6 +2208,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
|
||||||
my_eof(thd);
|
my_eof(thd);
|
||||||
kill_mysql(thd);
|
kill_mysql(thd);
|
||||||
error=TRUE;
|
error=TRUE;
|
||||||
|
DBUG_EXECUTE_IF("simulate_slow_client_at_shutdown", my_sleep(2000000););
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1014,10 +1014,8 @@ void wsrep_recover()
|
||||||
WSREP_INFO("Recovered position: %s", oss.str().c_str());
|
WSREP_INFO("Recovered position: %s", oss.str().c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void wsrep_stop_replication_common(THD *thd)
|
||||||
void wsrep_stop_replication(THD *thd)
|
|
||||||
{
|
{
|
||||||
WSREP_INFO("Stop replication by %llu", (thd) ? thd->thread_id : 0);
|
|
||||||
if (Wsrep_server_state::instance().state() !=
|
if (Wsrep_server_state::instance().state() !=
|
||||||
Wsrep_server_state::s_disconnected)
|
Wsrep_server_state::s_disconnected)
|
||||||
{
|
{
|
||||||
|
@ -1030,40 +1028,30 @@ void wsrep_stop_replication(THD *thd)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* my connection, should not terminate with wsrep_close_client_connection(),
|
/* my connection, should not terminate with
|
||||||
make transaction to rollback
|
wsrep_close_client_connections(), make transaction to rollback */
|
||||||
*/
|
if (thd && !thd->wsrep_applier)
|
||||||
if (thd && !thd->wsrep_applier) trans_rollback(thd);
|
trans_rollback(thd);
|
||||||
wsrep_close_client_connections(TRUE, thd);
|
wsrep_close_client_connections(TRUE, thd);
|
||||||
|
|
||||||
/* wait until appliers have stopped */
|
/* wait until appliers have stopped */
|
||||||
wsrep_wait_appliers_close(thd);
|
wsrep_wait_appliers_close(thd);
|
||||||
|
|
||||||
node_uuid= WSREP_UUID_UNDEFINED;
|
node_uuid= WSREP_UUID_UNDEFINED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void wsrep_stop_replication(THD *thd)
|
||||||
|
{
|
||||||
|
WSREP_INFO("Stop replication by %llu", (thd) ? thd->thread_id : 0);
|
||||||
|
wsrep_stop_replication_common(thd);
|
||||||
|
}
|
||||||
|
|
||||||
void wsrep_shutdown_replication()
|
void wsrep_shutdown_replication()
|
||||||
{
|
{
|
||||||
WSREP_INFO("Shutdown replication");
|
WSREP_INFO("Shutdown replication");
|
||||||
if (Wsrep_server_state::instance().state() != wsrep::server_state::s_disconnected)
|
wsrep_stop_replication_common(nullptr);
|
||||||
{
|
|
||||||
WSREP_DEBUG("Disconnect provider");
|
|
||||||
Wsrep_server_state::instance().disconnect();
|
|
||||||
if (Wsrep_server_state::instance().wait_until_state(
|
|
||||||
Wsrep_server_state::s_disconnected))
|
|
||||||
{
|
|
||||||
WSREP_WARN("Wsrep interrupted while waiting for disconnected state");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
wsrep_close_client_connections(TRUE);
|
|
||||||
|
|
||||||
/* wait until appliers have stopped */
|
|
||||||
wsrep_wait_appliers_close(NULL);
|
|
||||||
node_uuid= WSREP_UUID_UNDEFINED;
|
|
||||||
|
|
||||||
/* Undocking the thread specific data. */
|
/* Undocking the thread specific data. */
|
||||||
my_pthread_setspecific_ptr(THR_THD, NULL);
|
my_pthread_setspecific_ptr(THR_THD, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool wsrep_start_replication(const char *wsrep_cluster_address)
|
bool wsrep_start_replication(const char *wsrep_cluster_address)
|
||||||
|
@ -2644,14 +2632,19 @@ static my_bool have_client_connections(THD *thd, void*)
|
||||||
{
|
{
|
||||||
DBUG_PRINT("quit",("Informing thread %lld that it's time to die",
|
DBUG_PRINT("quit",("Informing thread %lld that it's time to die",
|
||||||
(longlong) thd->thread_id));
|
(longlong) thd->thread_id));
|
||||||
if (is_client_connection(thd) && thd->killed == KILL_CONNECTION)
|
if (is_client_connection(thd))
|
||||||
{
|
{
|
||||||
WSREP_DEBUG("Informing thread %lld that it's time to die",
|
if (thd->killed == KILL_CONNECTION)
|
||||||
thd->thread_id);
|
{
|
||||||
(void)abort_replicated(thd);
|
(void)abort_replicated(thd);
|
||||||
return true;
|
return true;
|
||||||
|
}
|
||||||
|
if (thd->get_stmt_da()->is_eof())
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void wsrep_close_thread(THD *thd)
|
static void wsrep_close_thread(THD *thd)
|
||||||
|
@ -2691,14 +2684,24 @@ static my_bool kill_all_threads(THD *thd, THD *caller_thd)
|
||||||
/* We skip slave threads & scheduler on this first loop through. */
|
/* We skip slave threads & scheduler on this first loop through. */
|
||||||
if (is_client_connection(thd) && thd != caller_thd)
|
if (is_client_connection(thd) && thd != caller_thd)
|
||||||
{
|
{
|
||||||
|
if (thd->get_stmt_da()->is_eof())
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (is_replaying_connection(thd))
|
if (is_replaying_connection(thd))
|
||||||
|
{
|
||||||
thd->set_killed(KILL_CONNECTION);
|
thd->set_killed(KILL_CONNECTION);
|
||||||
else if (!abort_replicated(thd))
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!abort_replicated(thd))
|
||||||
{
|
{
|
||||||
/* replicated transactions must be skipped */
|
/* replicated transactions must be skipped */
|
||||||
WSREP_DEBUG("closing connection %lld", (longlong) thd->thread_id);
|
WSREP_DEBUG("closing connection %lld", (longlong) thd->thread_id);
|
||||||
/* instead of wsrep_close_thread() we do now soft kill by THD::awake */
|
/* instead of wsrep_close_thread() we do now soft kill by THD::awake */
|
||||||
thd->awake(KILL_CONNECTION);
|
thd->awake(KILL_CONNECTION);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -2710,6 +2713,7 @@ static my_bool kill_remaining_threads(THD *thd, THD *caller_thd)
|
||||||
if (is_client_connection(thd) &&
|
if (is_client_connection(thd) &&
|
||||||
!abort_replicated(thd) &&
|
!abort_replicated(thd) &&
|
||||||
!is_replaying_connection(thd) &&
|
!is_replaying_connection(thd) &&
|
||||||
|
!thd->get_stmt_da()->is_eof() &&
|
||||||
thd_is_connection_alive(thd) &&
|
thd_is_connection_alive(thd) &&
|
||||||
thd != caller_thd)
|
thd != caller_thd)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Reference in a new issue