mirror of
https://github.com/MariaDB/server.git
synced 2025-03-27 17:38:45 +01:00
MDEV-15636 mariabackup --lock-ddl-per-table hangs if ALTER table is running
concurrently. There is a deadlock between three connections: C1, mariabackup's connection that holds MDL locks; C2, an online ALTER TABLE that wants to hold the MDL exclusively and tries to upgrade its MDL lock; and C3, another mariabackup connection that does FLUSH TABLES (or FTWRL). C3 waits for C2, which waits for C1, which waits for C3, thus the deadlock. MDL locks cannot be released until FLUSH succeeds, because otherwise it would allow ALTER to sneak in, causing backup to abort and breaking lock-ddl-per-table's promise. The fix here works around the deadlock by killing connections in "Waiting for metadata lock" status (i.e. ALTER). This killing continues until FTWRL succeeds. Killing connections is skipped in case the --no-locks parameter was passed to backup, because there won't be a FLUSH. For reference, in Percona's xtrabackup --lock-ddl-per-connection silently implies --no-lock, i.e. FLUSH is always skipped there. A rather large part of the fix is introducing a DBUG capability to start a query in a new connection at the right moment of the backup, compensating somewhat for mariabackup's lack of send_query or DBUG_SYNC.
This commit is contained in:
parent
a1d68faa38
commit
27c24808f7
4 changed files with 179 additions and 27 deletions
extra/mariabackup
|
@ -1428,6 +1428,10 @@ void backup_release()
|
|||
history_lock_time = time(NULL) - history_lock_time;
|
||||
}
|
||||
|
||||
if (opt_lock_ddl_per_table) {
|
||||
mdl_unlock_all();
|
||||
}
|
||||
|
||||
if (opt_safe_slave_backup && sql_thread_started) {
|
||||
msg("Starting slave SQL thread\n");
|
||||
xb_mysql_query(mysql_connection,
|
||||
|
|
|
@ -868,6 +868,76 @@ stop_query_killer()
|
|||
os_event_wait_time(kill_query_thread_stopped, 60000);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Killing connections that wait for MDL lock.
|
||||
If lock-ddl-per-table is used, there can be some DDL statements running concurrently with the backup.
|
||||
|
||||
FLUSH TABLES would hang infinitely, if DDL statements are waiting for
|
||||
MDL lock, which mariabackup currently holds. Therefore we start killing
|
||||
those statements from a dedicated thread, until FLUSH TABLES WITH READ LOCK
|
||||
succeeds.
|
||||
*/
|
||||
|
||||
static os_event_t mdl_killer_stop_event;
|
||||
static os_event_t mdl_killer_finished_event;
|
||||
|
||||
static
|
||||
os_thread_ret_t
|
||||
DECLARE_THREAD(kill_mdl_waiters_thread(void *))
|
||||
{
|
||||
MYSQL *mysql;
|
||||
if ((mysql = xb_mysql_connect()) == NULL) {
|
||||
msg("Error: kill mdl waiters thread failed to connect\n");
|
||||
goto stop_thread;
|
||||
}
|
||||
|
||||
for(;;){
|
||||
if (os_event_wait_time(mdl_killer_stop_event, 1000) == 0)
|
||||
break;
|
||||
|
||||
MYSQL_RES *result = xb_mysql_query(mysql,
|
||||
"SELECT ID, COMMAND FROM INFORMATION_SCHEMA.PROCESSLIST "
|
||||
" WHERE State='Waiting for table metadata lock'",
|
||||
true, true);
|
||||
while (MYSQL_ROW row = mysql_fetch_row(result))
|
||||
{
|
||||
char query[64];
|
||||
msg_ts("Killing MDL waiting query '%s' on connection '%s'\n",
|
||||
row[1], row[0]);
|
||||
snprintf(query, sizeof(query), "KILL QUERY %s", row[0]);
|
||||
xb_mysql_query(mysql, query, true);
|
||||
}
|
||||
}
|
||||
|
||||
mysql_close(mysql);
|
||||
|
||||
stop_thread:
|
||||
msg_ts("Kill mdl waiters thread stopped\n");
|
||||
os_event_set(mdl_killer_finished_event);
|
||||
os_thread_exit();
|
||||
return os_thread_ret_t(0);
|
||||
}
|
||||
|
||||
|
||||
static void start_mdl_waiters_killer()
|
||||
{
|
||||
mdl_killer_stop_event = os_event_create(0);
|
||||
mdl_killer_finished_event = os_event_create(0);
|
||||
os_thread_create(kill_mdl_waiters_thread, 0, 0);
|
||||
}
|
||||
|
||||
|
||||
/* Tell MDL killer to stop and finish for its completion*/
|
||||
static void stop_mdl_waiters_killer()
|
||||
{
|
||||
os_event_set(mdl_killer_stop_event);
|
||||
os_event_wait(mdl_killer_finished_event);
|
||||
|
||||
os_event_destroy(mdl_killer_stop_event);
|
||||
os_event_destroy(mdl_killer_finished_event);
|
||||
}
|
||||
|
||||
/*********************************************************************//**
|
||||
Function acquires either a backup tables lock, if supported
|
||||
by the server, or a global read lock (FLUSH TABLES WITH READ LOCK)
|
||||
|
@ -890,6 +960,10 @@ lock_tables(MYSQL *connection)
|
|||
return(true);
|
||||
}
|
||||
|
||||
if (opt_lock_ddl_per_table) {
|
||||
start_mdl_waiters_killer();
|
||||
}
|
||||
|
||||
if (!opt_lock_wait_timeout && !opt_kill_long_queries_timeout) {
|
||||
|
||||
/* We do first a FLUSH TABLES. If a long update is running, the
|
||||
|
@ -930,6 +1004,10 @@ lock_tables(MYSQL *connection)
|
|||
|
||||
xb_mysql_query(connection, "FLUSH TABLES WITH READ LOCK", false);
|
||||
|
||||
if (opt_lock_ddl_per_table) {
|
||||
stop_mdl_waiters_killer();
|
||||
}
|
||||
|
||||
if (opt_kill_long_queries_timeout) {
|
||||
stop_query_killer();
|
||||
}
|
||||
|
@ -1647,25 +1725,6 @@ mdl_lock_init()
|
|||
}
|
||||
}
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
/* Test that table is really locked, if lock_ddl_per_table is set.
|
||||
The test is executed in DBUG_EXECUTE_IF block inside mdl_lock_table().
|
||||
*/
|
||||
static void check_mdl_lock_works(const char *table_name)
|
||||
{
|
||||
MYSQL *test_con= xb_mysql_connect();
|
||||
char *query;
|
||||
xb_a(asprintf(&query,
|
||||
"SET STATEMENT max_statement_time=1 FOR ALTER TABLE %s"
|
||||
" ADD COLUMN mdl_lock_column int", table_name));
|
||||
int err = mysql_query(test_con, query);
|
||||
DBUG_ASSERT(err);
|
||||
int err_no = mysql_errno(test_con);
|
||||
DBUG_ASSERT(err_no == ER_STATEMENT_TIMEOUT);
|
||||
mysql_close(test_con);
|
||||
free(query);
|
||||
}
|
||||
#endif
|
||||
void
|
||||
mdl_lock_table(ulint space_id)
|
||||
{
|
||||
|
@ -1681,13 +1740,10 @@ mdl_lock_table(ulint space_id)
|
|||
while (MYSQL_ROW row = mysql_fetch_row(mysql_result)) {
|
||||
std::string full_table_name = ut_get_name(0,row[0]);
|
||||
std::ostringstream lock_query;
|
||||
lock_query << "SELECT * FROM " << full_table_name << " LIMIT 0";
|
||||
lock_query << "SELECT 1 FROM " << full_table_name << " LIMIT 0";
|
||||
|
||||
msg_ts("Locking MDL for %s\n", full_table_name.c_str());
|
||||
xb_mysql_query(mdl_con, lock_query.str().c_str(), false, false);
|
||||
|
||||
DBUG_EXECUTE_IF("check_mdl_lock_works",
|
||||
check_mdl_lock_works(full_table_name.c_str()););
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&mdl_lock_con_mutex);
|
||||
|
|
|
@ -434,6 +434,91 @@ datafiles_iter_free(datafiles_iter_t *it)
|
|||
free(it);
|
||||
}
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
struct dbug_thread_param_t
|
||||
{
|
||||
MYSQL *con;
|
||||
const char *query;
|
||||
int expect_err;
|
||||
int expect_errno;
|
||||
os_event_t done_event;
|
||||
};
|
||||
|
||||
|
||||
/* Thread procedure used in dbug_start_query_thread. */
|
||||
extern "C"
|
||||
os_thread_ret_t
|
||||
DECLARE_THREAD(dbug_execute_in_new_connection)(void *arg)
|
||||
{
|
||||
mysql_thread_init();
|
||||
dbug_thread_param_t *par= (dbug_thread_param_t *)arg;
|
||||
int err = mysql_query(par->con, par->query);
|
||||
int err_no = mysql_errno(par->con);
|
||||
DBUG_ASSERT(par->expect_err == err);
|
||||
if (err && par->expect_errno)
|
||||
DBUG_ASSERT(err_no == par->expect_errno);
|
||||
mysql_close(par->con);
|
||||
mysql_thread_end();
|
||||
os_event_t done = par->done_event;
|
||||
delete par;
|
||||
os_event_set(done);
|
||||
os_thread_exit();
|
||||
return os_thread_ret_t(0);
|
||||
}
|
||||
|
||||
/*
|
||||
Execute query from a new connection, in own thread.
|
||||
|
||||
@param query - query to be executed
|
||||
@param wait_state - if not NULL, wait until query from new connection
|
||||
reaches this state (value of column State in I_S.PROCESSLIST)
|
||||
@param expected_err - if 0, query is supposed to finish successfully,
|
||||
otherwise query should return error.
|
||||
@param expected_errno - if not 0, and query finished with error,
|
||||
expected mysql_errno()
|
||||
*/
|
||||
static os_event_t dbug_start_query_thread(
|
||||
const char *query,
|
||||
const char *wait_state,
|
||||
int expected_err,
|
||||
int expected_errno)
|
||||
|
||||
{
|
||||
dbug_thread_param_t *par = new dbug_thread_param_t;
|
||||
par->query = query;
|
||||
par->expect_err = expected_err;
|
||||
par->expect_errno = expected_errno;
|
||||
par->done_event = os_event_create(0);
|
||||
par->con = xb_mysql_connect();
|
||||
os_thread_create(dbug_execute_in_new_connection, par, 0);
|
||||
|
||||
if (!wait_state)
|
||||
return par->done_event;
|
||||
|
||||
char q[256];
|
||||
snprintf(q, sizeof(q),
|
||||
"SELECT 1 FROM INFORMATION_SCHEMA.PROCESSLIST where ID=%lu"
|
||||
" AND Command='Query' AND State='%s'",
|
||||
mysql_thread_id(par->con), wait_state);
|
||||
for (;;) {
|
||||
MYSQL_RES *result = xb_mysql_query(mysql_connection,q, true, true);
|
||||
while (MYSQL_ROW row = mysql_fetch_row(result)) {
|
||||
goto end;
|
||||
}
|
||||
msg_ts("Waiting for query '%s' on connection %lu to "
|
||||
" reach state '%s'", query, mysql_thread_id(par->con),
|
||||
wait_state);
|
||||
my_sleep(1000);
|
||||
}
|
||||
end:
|
||||
msg_ts("query '%s' on connection %lu reached state '%s'", query,
|
||||
mysql_thread_id(par->con), wait_state);
|
||||
return par->done_event;
|
||||
}
|
||||
|
||||
os_event_t dbug_alter_thread_done;
|
||||
#endif
|
||||
|
||||
void mdl_lock_all()
|
||||
{
|
||||
mdl_lock_init();
|
||||
|
@ -449,6 +534,11 @@ void mdl_lock_all()
|
|||
mdl_lock_table(node->space->id);
|
||||
}
|
||||
datafiles_iter_free(it);
|
||||
|
||||
DBUG_EXECUTE_IF("check_mdl_lock_works",
|
||||
dbug_alter_thread_done =
|
||||
dbug_start_query_thread("ALTER TABLE test.t ADD COLUMN mdl_lock_column int",
|
||||
"Waiting for table metadata lock",1, ER_QUERY_INTERRUPTED););
|
||||
}
|
||||
|
||||
/** Check if the space id belongs to the table which name should
|
||||
|
@ -4078,6 +4168,11 @@ reread_log_header:
|
|||
|
||||
backup_release();
|
||||
|
||||
DBUG_EXECUTE_IF("check_mdl_lock_works",
|
||||
os_event_wait(dbug_alter_thread_done);
|
||||
os_event_destroy(dbug_alter_thread_done);
|
||||
);
|
||||
|
||||
if (ok) {
|
||||
backup_finish();
|
||||
}
|
||||
|
@ -4087,10 +4182,6 @@ reread_log_header:
|
|||
goto fail;
|
||||
}
|
||||
|
||||
if (opt_lock_ddl_per_table) {
|
||||
mdl_unlock_all();
|
||||
}
|
||||
|
||||
xtrabackup_destroy_datasinks();
|
||||
|
||||
msg("mariabackup: Redo log (from LSN " LSN_PF " to " LSN_PF
|
||||
|
|
|
@ -110,6 +110,7 @@ extern my_bool opt_noversioncheck;
|
|||
extern my_bool opt_no_backup_locks;
|
||||
extern my_bool opt_decompress;
|
||||
extern my_bool opt_remove_original;
|
||||
extern my_bool opt_lock_ddl_per_table;
|
||||
|
||||
extern char *opt_incremental_history_name;
|
||||
extern char *opt_incremental_history_uuid;
|
||||
|
|
Loading…
Add table
Reference in a new issue