MDEV-23080: desync and pause node on BACKUP STAGE BLOCK_DDL

Make BACKUP STAGE behave like FTWRL (FLUSH TABLES WITH READ LOCK) by desyncing
and pausing the node, which prevents BF threads (appliers) from interfering
with the blocking stages. This is needed because BF threads don't respect
BACKUP MDL locks.

Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
This commit is contained in:
Leandro Pacheco 2021-07-20 12:45:00 -03:00 committed by Jan Lindström
commit 2b84e1c966
8 changed files with 231 additions and 5 deletions

View file

@ -34,6 +34,7 @@
#include "sql_insert.h" // kill_delayed_threads
#include "sql_handler.h" // mysql_ha_cleanup_no_free
#include <my_sys.h>
#include "wsrep_mysqld.h"
static const char *stage_names[]=
{"START", "FLUSH", "BLOCK_DDL", "BLOCK_COMMIT", "END", 0};
@ -254,6 +255,21 @@ static bool backup_block_ddl(THD *thd)
(void) flush_tables(thd, FLUSH_NON_TRANS_TABLES);
thd->clear_error();
#ifdef WITH_WSREP
/*
We desync the node for BACKUP STAGE because applier threads
bypass backup MDL locks (see MDL_lock::can_grant_lock)
*/
if (WSREP_NNULL(thd))
{
Wsrep_server_state &server_state= Wsrep_server_state::instance();
if (server_state.desync_and_pause().is_undefined()) {
DBUG_RETURN(1);
}
thd->wsrep_desynced_backup_stage= true;
}
#endif /* WITH_WSREP */
/*
block new DDL's, in addition to all previous blocks
We didn't do this lock above, as we wanted DDL's to be executed while
@ -318,6 +334,14 @@ bool backup_end(THD *thd)
ha_end_backup();
thd->current_backup_stage= BACKUP_FINISHED;
thd->mdl_context.release_lock(backup_flush_ticket);
#ifdef WITH_WSREP
if (WSREP_NNULL(thd) && thd->wsrep_desynced_backup_stage)
{
Wsrep_server_state &server_state= Wsrep_server_state::instance();
server_state.resume_and_resync();
thd->wsrep_desynced_backup_stage= false;
}
#endif /* WITH_WSREP */
}
DBUG_RETURN(0);
}

View file

@ -1281,6 +1281,7 @@ void THD::init()
m_wsrep_next_trx_id = WSREP_UNDEFINED_TRX_ID;
wsrep_replicate_GTID = false;
wsrep_aborter = 0;
wsrep_desynced_backup_stage= false;
#endif /* WITH_WSREP */
if (variables.sql_log_bin)

View file

@ -3011,6 +3011,9 @@ public:
uint server_status,open_options;
enum enum_thread_type system_thread;
enum backup_stages current_backup_stage;
#ifdef WITH_WSREP
bool wsrep_desynced_backup_stage;
#endif /* WITH_WSREP */
/*
Current or next transaction isolation level.
When a connection is established, the value is taken from

View file

@ -2168,7 +2168,7 @@ int wsrep_to_isolation_begin(THD *thd, const char *db_, const char *table_,
if (Wsrep_server_state::instance().desynced_on_pause())
{
my_message(ER_UNKNOWN_COM_ERROR,
"Aborting TOI: Global Read-Lock (FTWRL) in place.", MYF(0));
"Aborting TOI: Replication paused on node for FTWRL/BACKUP STAGE.", MYF(0));
return -1;
}