MDEV-11675 Lag Free Alter On Slave

This commit implements two phase binloggable ALTER.
When a new

      @@session.binlog_alter_two_phase = YES

ALTER query gets logged in two parts, the START ALTER and the COMMIT
or ROLLBACK ALTER. START Alter is written in binlog as soon as
necessary locks have been acquired for the table. The timing is
such that any concurrent DML:s that update the same table are either
committed, thus logged into binary log having done work on the old
version of the table, or will be queued for execution on its new
version.

The "COMPLETE" COMMIT or ROLLBACK ALTER are written at the very point
of a normal "single-piece" ALTER that is after the most of
the query work is done. When its result is positive COMMIT ALTER is
written, otherwise ROLLBACK ALTER is written with specific error
happened after START ALTER phase.
Replication of two-phase binloggable ALTER is
cross-version safe. Specifically the OLD slave merely does not
recognized the start alter part, still being able to process and
memorize its gtid.

Two phase logged ALTER is read from binlog by mysqlbinlog to produce
BINLOG 'string', where 'string' contains base64 encoded
Query_log_event containing either the start part of ALTER, or a
completion part. The Query details can be displayed with `-v` flag,
similarly to ROW format events.  Notice, mysqlbinlog output containing
parts of two-phase binloggable ALTER is processable correctly only by
binlog_alter_two_phase server.

@@log_warnings > 2 can reveal details of binlogging and slave side
processing of the ALTER parts.

The current commit also carries fixes to the following list of
reported bugs:
MDEV-27511, MDEV-27471, MDEV-27349, MDEV-27628, MDEV-27528.

Thanks to all people involved into early discussion of the feature
including Kristian Nielsen, those who helped to design, implement and
test: Sergei Golubchik, Andrei Elkin who took the burden of the
implemenation completion, Sujatha Sivakumar, Brandon
Nesterenko, Alice Sherepa, Ramesh Sivaraman, Jan Lindstrom.
This commit is contained in:
Sachin 2021-01-29 11:59:14 +00:00 committed by Andrei
commit 0c5d1342ae
87 changed files with 8451 additions and 150 deletions

View file

@ -20,6 +20,7 @@
#include "sql_parse.h"
#include "sql_acl.h"
#include "rpl_rli.h"
#include "rpl_mi.h"
#include "slave.h"
#include "log_event.h"
@ -70,7 +71,8 @@ static int check_event_type(int type, Relay_log_info *rli)
/* It is always allowed to execute FD events. */
return 0;
case QUERY_EVENT:
case TABLE_MAP_EVENT:
case WRITE_ROWS_EVENT_V1:
case UPDATE_ROWS_EVENT_V1:
@ -167,6 +169,69 @@ int binlog_defragment(THD *thd)
return 0;
}
/**
Wraps Log_event::apply_event to save and restore
session context in case of Query_log_event.
@param ev replication event
@param rgi execution context for the event
@return
0 on success,
non-zero otherwise.
*/
#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
int save_restore_context_apply_event(Log_event *ev, rpl_group_info *rgi)
{
int err= 0;
THD *thd= rgi->thd;
Relay_log_info *rli= thd->rli_fake;
sql_digest_state *m_digest;
PSI_statement_locker *m_statement_psi;
LEX_CSTRING save_db;
my_thread_id m_thread_id= 0;
LEX_CSTRING connection_name= { STRING_WITH_LEN("BINLOG_BASE64_EVENT") };
DBUG_ASSERT(!rli->mi);
if (ev->get_type_code() == QUERY_EVENT)
{
m_digest= thd->m_digest;
m_statement_psi= thd->m_statement_psi;
m_thread_id= thd->variables.pseudo_thread_id;
thd->system_thread_info.rpl_sql_info= NULL;
if ((rli->mi= new Master_info(&connection_name, false)))
{
save_db= thd->db;
thd->reset_db(&null_clex_str);
}
else
{
my_error(ER_OUT_OF_RESOURCES, MYF(0));
err= -1;
goto end;
}
thd->m_digest= NULL;
thd->m_statement_psi= NULL;
}
err= ev->apply_event(rgi);
end:
if (ev->get_type_code() == QUERY_EVENT)
{
thd->m_digest= m_digest;
thd->m_statement_psi= m_statement_psi;
thd->variables.pseudo_thread_id= m_thread_id;
thd->reset_db(&save_db);
delete rli->mi;
rli->mi= NULL;
}
return err;
}
#endif
/**
Execute a BINLOG statement.
@ -216,11 +281,9 @@ void mysql_client_binlog_statement(THD* thd)
if (!(rgi= thd->rgi_fake))
rgi= thd->rgi_fake= new rpl_group_info(rli);
rgi->thd= thd;
const char *error= 0;
Log_event *ev = 0;
my_bool is_fragmented= FALSE;
/*
Out of memory check
*/
@ -373,7 +436,7 @@ void mysql_client_binlog_statement(THD* thd)
LEX *backup_lex;
thd->backup_and_reset_current_lex(&backup_lex);
err= ev->apply_event(rgi);
err= save_restore_context_apply_event(ev, rgi);
thd->restore_current_lex(backup_lex);
}
thd->variables.option_bits=
@ -389,7 +452,7 @@ void mysql_client_binlog_statement(THD* thd)
i.e. when this thread terminates.
*/
if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
delete ev;
delete ev;
ev= 0;
if (err)
{
@ -397,7 +460,8 @@ void mysql_client_binlog_statement(THD* thd)
TODO: Maybe a better error message since the BINLOG statement
now contains several events.
*/
my_error(ER_UNKNOWN_ERROR, MYF(0));
if (!thd->is_error())
my_error(ER_UNKNOWN_ERROR, MYF(0));
goto end;
}
}
@ -413,5 +477,7 @@ end:
thd->variables.option_bits= thd_options;
rgi->slave_close_thread_tables(thd);
my_free(buf);
delete rgi;
rgi= thd->rgi_fake= NULL;
DBUG_VOID_RETURN;
}