2012-03-28 01:04:46 +02:00
|
|
|
/* Copyright (c) 2006, 2012, Oracle and/or its affiliates.
|
2006-10-31 16:51:51 +01:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
2006-12-27 02:23:51 +01:00
|
|
|
the Free Software Foundation; version 2 of the License.
|
2006-10-31 16:51:51 +01:00
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
2019-05-11 20:29:06 +02:00
|
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
|
2006-10-31 16:51:51 +01:00
|
|
|
|
|
|
|
#ifndef RPL_MI_H
|
|
|
|
#define RPL_MI_H
|
|
|
|
|
|
|
|
#ifdef HAVE_REPLICATION
|
|
|
|
|
2007-04-12 08:58:04 +02:00
|
|
|
#include "rpl_rli.h"
|
2007-06-11 22:15:39 +02:00
|
|
|
#include "rpl_reporting.h"
|
2017-06-18 05:42:16 +02:00
|
|
|
#include <my_sys.h>
|
2013-04-16 13:43:28 +02:00
|
|
|
#include "rpl_filter.h"
|
|
|
|
#include "keycaches.h"
|
2007-04-12 08:58:04 +02:00
|
|
|
|
2010-03-31 16:05:33 +02:00
|
|
|
/*
  Forward declaration of the connection handle type. Only a pointer to it is
  stored in this header (Master_info::mysql), so the full definition of
  struct st_mysql is not needed here.
*/
typedef struct st_mysql MYSQL;
|
2007-04-12 08:58:04 +02:00
|
|
|
|
2014-12-04 04:30:48 +01:00
|
|
|
/**
|
|
|
|
Domain id based filter to handle DO_DOMAIN_IDS and IGNORE_DOMAIN_IDS used to
|
|
|
|
set filtering on replication slave based on event's GTID domain_id.
|
|
|
|
*/
|
|
|
|
class Domain_id_filter
|
|
|
|
{
|
|
|
|
private:
|
|
|
|
/*
|
|
|
|
Flag to tell whether the events in the current GTID group get written to
|
|
|
|
the relay log. It is set according to the domain_id based filtering rule
|
|
|
|
on every GTID_EVENT and reset at the end of current GTID event group.
|
|
|
|
*/
|
|
|
|
bool m_filter;
|
|
|
|
|
2021-04-16 05:18:15 +02:00
|
|
|
public:
|
|
|
|
/* domain id list types */
|
|
|
|
enum enum_list_type {
|
|
|
|
DO_DOMAIN_IDS= 0,
|
|
|
|
IGNORE_DOMAIN_IDS
|
|
|
|
};
|
|
|
|
|
2014-12-04 04:30:48 +01:00
|
|
|
/*
|
|
|
|
DO_DOMAIN_IDS (0):
|
|
|
|
Ignore all the events which do not belong to any of the domain ids in the
|
|
|
|
list.
|
|
|
|
|
|
|
|
IGNORE_DOMAIN_IDS (1):
|
|
|
|
Ignore the events which belong to one of the domain ids in the list.
|
|
|
|
*/
|
|
|
|
DYNAMIC_ARRAY m_domain_ids[2];
|
|
|
|
|
|
|
|
Domain_id_filter();
|
|
|
|
|
|
|
|
~Domain_id_filter();
|
|
|
|
|
|
|
|
/*
|
|
|
|
Returns whether the current group needs to be filtered.
|
|
|
|
*/
|
|
|
|
bool is_group_filtered() { return m_filter; }
|
|
|
|
|
|
|
|
/*
|
|
|
|
Checks whether the group with the specified domain_id needs to be
|
|
|
|
filtered and updates m_filter flag accordingly.
|
|
|
|
*/
|
|
|
|
void do_filter(ulong domain_id);
|
|
|
|
|
|
|
|
/*
|
|
|
|
Reset m_filter. It should be called when IO thread receives COMMIT_EVENT or
|
|
|
|
XID_EVENT.
|
|
|
|
*/
|
|
|
|
void reset_filter();
|
|
|
|
|
MDEV-25284: Assertion `info->type == READ_CACHE || info->type == WRITE_CACHE' failed
Problem:
========
This patch addresses two issues.
First, if a CHANGE MASTER command is issued and an error happens
while locating the replica’s relay logs, the logs can be put into an
invalid state where future updates fail and future CHANGE MASTER
calls crash the server. More specifically, right before a replica
purges the relay logs (part of the `CHANGE MASTER TO` logic), the
relay log is temporarily closed with state LOG_TO_BE_OPENED. If the
server errors in-between the temporary log closure and purge, i.e.
during the function find_log_pos, the log should be closed.
MDEV-25284 reveals the log is not properly closed.
Second, upon issuing a RESET SLAVE ALL command, a slave’s GTID
filters are not cleared (DO_DOMAIN_IDS, IGNORE_DOMIAN_IDS,
IGNORE_SERVER_IDS). MySQL had a similar bug report, Bug #18816897,
which fixed this issue to clear IGNORE_SERVER_IDS after issuing
RESET SLAVE ALL in version 5.7.
Solution:
=========
To fix the first problem, the CHANGE MASTER error handling logic was
extended to transition the relay log state to LOG_CLOSED from
LOG_TO_BE_OPENED.
To fix the second problem, the RESET SLAVE ALL logic is extended to
clear the domain_id filter and ignore_server_ids.
Reviewed By:
============
Andrei Elkin <andrei.elkin@mariadb.com>
2021-10-13 15:31:32 +02:00
|
|
|
/*
|
|
|
|
Clear do_ids and ignore_ids to disable domain id filtering
|
|
|
|
*/
|
|
|
|
void clear_ids();
|
|
|
|
|
2014-12-04 04:30:48 +01:00
|
|
|
/*
|
|
|
|
Update the do/ignore domain id filter lists.
|
|
|
|
|
|
|
|
@param do_ids [IN] domain ids to be kept
|
|
|
|
@param ignore_ids [IN] domain ids to be filtered out
|
|
|
|
@param using_gtid [IN] use GTID?
|
|
|
|
|
|
|
|
@retval false Success
|
|
|
|
true Error
|
|
|
|
*/
|
|
|
|
bool update_ids(DYNAMIC_ARRAY *do_ids, DYNAMIC_ARRAY *ignore_ids,
|
|
|
|
bool using_gtid);
|
|
|
|
|
|
|
|
/*
|
|
|
|
Serialize and store the ids from domain id lists into the thd's protocol
|
|
|
|
buffer.
|
|
|
|
|
|
|
|
@param thd [IN] thread handler
|
|
|
|
|
|
|
|
@retval void
|
|
|
|
*/
|
|
|
|
void store_ids(THD *thd);
|
|
|
|
|
|
|
|
/*
|
|
|
|
Initialize the given domain id list (DYNAMIC_ARRAY) with the
|
|
|
|
space-separated list of numbers from the specified IO_CACHE where
|
|
|
|
the first number is the total number of entries to follows.
|
|
|
|
|
|
|
|
@param f [IN] IO_CACHE file
|
|
|
|
@param type [IN] domain id list type
|
|
|
|
|
|
|
|
@retval false Success
|
|
|
|
true Error
|
|
|
|
*/
|
|
|
|
bool init_ids(IO_CACHE *f, enum_list_type type);
|
|
|
|
|
|
|
|
/*
|
|
|
|
Return the elements of the give domain id list type as string.
|
|
|
|
|
|
|
|
@param type [IN] domain id list type
|
|
|
|
|
|
|
|
@retval a string buffer storing the total number
|
|
|
|
of elements followed by the individual
|
|
|
|
elements (space-separated) in the
|
|
|
|
specified list.
|
|
|
|
|
|
|
|
Note: Its caller's responsibility to free the returned string buffer.
|
|
|
|
*/
|
|
|
|
char *as_string(enum_list_type type);
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2014-12-05 16:09:48 +01:00
|
|
|
/*
  TYPELIB for the slave parallel mode setting; declared here, defined in
  another translation unit.
  NOTE(review): presumably holds the textual names matching
  enum_slave_parallel_mode (see Master_info::parallel_mode) - confirm at the
  definition.
*/
extern TYPELIB slave_parallel_mode_typelib;
|
|
|
|
|
2018-01-11 22:56:54 +01:00
|
|
|
typedef struct st_rows_event_tracker
|
|
|
|
{
|
|
|
|
char binlog_file_name[FN_REFLEN];
|
|
|
|
my_off_t first_seen;
|
|
|
|
my_off_t last_seen;
|
|
|
|
bool stmt_end_seen;
|
2021-01-26 01:20:05 +01:00
|
|
|
void update(const char *file_name, my_off_t pos,
|
|
|
|
const uchar *buf,
|
2018-01-11 22:56:54 +01:00
|
|
|
const Format_description_log_event *fdle);
|
|
|
|
void reset();
|
2018-01-13 19:27:46 +01:00
|
|
|
bool check_and_report(const char* file_name, my_off_t pos);
|
2018-01-11 22:56:54 +01:00
|
|
|
} Rows_event_tracker;
|
|
|
|
|
2006-10-31 16:51:51 +01:00
|
|
|
/*****************************************************************************
  Replication IO Thread

  Master_info contains:
    - information about how to connect to a master
    - current master log name
    - current master log offset
    - misc control variables

  Master_info is initialized once from the master.info file if such
  exists. Otherwise, data members corresponding to master.info fields
  are initialized with defaults specified by master-* options. The
  initialization is done through init_master_info() call.

  The format of master.info file:

  log_name
  log_pos
  master_host
  master_user
  master_pass
  master_port
  master_connect_retry

  To write out the contents of master.info file to disk (needed every
  time we read and queue data from the master), a call to
  flush_master_info() is required.

  To clean up, call end_master_info()

*****************************************************************************/
|
|
|
|
|
2007-08-16 08:52:50 +02:00
|
|
|
class Master_info : public Slave_reporting_capability
|
2006-10-31 16:51:51 +01:00
|
|
|
{
|
|
|
|
public:
|
MDEV-26: Global transaction ID.
Fix problems related to reconnect. When we need to reconnect (ie. explict
stop/start of just the IO thread by user, or automatic reconnect due to
loosing network connection with the master), it is a bit complex to correctly
resume at the right point without causing duplicate or missing events in the
relay log. The previous code had multiple problems in this regard.
With this patch, the problem is solved as follows. The IO thread keeps track
(in memory) of which GTID was last queued to the relay log. If it needs to
reconnect, it resumes at that GTID position. It also counts number of events
received within the last, possibly partial, event group, and skips the same
number of events after a reconnect, so that events already enqueued before the
reconnect are not duplicated.
(There is no need to keep any persistent state; whenever we restart slave
threads after both of them being stopped (such as after server restart), we
erase the relay logs and start over from the last GTID applied by SQL thread.
But while the SQL thread is running, this patch is needed to get correct relay
log).
2013-06-05 14:32:47 +02:00
|
|
|
enum enum_using_gtid {
|
|
|
|
USE_GTID_NO= 0, USE_GTID_CURRENT_POS= 1, USE_GTID_SLAVE_POS= 2
|
|
|
|
};
|
|
|
|
|
2017-04-23 18:39:57 +02:00
|
|
|
Master_info(LEX_CSTRING *connection_name, bool is_slave_recovery);
|
2007-08-16 08:52:50 +02:00
|
|
|
~Master_info();
|
2009-10-01 18:44:53 +02:00
|
|
|
bool shall_ignore_server_id(ulong s_id);
|
BUG#11809016 - NO WAY TO DISCOVER AN INSTANCE IS NO LONGER A SLAVE FOLLOWING MYSQL BUG#28796
Before BUG#28796, an empty host was used to identify that an instance was no
longer a slave. However, BUG#28796 changed this behavior and one cannot set
an empty host. Besides, a RESET SLAVE only cleans up information on the next
event to retrieve from the master, disables ssl and resets heartbeat period.
So a call to SHOW SLAVE STATUS after issuing a RESET SLAVE still returns some
valid information, such as host, port, user and password.
To fix this problem, we have introduced the command RESET SLAVE ALL that does
what a regular RESET SLAVE does and also clears host, port, user and password
information thus allowing users to identify when an instance is no longer a
slave.
2011-07-18 19:18:03 +02:00
|
|
|
void clear_in_memory_info(bool all);
|
2012-09-28 01:06:56 +02:00
|
|
|
bool error()
|
|
|
|
{
|
|
|
|
/* If malloc() in initialization failed */
|
|
|
|
return connection_name.str == 0;
|
|
|
|
}
|
MDEV-26: Global transaction ID.
Fix problems related to reconnect. When we need to reconnect (ie. explict
stop/start of just the IO thread by user, or automatic reconnect due to
loosing network connection with the master), it is a bit complex to correctly
resume at the right point without causing duplicate or missing events in the
relay log. The previous code had multiple problems in this regard.
With this patch, the problem is solved as follows. The IO thread keeps track
(in memory) of which GTID was last queued to the relay log. If it needs to
reconnect, it resumes at that GTID position. It also counts number of events
received within the last, possibly partial, event group, and skips the same
number of events after a reconnect, so that events already enqueued before the
reconnect are not duplicated.
(There is no need to keep any persistent state; whenever we restart slave
threads after both of them being stopped (such as after server restart), we
erase the relay logs and start over from the last GTID applied by SQL thread.
But while the SQL thread is running, this patch is needed to get correct relay
log).
2013-06-05 14:32:47 +02:00
|
|
|
static const char *using_gtid_astext(enum enum_using_gtid arg);
|
2014-12-05 16:09:48 +01:00
|
|
|
bool using_parallel()
|
|
|
|
{
|
2015-02-06 10:02:02 +01:00
|
|
|
return opt_slave_parallel_threads > 0 &&
|
|
|
|
parallel_mode > SLAVE_PARALLEL_NONE;
|
2014-12-05 16:09:48 +01:00
|
|
|
}
|
2017-01-29 21:10:56 +01:00
|
|
|
void release();
|
|
|
|
void wait_until_free();
|
2017-01-29 22:44:24 +01:00
|
|
|
void lock_slave_threads();
|
|
|
|
void unlock_slave_threads();
|
2006-10-31 16:51:51 +01:00
|
|
|
|
|
|
|
/* the variables below are needed because we can change masters on the fly */
|
2012-10-03 00:44:54 +02:00
|
|
|
char master_log_name[FN_REFLEN+6]; /* Room for multi-*/
|
2013-01-10 23:35:33 +01:00
|
|
|
char host[HOSTNAME_LENGTH*SYSTEM_CHARSET_MBMAXLEN+1];
|
2013-04-18 22:17:29 +02:00
|
|
|
char user[USERNAME_LENGTH+1];
|
2013-01-10 23:35:33 +01:00
|
|
|
char password[MAX_PASSWORD_LENGTH*SYSTEM_CHARSET_MBMAXLEN+1];
|
2017-04-23 18:39:57 +02:00
|
|
|
LEX_CSTRING connection_name; /* User supplied connection name */
|
|
|
|
LEX_CSTRING cmp_connection_name; /* Connection name in lower case */
|
2010-09-24 00:00:32 +02:00
|
|
|
bool ssl; // enables use of SSL connection if true
|
2006-10-31 16:51:51 +01:00
|
|
|
char ssl_ca[FN_REFLEN], ssl_capath[FN_REFLEN], ssl_cert[FN_REFLEN];
|
|
|
|
char ssl_cipher[FN_REFLEN], ssl_key[FN_REFLEN];
|
2012-08-14 16:23:34 +02:00
|
|
|
char ssl_crl[FN_REFLEN], ssl_crlpath[FN_REFLEN];
|
2010-09-24 00:00:32 +02:00
|
|
|
bool ssl_verify_server_cert;
|
2006-10-31 16:51:51 +01:00
|
|
|
|
|
|
|
my_off_t master_log_pos;
|
|
|
|
File fd; // we keep the file open, so we need to remember the file pointer
|
|
|
|
IO_CACHE file;
|
|
|
|
|
MDEV-11675 Lag Free Alter On Slave
This commit implements two phase binloggable ALTER.
When a new
@@session.binlog_alter_two_phase = YES
ALTER query gets logged in two parts, the START ALTER and the COMMIT
or ROLLBACK ALTER. START Alter is written in binlog as soon as
necessary locks have been acquired for the table. The timing is
such that any concurrent DML:s that update the same table are either
committed, thus logged into binary log having done work on the old
version of the table, or will be queued for execution on its new
version.
The "COMPLETE" COMMIT or ROLLBACK ALTER are written at the very point
of a normal "single-piece" ALTER that is after the most of
the query work is done. When its result is positive COMMIT ALTER is
written, otherwise ROLLBACK ALTER is written with specific error
happened after START ALTER phase.
Replication of two-phase binloggable ALTER is
cross-version safe. Specifically the OLD slave merely does not
recognized the start alter part, still being able to process and
memorize its gtid.
Two phase logged ALTER is read from binlog by mysqlbinlog to produce
BINLOG 'string', where 'string' contains base64 encoded
Query_log_event containing either the start part of ALTER, or a
completion part. The Query details can be displayed with `-v` flag,
similarly to ROW format events. Notice, mysqlbinlog output containing
parts of two-phase binloggable ALTER is processable correctly only by
binlog_alter_two_phase server.
@@log_warnings > 2 can reveal details of binlogging and slave side
processing of the ALTER parts.
The current commit also carries fixes to the following list of
reported bugs:
MDEV-27511, MDEV-27471, MDEV-27349, MDEV-27628, MDEV-27528.
Thanks to all people involved into early discussion of the feature
including Kristian Nielsen, those who helped to design, implement and
test: Sergei Golubchik, Andrei Elkin who took the burden of the
implemenation completion, Sujatha Sivakumar, Brandon
Nesterenko, Alice Sherepa, Ramesh Sivaraman, Jan Lindstrom.
2021-01-29 12:59:14 +01:00
|
|
|
mysql_mutex_t data_lock, run_lock, sleep_lock, start_stop_lock, start_alter_lock, start_alter_list_lock;
|
2012-01-23 13:09:37 +01:00
|
|
|
mysql_cond_t data_cond, start_cond, stop_cond, sleep_cond;
|
2006-10-31 16:51:51 +01:00
|
|
|
THD *io_thd;
|
|
|
|
MYSQL* mysql;
|
|
|
|
uint32 file_id; /* for 3.23 load data infile */
|
2007-08-16 07:37:50 +02:00
|
|
|
Relay_log_info rli;
|
2006-10-31 16:51:51 +01:00
|
|
|
uint port;
|
2013-04-16 13:43:28 +02:00
|
|
|
Rpl_filter* rpl_filter; /* Each replication can set its filter rule*/
|
2011-05-03 14:01:11 +02:00
|
|
|
/*
|
|
|
|
to hold checksum alg in use until IO thread has received FD.
|
|
|
|
Initialized to novalue, then set to the queried from master
|
|
|
|
@@global.binlog_checksum and deactivated once FD has been received.
|
|
|
|
*/
|
2015-08-30 15:03:55 +02:00
|
|
|
enum enum_binlog_checksum_alg checksum_alg_before_fd;
|
2006-10-31 16:51:51 +01:00
|
|
|
uint connect_retry;
|
|
|
|
#ifndef DBUG_OFF
|
|
|
|
int events_till_disconnect;
|
2014-12-04 04:30:48 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
The following are auxiliary DBUG variables used to kill IO thread in the
|
|
|
|
middle of a group/transaction (see "kill_slave_io_after_2_events").
|
|
|
|
*/
|
|
|
|
bool dbug_do_disconnect;
|
|
|
|
int dbug_event_counter;
|
2006-10-31 16:51:51 +01:00
|
|
|
#endif
|
|
|
|
bool inited;
|
|
|
|
volatile bool abort_slave;
|
|
|
|
volatile uint slave_running;
|
|
|
|
volatile ulong slave_run_id;
|
|
|
|
/*
|
|
|
|
The difference in seconds between the clock of the master and the clock of
|
|
|
|
the slave (second - first). It must be signed as it may be <0 or >0.
|
|
|
|
clock_diff_with_master is computed when the I/O thread starts; for this the
|
|
|
|
I/O thread does a SELECT UNIX_TIMESTAMP() on the master.
|
|
|
|
"how late the slave is compared to the master" is computed like this:
|
|
|
|
clock_of_slave - last_timestamp_executed_by_SQL_thread - clock_diff_with_master
|
|
|
|
|
|
|
|
*/
|
|
|
|
long clock_diff_with_master;
|
BUG#40337 Fsyncing master and relay log to disk after every event is too slow
NOTE: Backporting the patch to next-mr.
The fix proposed in BUG#35542 and BUG#31665 introduces a performance issue
when fsyncing the master.info, relay.info and relay-log.bin* after #th events.
Although such solution has been proposed to reduce the probability of corrupted
files due to a slave-crash, the performance penalty introduced by it has
made the approach impractical for highly intensive workloads.
In a nutshell, the option --syn-relay-log proposed in BUG#35542 and BUG#31665
simultaneously fsyncs master.info, relay-log.info and relay-log.bin* and
this is the main source of performance issues.
This patch introduces new options that give more control to the user on
what should be fsynced and how often:
1) (--sync-master-info, integer) which syncs the master.info after #th event;
2) (--sync-relay-log, integer) which syncs the relay-log.bin* after #th
events.
3) (--sync-relay-log-info, integer) which syncs the relay.info after #th
transactions.
To provide both performance and increased reliability, we recommend the following
setup:
1) --sync-master-info = 0 eventually the operating system will fsync it;
2) --sync-relay-log = 0 eventually the operating system will fsync it;
3) --sync-relay-log-info = 1 fsyncs it after every transaction;
Notice, that the previous setup does not reduce the probability of
corrupted master.info and relay-log.bin*. To overcome the issue, this patch also
introduces a recovery mechanism that right after restart throws away relay-log.bin*
retrieved from a master and updates the master.info based on the relay.info:
4) (--relay-log-recovery, boolean) which enables a recovery mechanism that
throws away relay-log.bin* after a crash.
However, it can only recover the incorrect binlog file and position in master.info,
if other informations (host, port password, etc) are corrupted or incorrect,
then this recovery mechanism will fail to work.
2009-09-29 16:40:52 +02:00
|
|
|
/*
|
|
|
|
Keeps track of the number of events before fsyncing.
|
|
|
|
The option --sync-master-info determines how many
|
|
|
|
events should happen before fsyncing.
|
|
|
|
*/
|
|
|
|
uint sync_counter;
|
2009-10-01 18:44:53 +02:00
|
|
|
float heartbeat_period; // interface with CHANGE MASTER or master.info
|
|
|
|
ulonglong received_heartbeats; // counter of received heartbeat events
|
|
|
|
DYNAMIC_ARRAY ignore_server_ids;
|
|
|
|
ulong master_id;
|
2014-09-02 14:07:01 +02:00
|
|
|
/*
|
|
|
|
At reconnect and until the first rotate event is seen, prev_master_id is
|
|
|
|
the value of master_id during the previous connection, used to detect
|
|
|
|
silent change of master server during reconnects.
|
|
|
|
*/
|
|
|
|
ulong prev_master_id;
|
2013-03-21 11:03:31 +01:00
|
|
|
/*
|
2013-05-22 17:36:48 +02:00
|
|
|
Which kind of GTID position (if any) is used when connecting to master.
|
|
|
|
|
|
|
|
Note that you can not change the numeric values of these, they are used
|
|
|
|
in master.info.
|
2013-03-21 11:03:31 +01:00
|
|
|
*/
|
MDEV-26: Global transaction ID.
Fix problems related to reconnect. When we need to reconnect (ie. explict
stop/start of just the IO thread by user, or automatic reconnect due to
loosing network connection with the master), it is a bit complex to correctly
resume at the right point without causing duplicate or missing events in the
relay log. The previous code had multiple problems in this regard.
With this patch, the problem is solved as follows. The IO thread keeps track
(in memory) of which GTID was last queued to the relay log. If it needs to
reconnect, it resumes at that GTID position. It also counts number of events
received within the last, possibly partial, event group, and skips the same
number of events after a reconnect, so that events already enqueued before the
reconnect are not duplicated.
(There is no need to keep any persistent state; whenever we restart slave
threads after both of them being stopped (such as after server restart), we
erase the relay logs and start over from the last GTID applied by SQL thread.
But while the SQL thread is running, this patch is needed to get correct relay
log).
2013-06-05 14:32:47 +02:00
|
|
|
enum enum_using_gtid using_gtid;
|
|
|
|
|
|
|
|
/*
|
|
|
|
This GTID position records how far we have fetched into the relay logs.
|
|
|
|
This is used to continue fetching when the IO thread reconnects to the
|
|
|
|
master.
|
|
|
|
|
|
|
|
(Full slave stop/start does not use it, as it resets the relay logs).
|
|
|
|
*/
|
|
|
|
slave_connection_state gtid_current_pos;
|
|
|
|
/*
|
|
|
|
If events_queued_since_last_gtid is non-zero, it is the number of events
|
|
|
|
queued so far in the relaylog of a GTID-prefixed event group.
|
|
|
|
It is zero when no partial event group has been queued at the moment.
|
|
|
|
*/
|
|
|
|
uint64 events_queued_since_last_gtid;
|
|
|
|
/*
|
|
|
|
The GTID of the partially-queued event group, when
|
|
|
|
events_queued_since_last_gtid is non-zero.
|
|
|
|
*/
|
|
|
|
rpl_gtid last_queued_gtid;
|
2013-10-11 11:21:18 +02:00
|
|
|
/* Whether last_queued_gtid had the FL_STANDALONE flag set. */
|
|
|
|
bool last_queued_gtid_standalone;
|
MDEV-26: Global transaction ID.
Fix problems related to reconnect. When we need to reconnect (ie. explict
stop/start of just the IO thread by user, or automatic reconnect due to
loosing network connection with the master), it is a bit complex to correctly
resume at the right point without causing duplicate or missing events in the
relay log. The previous code had multiple problems in this regard.
With this patch, the problem is solved as follows. The IO thread keeps track
(in memory) of which GTID was last queued to the relay log. If it needs to
reconnect, it resumes at that GTID position. It also counts number of events
received within the last, possibly partial, event group, and skips the same
number of events after a reconnect, so that events already enqueued before the
reconnect are not duplicated.
(There is no need to keep any persistent state; whenever we restart slave
threads after both of them being stopped (such as after server restart), we
erase the relay logs and start over from the last GTID applied by SQL thread.
But while the SQL thread is running, this patch is needed to get correct relay
log).
2013-06-05 14:32:47 +02:00
|
|
|
/*
|
|
|
|
When slave IO thread needs to reconnect, gtid_reconnect_event_skip_count
|
|
|
|
counts number of events to skip from the first GTID-prefixed event group,
|
|
|
|
to avoid duplicating events in the relay log.
|
|
|
|
*/
|
|
|
|
uint64 gtid_reconnect_event_skip_count;
|
|
|
|
/* gtid_event_seen is false until we receive first GTID event from master. */
|
|
|
|
bool gtid_event_seen;
|
2018-01-11 22:56:54 +01:00
|
|
|
/**
|
|
|
|
The struct holds some history of Rows- log-event reading/queuing
|
|
|
|
by the receiver thread. Its fields are updated per each such event
|
|
|
|
at time of queue_event(), and they are checked to detect
|
|
|
|
the Rows- event group integrity violation at time of first non-Rows-
|
|
|
|
event gets handled.
|
|
|
|
*/
|
|
|
|
Rows_event_tracker rows_event_tracker;
|
2017-01-29 21:10:56 +01:00
|
|
|
bool in_start_all_slaves, in_stop_all_slaves;
|
2017-06-05 09:40:24 +02:00
|
|
|
bool in_flush_all_relay_logs;
|
2017-01-29 21:10:56 +01:00
|
|
|
uint users; /* Active user for object */
|
|
|
|
uint killed;
|
2014-12-04 04:30:48 +01:00
|
|
|
|
2018-04-19 11:01:20 +02:00
|
|
|
|
|
|
|
/* No of DDL event group */
|
2018-12-28 15:51:13 +01:00
|
|
|
Atomic_counter<uint64> total_ddl_groups;
|
2018-04-19 11:01:20 +02:00
|
|
|
|
|
|
|
/* No of non-transactional event group*/
|
2018-12-28 15:51:13 +01:00
|
|
|
Atomic_counter<uint64> total_non_trans_groups;
|
2018-04-19 11:01:20 +02:00
|
|
|
|
|
|
|
/* No of transactional event group*/
|
2018-12-28 15:51:13 +01:00
|
|
|
Atomic_counter<uint64> total_trans_groups;
|
2018-04-19 11:01:20 +02:00
|
|
|
|
2014-12-04 04:30:48 +01:00
|
|
|
/* domain-id based filter */
|
|
|
|
Domain_id_filter domain_id_filter;
|
2014-12-05 16:09:48 +01:00
|
|
|
|
2015-02-06 10:02:02 +01:00
|
|
|
/* The parallel replication mode. */
|
|
|
|
enum_slave_parallel_mode parallel_mode;
|
2017-11-22 16:10:34 +01:00
|
|
|
/*
|
|
|
|
semi_ack is used to identify if the current binlog event needs an
|
|
|
|
ACK from slave, or if delay_master is enabled.
|
|
|
|
*/
|
|
|
|
int semi_ack;
|
2022-06-30 14:46:19 +02:00
|
|
|
/*
|
|
|
|
The flag has replicate_same_server_id semantics and is raised to accept
|
|
|
|
a same-server-id event group by the gtid strict mode semisync slave.
|
|
|
|
Own server-id events can normally appear as result of EITHER
|
|
|
|
A. this server semisync (failover to) slave crash-recovery:
|
|
|
|
the transaction was created on this server then being master,
|
|
|
|
got replicated elsewhere right before the crash before commit,
|
|
|
|
and finally at recovery the transaction gets evicted from the
|
|
|
|
server's binlog and its gtid (slave) state; OR
|
|
|
|
B. in a general circular configuration and then when a recieved (returned
|
|
|
|
to slave) gtid exists in the server's binlog. Then, in gtid strict mode,
|
|
|
|
it must be ignored similarly to the replicate-same-server-id rule.
|
|
|
|
*/
|
2022-07-28 09:33:26 +02:00
|
|
|
bool do_accept_own_server_id= false;
|
MDEV-11675 Lag Free Alter On Slave
This commit implements two phase binloggable ALTER.
When a new
@@session.binlog_alter_two_phase = YES
ALTER query gets logged in two parts, the START ALTER and the COMMIT
or ROLLBACK ALTER. START Alter is written in binlog as soon as
necessary locks have been acquired for the table. The timing is
such that any concurrent DML:s that update the same table are either
committed, thus logged into binary log having done work on the old
version of the table, or will be queued for execution on its new
version.
The "COMPLETE" COMMIT or ROLLBACK ALTER are written at the very point
of a normal "single-piece" ALTER that is after the most of
the query work is done. When its result is positive COMMIT ALTER is
written, otherwise ROLLBACK ALTER is written with specific error
happened after START ALTER phase.
Replication of two-phase binloggable ALTER is
cross-version safe. Specifically the OLD slave merely does not
recognized the start alter part, still being able to process and
memorize its gtid.
Two phase logged ALTER is read from binlog by mysqlbinlog to produce
BINLOG 'string', where 'string' contains base64 encoded
Query_log_event containing either the start part of ALTER, or a
completion part. The Query details can be displayed with `-v` flag,
similarly to ROW format events. Notice, mysqlbinlog output containing
parts of two-phase binloggable ALTER is processable correctly only by
binlog_alter_two_phase server.
@@log_warnings > 2 can reveal details of binlogging and slave side
processing of the ALTER parts.
The current commit also carries fixes to the following list of
reported bugs:
MDEV-27511, MDEV-27471, MDEV-27349, MDEV-27628, MDEV-27528.
Thanks to all people involved into early discussion of the feature
including Kristian Nielsen, those who helped to design, implement and
test: Sergei Golubchik, Andrei Elkin who took the burden of the
implemenation completion, Sujatha Sivakumar, Brandon
Nesterenko, Alice Sherepa, Ramesh Sivaraman, Jan Lindstrom.
2021-01-29 12:59:14 +01:00
|
|
|
List <start_alter_info> start_alter_list;
|
|
|
|
MEM_ROOT mem_root;
|
|
|
|
/*
|
|
|
|
Flag is raised at the parallel worker slave stop. Its purpose
|
|
|
|
is to mark the whole start_alter_list when slave stops.
|
|
|
|
The flag is read by Start Alter event to self-mark its state accordingly
|
|
|
|
at time its alter info struct is about to be appened to the list.
|
|
|
|
*/
|
2022-07-28 09:33:26 +02:00
|
|
|
bool is_shutdown= false;
|
MDEV-19801: Change defaults for CHANGE MASTER TO so that GTID-based replication is used by default if master supports it
This commit makes replicas crash-safe by default by changing the
Using_Gtid value to be Slave_Pos on a fresh slave start and after
RESET SLAVE is issued. If the primary server does not support GTIDs
(i.e., version < 10), the replica will fall back to Using_Gtid=No on
slave start and after RESET SLAVE.
The following additional informational messages/warnings are added:
1. When Using_Gtid is automatically changed. That is, if RESET
SLAVE reverts Using_Gtid back to Slave_Pos, or Using_Gtid is
inferred to No from a CHANGE MASTER TO given with log coordinates
without MASTER_USE_GTID.
2. If options are ignored in CHANGE MASTER TO. If CHANGE MASTER TO
is given with log coordinates, yet also specifies
MASTER_USE_GTID=Slave_Pos, a warning message is given that the log
coordinate options are ignored.
Additionally, an MTR macro has been added for RESET SLAVE,
reset_slave.inc, which provides modes/options for resetting a slave
in log coordinate or gtid modes. When in log coordinates mode, the
macro will execute CHANGE MASTER TO MASTER_USE_GTID=No after the
RESET SLAVE command. When in GTID mode, an extra parameter,
reset_slave_keep_gtid_state, can be set to reset or preserve the
value of gtid_slave_pos.
Reviewed By:
===========
Andrei Elkin <andrei.elkin@mariadb.com>
2022-05-23 22:14:00 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
A replica will default to Slave_Pos for using Using_Gtid; however, we
|
|
|
|
first need to test if the master supports GTIDs. If not, fall back to 'No'.
|
|
|
|
Cache the value so future RESET SLAVE commands don't revert to Slave_Pos.
|
|
|
|
*/
|
2022-07-28 10:25:21 +02:00
|
|
|
bool master_supports_gtid= true;
|
2022-06-08 04:06:42 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
When TRUE, transition this server from being an active master to a slave.
|
|
|
|
This updates the replication state to account for any transactions which
|
|
|
|
were committed into the binary log. In particular, it merges
|
|
|
|
gtid_binlog_pos into gtid_slave_pos.
|
|
|
|
*/
|
2022-07-28 10:25:21 +02:00
|
|
|
bool is_demotion= false;
|
MDEV-11675 Lag Free Alter On Slave
This commit implements two phase binloggable ALTER.
When a new
@@session.binlog_alter_two_phase = YES
ALTER query gets logged in two parts, the START ALTER and the COMMIT
or ROLLBACK ALTER. START Alter is written in binlog as soon as
necessary locks have been acquired for the table. The timing is
such that any concurrent DML:s that update the same table are either
committed, thus logged into binary log having done work on the old
version of the table, or will be queued for execution on its new
version.
The "COMPLETE" COMMIT or ROLLBACK ALTER are written at the very point
of a normal "single-piece" ALTER, that is, after most of
the query work is done. When its result is positive COMMIT ALTER is
written, otherwise ROLLBACK ALTER is written with specific error
happened after START ALTER phase.
Replication of two-phase binloggable ALTER is
cross-version safe. Specifically the OLD slave merely does not
recognize the start alter part, still being able to process and
memorize its gtid.
Two phase logged ALTER is read from binlog by mysqlbinlog to produce
BINLOG 'string', where 'string' contains base64 encoded
Query_log_event containing either the start part of ALTER, or a
completion part. The Query details can be displayed with `-v` flag,
similarly to ROW format events. Notice, mysqlbinlog output containing
parts of two-phase binloggable ALTER is processable correctly only by
binlog_alter_two_phase server.
@@log_warnings > 2 can reveal details of binlogging and slave side
processing of the ALTER parts.
The current commit also carries fixes to the following list of
reported bugs:
MDEV-27511, MDEV-27471, MDEV-27349, MDEV-27628, MDEV-27528.
Thanks to all people involved into early discussion of the feature
including Kristian Nielsen, those who helped to design, implement and
test: Sergei Golubchik, Andrei Elkin who took the burden of the
implementation completion, Sujatha Sivakumar, Brandon
Nesterenko, Alice Sherepa, Ramesh Sivaraman, Jan Lindstrom.
2021-01-29 12:59:14 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
struct start_alter_thd_args
|
|
|
|
{
|
|
|
|
rpl_group_info *rgi;
|
|
|
|
LEX_CSTRING query;
|
|
|
|
LEX_CSTRING *db;
|
|
|
|
char *catalog;
|
|
|
|
bool shutdown;
|
|
|
|
CHARSET_INFO *cs;
|
2006-10-31 16:51:51 +01:00
|
|
|
};
|
2014-12-04 04:30:48 +01:00
|
|
|
|
2007-08-16 08:52:50 +02:00
|
|
|
int init_master_info(Master_info* mi, const char* master_info_fname,
|
2006-10-31 16:51:51 +01:00
|
|
|
const char* slave_info_fname,
|
|
|
|
bool abort_if_no_master_info_file,
|
|
|
|
int thread_mask);
|
2007-08-16 08:52:50 +02:00
|
|
|
void end_master_info(Master_info* mi);
|
2010-02-03 17:56:17 +01:00
|
|
|
int flush_master_info(Master_info* mi,
|
|
|
|
bool flush_relay_log_cache,
|
|
|
|
bool need_lock_relay_log);
|
2013-04-16 13:43:28 +02:00
|
|
|
void copy_filter_setting(Rpl_filter* dst_filter, Rpl_filter* src_filter);
|
2014-12-04 04:30:48 +01:00
|
|
|
void update_change_master_ids(DYNAMIC_ARRAY *new_ids, DYNAMIC_ARRAY *old_ids);
|
|
|
|
void prot_store_ids(THD *thd, DYNAMIC_ARRAY *ids);
|
2006-10-31 16:51:51 +01:00
|
|
|
|
2012-09-28 01:06:56 +02:00
|
|
|
/*
|
|
|
|
Multiple masters are handled through this class.
|
|
|
|
Changes to this need to be protected by LOCK_active_mi.
|
|
|
|
*/
|
|
|
|
|
|
|
|
class Master_info_index
|
|
|
|
{
|
|
|
|
private:
|
|
|
|
IO_CACHE index_file;
|
|
|
|
char index_file_name[FN_REFLEN];
|
|
|
|
|
|
|
|
public:
|
|
|
|
Master_info_index();
|
|
|
|
~Master_info_index();
|
|
|
|
|
|
|
|
HASH master_info_hash;
|
|
|
|
|
|
|
|
bool init_all_master_info();
|
2017-04-23 18:39:57 +02:00
|
|
|
bool write_master_name_to_index_file(LEX_CSTRING *connection_name,
|
2012-09-28 01:06:56 +02:00
|
|
|
bool do_sync);
|
|
|
|
|
2017-04-23 18:39:57 +02:00
|
|
|
bool check_duplicate_master_info(LEX_CSTRING *connection_name,
|
2012-09-28 01:06:56 +02:00
|
|
|
const char *host, uint port);
|
|
|
|
bool add_master_info(Master_info *mi, bool write_to_file);
|
2021-08-23 23:38:30 +02:00
|
|
|
bool remove_master_info(Master_info *mi, bool clear_log_files);
|
2017-04-23 18:39:57 +02:00
|
|
|
Master_info *get_master_info(const LEX_CSTRING *connection_name,
|
2013-06-15 17:32:08 +02:00
|
|
|
Sql_condition::enum_warning_level warning);
|
2012-10-03 00:44:54 +02:00
|
|
|
bool start_all_slaves(THD *thd);
|
|
|
|
bool stop_all_slaves(THD *thd);
|
2017-01-29 21:10:56 +01:00
|
|
|
void free_connections();
|
2017-06-05 09:40:24 +02:00
|
|
|
bool flush_all_relay_logs();
|
2012-09-28 01:06:56 +02:00
|
|
|
};
|
|
|
|
|
2014-04-25 12:58:31 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
The class rpl_io_thread_info is the THD::system_thread_info for the IO thread.
|
|
|
|
*/
|
|
|
|
class rpl_io_thread_info
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2017-04-23 18:39:57 +02:00
|
|
|
Master_info *get_master_info(const LEX_CSTRING *connection_name,
|
2017-01-29 21:10:56 +01:00
|
|
|
Sql_condition::enum_warning_level warning);
|
2017-04-23 18:39:57 +02:00
|
|
|
bool check_master_connection_name(LEX_CSTRING *name);
|
2012-10-23 11:19:42 +02:00
|
|
|
void create_logfile_name_with_suffix(char *res_file_name, size_t length,
|
2012-09-28 01:06:56 +02:00
|
|
|
const char *info_file,
|
2012-10-01 01:30:44 +02:00
|
|
|
bool append,
|
2017-04-23 18:39:57 +02:00
|
|
|
LEX_CSTRING *suffix);
|
2012-09-28 01:06:56 +02:00
|
|
|
|
|
|
|
uchar *get_key_master_info(Master_info *mi, size_t *length,
|
|
|
|
my_bool not_used __attribute__((unused)));
|
|
|
|
void free_key_master_info(Master_info *mi);
|
2017-04-23 10:49:58 +02:00
|
|
|
uint any_slave_sql_running(bool already_locked);
|
2017-01-29 21:10:56 +01:00
|
|
|
bool give_error_if_slave_running(bool already_lock);
|
2012-09-28 01:06:56 +02:00
|
|
|
|
2006-10-31 16:51:51 +01:00
|
|
|
#endif /* HAVE_REPLICATION */
|
|
|
|
#endif /* RPL_MI_H */
|