2012-03-28 01:04:46 +02:00
|
|
|
/* Copyright (c) 2006, 2012, Oracle and/or its affiliates.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
2006-10-31 16:51:51 +01:00
|
|
|
|
|
|
|
#ifndef RPL_MI_H
|
|
|
|
#define RPL_MI_H
|
|
|
|
|
|
|
|
#ifdef HAVE_REPLICATION
|
|
|
|
|
2007-04-12 08:58:04 +02:00
|
|
|
#include "rpl_rli.h"
|
2007-06-11 22:15:39 +02:00
|
|
|
#include "rpl_reporting.h"
|
2009-10-01 19:44:53 +03:00
|
|
|
#include "my_sys.h"
|
2013-04-16 19:43:28 +08:00
|
|
|
#include "rpl_filter.h"
|
|
|
|
#include "keycaches.h"
|
2007-04-12 08:58:04 +02:00
|
|
|
|
2010-03-31 16:05:33 +02:00
|
|
|
typedef struct st_mysql MYSQL;
|
2007-04-12 08:58:04 +02:00
|
|
|
|
2006-10-31 16:51:51 +01:00
|
|
|
/*****************************************************************************
  Replication IO Thread

  Master_info contains:
    - information about how to connect to a master
    - current master log name
    - current master log offset
    - misc control variables

  Master_info is initialized once from the master.info file if such
  exists. Otherwise, data members corresponding to master.info fields
  are initialized with defaults specified by master-* options. The
  initialization is done through init_master_info() call.

  The format of master.info file:

  log_name
  log_pos
  master_host
  master_user
  master_pass
  master_port
  master_connect_retry

  To write out the contents of master.info file to disk (needed every
  time we read and queue data from the master), a call to
  flush_master_info() is required.

  To clean up, call end_master_info()

*****************************************************************************/
|
|
|
|
|
2007-08-16 08:52:50 +02:00
|
|
|
class Master_info : public Slave_reporting_capability
|
2006-10-31 16:51:51 +01:00
|
|
|
{
|
|
|
|
public:
|
MDEV-26: Global transaction ID.
Fix problems related to reconnect. When we need to reconnect (ie. explict
stop/start of just the IO thread by user, or automatic reconnect due to
loosing network connection with the master), it is a bit complex to correctly
resume at the right point without causing duplicate or missing events in the
relay log. The previous code had multiple problems in this regard.
With this patch, the problem is solved as follows. The IO thread keeps track
(in memory) of which GTID was last queued to the relay log. If it needs to
reconnect, it resumes at that GTID position. It also counts number of events
received within the last, possibly partial, event group, and skips the same
number of events after a reconnect, so that events already enqueued before the
reconnect are not duplicated.
(There is no need to keep any persistent state; whenever we restart slave
threads after both of them being stopped (such as after server restart), we
erase the relay logs and start over from the last GTID applied by SQL thread.
But while the SQL thread is running, this patch is needed to get correct relay
log).
2013-06-05 14:32:47 +02:00
|
|
|
enum enum_using_gtid {
|
|
|
|
USE_GTID_NO= 0, USE_GTID_CURRENT_POS= 1, USE_GTID_SLAVE_POS= 2
|
|
|
|
};
|
|
|
|
|
2012-09-28 02:06:56 +03:00
|
|
|
Master_info(LEX_STRING *connection_name, bool is_slave_recovery);
|
2007-08-16 08:52:50 +02:00
|
|
|
~Master_info();
|
2009-10-01 19:44:53 +03:00
|
|
|
bool shall_ignore_server_id(ulong s_id);
|
BUG#11809016 - NO WAY TO DISCOVER AN INSTANCE IS NO LONGER A SLAVE FOLLOWING MYSQL BUG#28796
Before BUG#28796, an empty host was used to identify that an instance was no
longer a slave. However, BUG#28796 changed this behavior and one cannot set
an empty host. Besides, a RESET SLAVE only cleans up information on the next
event to retrieve from the master, disables ssl and resets heartbeat period.
So a call to SHOW SLAVE STATUS after issuing a RESET SLAVE still returns some
valid information, such as host, port, user and password.
To fix this problem, we have introduced the command RESET SLAVE ALL that does
what a regular RESET SLAVE does and also clears host, port, user and password
information thus allowing users to identify when an instance is no longer a
slave.
2011-07-18 18:18:03 +01:00
|
|
|
void clear_in_memory_info(bool all);
|
2012-09-28 02:06:56 +03:00
|
|
|
bool error()
|
|
|
|
{
|
|
|
|
/* If malloc() in initialization failed */
|
|
|
|
return connection_name.str == 0;
|
|
|
|
}
|
MDEV-26: Global transaction ID.
Fix problems related to reconnect. When we need to reconnect (ie. explict
stop/start of just the IO thread by user, or automatic reconnect due to
loosing network connection with the master), it is a bit complex to correctly
resume at the right point without causing duplicate or missing events in the
relay log. The previous code had multiple problems in this regard.
With this patch, the problem is solved as follows. The IO thread keeps track
(in memory) of which GTID was last queued to the relay log. If it needs to
reconnect, it resumes at that GTID position. It also counts number of events
received within the last, possibly partial, event group, and skips the same
number of events after a reconnect, so that events already enqueued before the
reconnect are not duplicated.
(There is no need to keep any persistent state; whenever we restart slave
threads after both of them being stopped (such as after server restart), we
erase the relay logs and start over from the last GTID applied by SQL thread.
But while the SQL thread is running, this patch is needed to get correct relay
log).
2013-06-05 14:32:47 +02:00
|
|
|
static const char *using_gtid_astext(enum enum_using_gtid arg);
|
2006-10-31 16:51:51 +01:00
|
|
|
|
|
|
|
/* the variables below are needed because we can change masters on the fly */
|
2012-10-03 01:44:54 +03:00
|
|
|
char master_log_name[FN_REFLEN+6]; /* Room for multi-*/
|
2013-01-11 00:35:33 +02:00
|
|
|
char host[HOSTNAME_LENGTH*SYSTEM_CHARSET_MBMAXLEN+1];
|
2013-04-18 22:17:29 +02:00
|
|
|
char user[USERNAME_LENGTH+1];
|
2013-01-11 00:35:33 +02:00
|
|
|
char password[MAX_PASSWORD_LENGTH*SYSTEM_CHARSET_MBMAXLEN+1];
|
2012-09-28 02:06:56 +03:00
|
|
|
LEX_STRING connection_name; /* User supplied connection name */
|
|
|
|
LEX_STRING cmp_connection_name; /* Connection name in lower case */
|
2010-09-24 01:00:32 +03:00
|
|
|
bool ssl; // enables use of SSL connection if true
|
2006-10-31 16:51:51 +01:00
|
|
|
char ssl_ca[FN_REFLEN], ssl_capath[FN_REFLEN], ssl_cert[FN_REFLEN];
|
|
|
|
char ssl_cipher[FN_REFLEN], ssl_key[FN_REFLEN];
|
2012-08-14 17:23:34 +03:00
|
|
|
char ssl_crl[FN_REFLEN], ssl_crlpath[FN_REFLEN];
|
2010-09-24 01:00:32 +03:00
|
|
|
bool ssl_verify_server_cert;
|
2006-10-31 16:51:51 +01:00
|
|
|
|
|
|
|
my_off_t master_log_pos;
|
|
|
|
File fd; // we keep the file open, so we need to remember the file pointer
|
|
|
|
IO_CACHE file;
|
|
|
|
|
2012-01-23 17:39:37 +05:30
|
|
|
mysql_mutex_t data_lock, run_lock, sleep_lock;
|
|
|
|
mysql_cond_t data_cond, start_cond, stop_cond, sleep_cond;
|
2006-10-31 16:51:51 +01:00
|
|
|
THD *io_thd;
|
|
|
|
MYSQL* mysql;
|
|
|
|
uint32 file_id; /* for 3.23 load data infile */
|
2007-08-16 07:37:50 +02:00
|
|
|
Relay_log_info rli;
|
2006-10-31 16:51:51 +01:00
|
|
|
uint port;
|
2013-04-16 19:43:28 +08:00
|
|
|
Rpl_filter* rpl_filter; /* Each replication can set its filter rule*/
|
2011-05-03 14:01:11 +02:00
|
|
|
/*
|
|
|
|
to hold checksum alg in use until IO thread has received FD.
|
|
|
|
Initialized to novalue, then set to the queried from master
|
|
|
|
@@global.binlog_checksum and deactivated once FD has been received.
|
|
|
|
*/
|
|
|
|
uint8 checksum_alg_before_fd;
|
2006-10-31 16:51:51 +01:00
|
|
|
uint connect_retry;
|
|
|
|
#ifndef DBUG_OFF
|
|
|
|
int events_till_disconnect;
|
|
|
|
#endif
|
|
|
|
bool inited;
|
|
|
|
volatile bool abort_slave;
|
|
|
|
volatile uint slave_running;
|
|
|
|
volatile ulong slave_run_id;
|
|
|
|
/*
|
|
|
|
The difference in seconds between the clock of the master and the clock of
|
|
|
|
the slave (second - first). It must be signed as it may be <0 or >0.
|
|
|
|
clock_diff_with_master is computed when the I/O thread starts; for this the
|
|
|
|
I/O thread does a SELECT UNIX_TIMESTAMP() on the master.
|
|
|
|
"how late the slave is compared to the master" is computed like this:
|
|
|
|
clock_of_slave - last_timestamp_executed_by_SQL_thread - clock_diff_with_master
|
|
|
|
|
|
|
|
*/
|
|
|
|
long clock_diff_with_master;
|
BUG#40337 Fsyncing master and relay log to disk after every event is too slow
NOTE: Backporting the patch to next-mr.
The fix proposed in BUG#35542 and BUG#31665 introduces a performance issue
when fsyncing the master.info, relay.info and relay-log.bin* after #th events.
Although such solution has been proposed to reduce the probability of corrupted
files due to a slave-crash, the performance penalty introduced by it has
made the approach impractical for highly intensive workloads.
In a nutshell, the option --syn-relay-log proposed in BUG#35542 and BUG#31665
simultaneously fsyncs master.info, relay-log.info and relay-log.bin* and
this is the main source of performance issues.
This patch introduces new options that give more control to the user on
what should be fsynced and how often:
1) (--sync-master-info, integer) which syncs the master.info after #th event;
2) (--sync-relay-log, integer) which syncs the relay-log.bin* after #th
events.
3) (--sync-relay-log-info, integer) which syncs the relay.info after #th
transactions.
To provide both performance and increased reliability, we recommend the following
setup:
1) --sync-master-info = 0 eventually the operating system will fsync it;
2) --sync-relay-log = 0 eventually the operating system will fsync it;
3) --sync-relay-log-info = 1 fsyncs it after every transaction;
Notice, that the previous setup does not reduce the probability of
corrupted master.info and relay-log.bin*. To overcome the issue, this patch also
introduces a recovery mechanism that right after restart throws away relay-log.bin*
retrieved from a master and updates the master.info based on the relay.info:
4) (--relay-log-recovery, boolean) which enables a recovery mechanism that
throws away relay-log.bin* after a crash.
However, it can only recover the incorrect binlog file and position in master.info,
if other informations (host, port password, etc) are corrupted or incorrect,
then this recovery mechanism will fail to work.
2009-09-29 15:40:52 +01:00
|
|
|
/*
|
|
|
|
Keeps track of the number of events before fsyncing.
|
|
|
|
The option --sync-master-info determines how many
|
|
|
|
events should happen before fsyncing.
|
|
|
|
*/
|
|
|
|
uint sync_counter;
|
2009-10-01 19:44:53 +03:00
|
|
|
float heartbeat_period; // interface with CHANGE MASTER or master.info
|
|
|
|
ulonglong received_heartbeats; // counter of received heartbeat events
|
|
|
|
DYNAMIC_ARRAY ignore_server_ids;
|
|
|
|
ulong master_id;
|
2013-03-21 11:03:31 +01:00
|
|
|
/*
|
2013-05-22 17:36:48 +02:00
|
|
|
Which kind of GTID position (if any) is used when connecting to master.
|
|
|
|
|
|
|
|
Note that you can not change the numeric values of these, they are used
|
|
|
|
in master.info.
|
2013-03-21 11:03:31 +01:00
|
|
|
*/
|
MDEV-26: Global transaction ID.
Fix problems related to reconnect. When we need to reconnect (ie. explict
stop/start of just the IO thread by user, or automatic reconnect due to
loosing network connection with the master), it is a bit complex to correctly
resume at the right point without causing duplicate or missing events in the
relay log. The previous code had multiple problems in this regard.
With this patch, the problem is solved as follows. The IO thread keeps track
(in memory) of which GTID was last queued to the relay log. If it needs to
reconnect, it resumes at that GTID position. It also counts number of events
received within the last, possibly partial, event group, and skips the same
number of events after a reconnect, so that events already enqueued before the
reconnect are not duplicated.
(There is no need to keep any persistent state; whenever we restart slave
threads after both of them being stopped (such as after server restart), we
erase the relay logs and start over from the last GTID applied by SQL thread.
But while the SQL thread is running, this patch is needed to get correct relay
log).
2013-06-05 14:32:47 +02:00
|
|
|
enum enum_using_gtid using_gtid;
|
|
|
|
|
|
|
|
/*
|
|
|
|
This GTID position records how far we have fetched into the relay logs.
|
|
|
|
This is used to continue fetching when the IO thread reconnects to the
|
|
|
|
master.
|
|
|
|
|
|
|
|
(Full slave stop/start does not use it, as it resets the relay logs).
|
|
|
|
*/
|
|
|
|
slave_connection_state gtid_current_pos;
|
|
|
|
/*
|
|
|
|
If events_queued_since_last_gtid is non-zero, it is the number of events
|
|
|
|
queued so far in the relaylog of a GTID-prefixed event group.
|
|
|
|
It is zero when no partial event group has been queued at the moment.
|
|
|
|
*/
|
|
|
|
uint64 events_queued_since_last_gtid;
|
|
|
|
/*
|
|
|
|
The GTID of the partially-queued event group, when
|
|
|
|
events_queued_since_last_gtid is non-zero.
|
|
|
|
*/
|
|
|
|
rpl_gtid last_queued_gtid;
|
2013-10-11 11:21:18 +02:00
|
|
|
/* Whether last_queued_gtid had the FL_STANDALONE flag set. */
|
|
|
|
bool last_queued_gtid_standalone;
|
MDEV-26: Global transaction ID.
Fix problems related to reconnect. When we need to reconnect (ie. explict
stop/start of just the IO thread by user, or automatic reconnect due to
loosing network connection with the master), it is a bit complex to correctly
resume at the right point without causing duplicate or missing events in the
relay log. The previous code had multiple problems in this regard.
With this patch, the problem is solved as follows. The IO thread keeps track
(in memory) of which GTID was last queued to the relay log. If it needs to
reconnect, it resumes at that GTID position. It also counts number of events
received within the last, possibly partial, event group, and skips the same
number of events after a reconnect, so that events already enqueued before the
reconnect are not duplicated.
(There is no need to keep any persistent state; whenever we restart slave
threads after both of them being stopped (such as after server restart), we
erase the relay logs and start over from the last GTID applied by SQL thread.
But while the SQL thread is running, this patch is needed to get correct relay
log).
2013-06-05 14:32:47 +02:00
|
|
|
/*
|
|
|
|
When slave IO thread needs to reconnect, gtid_reconnect_event_skip_count
|
|
|
|
counts number of events to skip from the first GTID-prefixed event group,
|
|
|
|
to avoid duplicating events in the relay log.
|
|
|
|
*/
|
|
|
|
uint64 gtid_reconnect_event_skip_count;
|
|
|
|
/* gtid_event_seen is false until we receive first GTID event from master. */
|
|
|
|
bool gtid_event_seen;
|
2006-10-31 16:51:51 +01:00
|
|
|
};
|
2007-08-16 08:52:50 +02:00
|
|
|
int init_master_info(Master_info* mi, const char* master_info_fname,
|
2006-10-31 16:51:51 +01:00
|
|
|
const char* slave_info_fname,
|
|
|
|
bool abort_if_no_master_info_file,
|
|
|
|
int thread_mask);
|
2007-08-16 08:52:50 +02:00
|
|
|
void end_master_info(Master_info* mi);
|
2010-02-03 16:56:17 +00:00
|
|
|
int flush_master_info(Master_info* mi,
|
|
|
|
bool flush_relay_log_cache,
|
|
|
|
bool need_lock_relay_log);
|
2009-10-01 19:44:53 +03:00
|
|
|
int change_master_server_id_cmp(ulong *id1, ulong *id2);
|
2013-04-16 19:43:28 +08:00
|
|
|
void copy_filter_setting(Rpl_filter* dst_filter, Rpl_filter* src_filter);
|
2006-10-31 16:51:51 +01:00
|
|
|
|
2012-09-28 02:06:56 +03:00
|
|
|
/*
|
|
|
|
Multi master are handled trough this struct.
|
|
|
|
Changes to this needs to be protected by LOCK_active_mi;
|
|
|
|
*/
|
|
|
|
|
|
|
|
class Master_info_index
|
|
|
|
{
|
|
|
|
private:
|
|
|
|
IO_CACHE index_file;
|
|
|
|
char index_file_name[FN_REFLEN];
|
|
|
|
|
|
|
|
public:
|
|
|
|
Master_info_index();
|
|
|
|
~Master_info_index();
|
|
|
|
|
|
|
|
HASH master_info_hash;
|
|
|
|
|
|
|
|
bool init_all_master_info();
|
|
|
|
bool write_master_name_to_index_file(LEX_STRING *connection_name,
|
|
|
|
bool do_sync);
|
|
|
|
|
|
|
|
bool check_duplicate_master_info(LEX_STRING *connection_name,
|
|
|
|
const char *host, uint port);
|
|
|
|
bool add_master_info(Master_info *mi, bool write_to_file);
|
|
|
|
bool remove_master_info(LEX_STRING *connection_name);
|
|
|
|
Master_info *get_master_info(LEX_STRING *connection_name,
|
2013-06-15 18:32:08 +03:00
|
|
|
Sql_condition::enum_warning_level warning);
|
2012-10-03 01:44:54 +03:00
|
|
|
bool give_error_if_slave_running();
|
|
|
|
bool start_all_slaves(THD *thd);
|
|
|
|
bool stop_all_slaves(THD *thd);
|
2012-09-28 02:06:56 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
bool check_master_connection_name(LEX_STRING *name);
|
2012-10-23 11:19:42 +02:00
|
|
|
void create_logfile_name_with_suffix(char *res_file_name, size_t length,
|
2012-09-28 02:06:56 +03:00
|
|
|
const char *info_file,
|
2012-10-01 02:30:44 +03:00
|
|
|
bool append,
|
2012-09-28 02:06:56 +03:00
|
|
|
LEX_STRING *suffix);
|
|
|
|
|
|
|
|
uchar *get_key_master_info(Master_info *mi, size_t *length,
|
|
|
|
my_bool not_used __attribute__((unused)));
|
|
|
|
void free_key_master_info(Master_info *mi);
|
|
|
|
|
|
|
|
|
2006-10-31 16:51:51 +01:00
|
|
|
#endif /* HAVE_REPLICATION */
|
|
|
|
#endif /* RPL_MI_H */
|