2013-03-11 16:02:40 +01:00
|
|
|
/* Copyright (c) 2013, Kristian Nielsen and MariaDB Services Ab.
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
|
|
|
|
|
|
|
#ifndef RPL_GTID_H
|
|
|
|
#define RPL_GTID_H
|
|
|
|
|
2014-02-07 19:15:28 +01:00
|
|
|
#include "hash.h"
|
|
|
|
#include "queues.h"
|
|
|
|
|
|
|
|
|
2013-03-11 16:02:40 +01:00
|
|
|
/* Definitions for MariaDB global transaction ID (GTID). */
|
|
|
|
|
|
|
|
|
2013-03-11 16:16:55 +01:00
|
|
|
/*
  Declaration only; the object is defined in another translation unit.
  NOTE(review): judging by the name, this is the name of the table that holds
  the slave GTID state (presumably the one validated by
  gtid_check_rpl_slave_state_table()) -- confirm at the definition.
*/
extern const LEX_STRING rpl_gtid_slave_state_table_name;
|
|
|
|
|
2013-03-11 16:02:40 +01:00
|
|
|
/* Forward declaration: this header only uses String through pointers. */
class String;
|
|
|
|
|
|
|
|
struct rpl_gtid
|
|
|
|
{
|
|
|
|
uint32 domain_id;
|
|
|
|
uint32 server_id;
|
|
|
|
uint64 seq_no;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2013-03-21 11:03:31 +01:00
|
|
|
/*
  GTID skip modes.

  The enumerators keep their implicit values 0, 1 and 2, in this order.

  NOTE(review): judging by the names, these distinguish "not skipping" from
  skipping a standalone event versus skipping a whole transaction -- confirm
  against the code that uses this enum.
*/
enum enum_gtid_skip_type {
  GTID_SKIP_NOT,
  GTID_SKIP_STANDALONE,
  GTID_SKIP_TRANSACTION
};
|
|
|
|
|
|
|
|
|
2014-02-08 22:28:41 +01:00
|
|
|
/*
|
|
|
|
Structure to keep track of threads waiting in MASTER_GTID_WAIT().
|
|
|
|
|
|
|
|
Since replication is (mostly) single-threaded, we want to minimise the
|
|
|
|
performance impact on that from MASTER_GTID_WAIT(). To achieve this, we
|
|
|
|
are careful to keep the common lock between replication threads and
|
|
|
|
MASTER_GTID_WAIT threads held for as short as possible. We keep only
|
|
|
|
a single thread waiting to be notified by the replication threads; this
|
|
|
|
thread then handles all the (potentially heavy) lifting of dealing with
|
|
|
|
all current waiting threads.
|
|
|
|
*/
|
|
|
|
struct gtid_waiting {
|
|
|
|
/* Elements in the hash, basically a priority queue for each domain. */
|
|
|
|
struct hash_element {
|
|
|
|
QUEUE queue;
|
|
|
|
uint32 domain_id;
|
|
|
|
};
|
|
|
|
/* A priority queue to handle waiters in one domain in seq_no order. */
|
|
|
|
struct queue_element {
|
|
|
|
uint64 wait_seq_no;
|
|
|
|
THD *thd;
|
|
|
|
int queue_idx;
|
|
|
|
/*
|
|
|
|
do_small_wait is true if we have responsibility for ensuring that there
|
|
|
|
is a small waiter.
|
|
|
|
*/
|
|
|
|
bool do_small_wait;
|
|
|
|
/*
|
|
|
|
The flag `done' is set when the wait is completed (either due to reaching
|
|
|
|
the position waited for, or due to timeout or kill). The queue_element
|
|
|
|
is in the queue if and only if `done' is true.
|
|
|
|
*/
|
|
|
|
bool done;
|
|
|
|
};
|
|
|
|
|
|
|
|
mysql_mutex_t LOCK_gtid_waiting;
|
|
|
|
HASH hash;
|
|
|
|
|
|
|
|
void init();
|
|
|
|
void destroy();
|
|
|
|
hash_element *get_entry(uint32 domain_id);
|
|
|
|
int wait_for_pos(THD *thd, String *gtid_str, longlong timeout_us);
|
|
|
|
void promote_new_waiter(gtid_waiting::hash_element *he);
|
|
|
|
int wait_for_gtid(THD *thd, rpl_gtid *wait_gtid, struct timespec *wait_until);
|
|
|
|
void process_wait_hash(uint64 wakeup_seq_no, gtid_waiting::hash_element *he);
|
|
|
|
int register_in_wait_queue(THD *thd, rpl_gtid *wait_gtid, hash_element *he,
|
|
|
|
queue_element *elem);
|
|
|
|
void remove_from_wait_queue(hash_element *he, queue_element *elem);
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2014-03-09 10:27:38 +01:00
|
|
|
/* Forward declaration: this header only uses Relay_log_info through pointers. */
class Relay_log_info;
|
2014-03-12 00:14:49 +01:00
|
|
|
/* Forward declaration: this header only uses rpl_group_info through pointers. */
struct rpl_group_info;
|
2014-03-09 10:27:38 +01:00
|
|
|
|
2013-03-11 16:02:40 +01:00
|
|
|
/*
|
|
|
|
Replication slave state.
|
|
|
|
|
|
|
|
For every independent replication stream (identified by domain_id), this
|
|
|
|
remembers the last gtid applied on the slave within this domain.
|
|
|
|
|
|
|
|
Since events are always committed in-order within a single domain, this is
|
|
|
|
sufficient to maintain the state of the replication slave.
|
|
|
|
*/
|
|
|
|
struct rpl_slave_state
|
|
|
|
{
|
|
|
|
/* Elements in the list of GTIDs kept for each domain_id. */
|
|
|
|
struct list_element
|
|
|
|
{
|
|
|
|
struct list_element *next;
|
|
|
|
uint64 sub_id;
|
|
|
|
uint64 seq_no;
|
|
|
|
uint32 server_id;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Elements in the HASH that hold the state for one domain_id. */
|
|
|
|
struct element
|
|
|
|
{
|
|
|
|
struct list_element *list;
|
|
|
|
uint32 domain_id;
|
2014-02-07 19:15:28 +01:00
|
|
|
/* Highest seq_no seen so far in this domain. */
|
|
|
|
uint64 highest_seq_no;
|
|
|
|
/*
|
2014-02-08 22:28:41 +01:00
|
|
|
If this is non-NULL, then it is the waiter responsible for the small
|
|
|
|
wait in MASTER_GTID_WAIT().
|
|
|
|
*/
|
|
|
|
gtid_waiting::queue_element *gtid_waiter;
|
|
|
|
/*
|
|
|
|
If gtid_waiter is non-NULL, then this is the seq_no that its
|
|
|
|
MASTER_GTID_WAIT() is waiting on. When we reach this seq_no, we need to
|
|
|
|
signal the waiter on COND_wait_gtid.
|
2014-02-07 19:15:28 +01:00
|
|
|
*/
|
|
|
|
uint64 min_wait_seq_no;
|
|
|
|
mysql_cond_t COND_wait_gtid;
|
2013-03-11 16:02:40 +01:00
|
|
|
|
2014-03-09 10:27:38 +01:00
|
|
|
/*
|
|
|
|
For --gtid-ignore-duplicates. The Relay_log_info that currently owns
|
|
|
|
this domain, and the number of worker threads that are active in it.
|
|
|
|
|
|
|
|
The idea is that only one of multiple master connections is allowed to
|
|
|
|
actively apply events for a given domain. Other connections must either
|
|
|
|
discard the events (if the seq_no in GTID shows they have already been
|
|
|
|
applied), or wait to see if the current owner will apply it.
|
|
|
|
*/
|
|
|
|
const Relay_log_info *owner_rli;
|
|
|
|
uint32 owner_count;
|
|
|
|
mysql_cond_t COND_gtid_ignore_duplicates;
|
|
|
|
|
2013-03-11 16:02:40 +01:00
|
|
|
list_element *grab_list() { list_element *l= list; list= NULL; return l; }
|
|
|
|
void add(list_element *l)
|
|
|
|
{
|
|
|
|
l->next= list;
|
|
|
|
list= l;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Mapping from domain_id to its element. */
|
|
|
|
HASH hash;
|
|
|
|
/* Mutex protecting access to the state. */
|
|
|
|
mysql_mutex_t LOCK_slave_state;
|
|
|
|
|
2013-10-25 21:17:14 +02:00
|
|
|
uint64 last_sub_id;
|
2013-03-11 16:02:40 +01:00
|
|
|
bool loaded;
|
|
|
|
|
|
|
|
rpl_slave_state();
|
|
|
|
~rpl_slave_state();
|
|
|
|
|
|
|
|
void truncate_hash();
|
|
|
|
ulong count() const { return hash.records; }
|
2014-03-09 10:27:38 +01:00
|
|
|
int update(uint32 domain_id, uint32 server_id, uint64 sub_id,
|
2014-03-12 00:14:49 +01:00
|
|
|
uint64 seq_no, rpl_group_info *rgi);
|
2013-03-11 16:02:40 +01:00
|
|
|
int truncate_state_table(THD *thd);
|
|
|
|
int record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
|
2013-05-22 17:36:48 +02:00
|
|
|
bool in_transaction, bool in_statement);
|
2013-06-20 09:04:44 +02:00
|
|
|
uint64 next_sub_id(uint32 domain_id);
|
MDEV-26: Global transaction ID.
Fix problems related to reconnect. When we need to reconnect (ie. explict
stop/start of just the IO thread by user, or automatic reconnect due to
loosing network connection with the master), it is a bit complex to correctly
resume at the right point without causing duplicate or missing events in the
relay log. The previous code had multiple problems in this regard.
With this patch, the problem is solved as follows. The IO thread keeps track
(in memory) of which GTID was last queued to the relay log. If it needs to
reconnect, it resumes at that GTID position. It also counts number of events
received within the last, possibly partial, event group, and skips the same
number of events after a reconnect, so that events already enqueued before the
reconnect are not duplicated.
(There is no need to keep any persistent state; whenever we restart slave
threads after both of them being stopped (such as after server restart), we
erase the relay logs and start over from the last GTID applied by SQL thread.
But while the SQL thread is running, this patch is needed to get correct relay
log).
2013-06-05 14:32:47 +02:00
|
|
|
int iterate(int (*cb)(rpl_gtid *, void *), void *data,
|
|
|
|
rpl_gtid *extra_gtids, uint32 num_extra);
|
2013-03-11 16:02:40 +01:00
|
|
|
int tostring(String *dest, rpl_gtid *extra_gtids, uint32 num_extra);
|
2013-03-18 15:09:36 +01:00
|
|
|
bool domain_to_gtid(uint32 domain_id, rpl_gtid *out_gtid);
|
2013-05-22 17:36:48 +02:00
|
|
|
int load(THD *thd, char *state_from_master, size_t len, bool reset,
|
|
|
|
bool in_statement);
|
2013-03-11 16:02:40 +01:00
|
|
|
bool is_empty();
|
|
|
|
|
|
|
|
element *get_element(uint32 domain_id);
|
2013-05-29 14:23:40 +02:00
|
|
|
int put_back_list(uint32 domain_id, list_element *list);
|
2013-03-11 16:02:40 +01:00
|
|
|
|
2014-03-12 00:14:49 +01:00
|
|
|
void update_state_hash(uint64 sub_id, rpl_gtid *gtid, rpl_group_info *rgi);
|
2013-07-03 19:03:21 +02:00
|
|
|
int record_and_update_gtid(THD *thd, struct rpl_group_info *rgi);
|
2014-03-12 00:14:49 +01:00
|
|
|
int check_duplicate_gtid(rpl_gtid *gtid, rpl_group_info *rgi);
|
|
|
|
void release_domain_owner(rpl_group_info *rgi);
|
2013-03-11 16:02:40 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Binlog state.
|
|
|
|
This keeps the last GTID written to the binlog for every distinct
|
|
|
|
(domain_id, server_id) pair.
|
|
|
|
This will be logged at the start of the next binlog file as a
|
|
|
|
Gtid_list_log_event; this way, it is easy to find the binlog file
|
|
|
|
containing a gigen GTID, by simply scanning backwards from the newest
|
|
|
|
one until a lower seq_no is found in the Gtid_list_log_event at the
|
|
|
|
start of a binlog for the given domain_id and server_id.
|
|
|
|
|
|
|
|
We also remember the last logged GTID for every domain_id. This is used
|
|
|
|
to know where to start when a master is changed to a slave. As a side
|
|
|
|
effect, it also allows to skip a hash lookup in the very common case of
|
|
|
|
logging a new GTID with same server id as last GTID.
|
|
|
|
*/
|
|
|
|
struct rpl_binlog_state
|
|
|
|
{
|
|
|
|
struct element {
|
|
|
|
uint32 domain_id;
|
|
|
|
HASH hash; /* Containing all server_id for one domain_id */
|
|
|
|
/* The most recent entry in the hash. */
|
|
|
|
rpl_gtid *last_gtid;
|
2013-05-28 13:28:31 +02:00
|
|
|
/* Counter to allocate next seq_no for this domain. */
|
|
|
|
uint64 seq_no_counter;
|
|
|
|
|
|
|
|
int update_element(const rpl_gtid *gtid);
|
2013-03-11 16:02:40 +01:00
|
|
|
};
|
|
|
|
/* Mapping from domain_id to collection of elements. */
|
|
|
|
HASH hash;
|
|
|
|
/* Mutex protecting access to the state. */
|
|
|
|
mysql_mutex_t LOCK_binlog_state;
|
2013-05-05 21:39:31 +03:00
|
|
|
my_bool initialized;
|
2013-03-11 16:02:40 +01:00
|
|
|
|
|
|
|
rpl_binlog_state();
|
|
|
|
~rpl_binlog_state();
|
|
|
|
|
2013-11-18 15:22:50 +01:00
|
|
|
void reset_nolock();
|
2013-03-11 16:02:40 +01:00
|
|
|
void reset();
|
2013-05-05 21:39:31 +03:00
|
|
|
void free();
|
2013-05-15 19:52:21 +02:00
|
|
|
bool load(struct rpl_gtid *list, uint32 count);
|
MDEV-6589: Incorrect relay log start position when restarting SQL thread after error in parallel replication
The problem occurs in parallel replication in GTID mode, when we are using
multiple replication domains. In this case, if the SQL thread stops, the
slave GTID position may refer to a different point in the relay log for each
domain.
The bug was that when the SQL thread was stopped and restarted (but the IO
thread was kept running), the SQL thread would resume applying the relay log
from the point of the most advanced replication domain, silently skipping all
earlier events within other domains. This caused replication corruption.
This patch solves the problem by storing, when the SQL thread stops with
multiple parallel replication domains active, the current GTID
position. Additionally, the current position in the relay logs is moved back
to a point known to be earlier than the current position of any replication
domain. Then when the SQL thread restarts from the earlier position, GTIDs
encountered are compared against the stored GTID position. Any GTID that was
already applied before the stop is skipped to avoid duplicate apply.
This patch should have no effect if multi-domain GTID parallel replication is
not used. Similarly, if both SQL and IO thread are stopped and restarted, the
patch has no effect, as in this case the existing relay logs are removed and
re-fetched from the master at the current global @@gtid_slave_pos.
2015-02-18 12:22:50 +01:00
|
|
|
bool load(rpl_slave_state *slave_pos);
|
2013-11-18 15:22:50 +01:00
|
|
|
int update_nolock(const struct rpl_gtid *gtid, bool strict);
|
2013-05-28 13:28:31 +02:00
|
|
|
int update(const struct rpl_gtid *gtid, bool strict);
|
|
|
|
int update_with_next_gtid(uint32 domain_id, uint32 server_id,
|
|
|
|
rpl_gtid *gtid);
|
2013-11-18 15:22:50 +01:00
|
|
|
int alloc_element_nolock(const rpl_gtid *gtid);
|
2013-05-28 13:28:31 +02:00
|
|
|
bool check_strict_sequence(uint32 domain_id, uint32 server_id, uint64 seq_no);
|
|
|
|
int bump_seq_no_if_needed(uint32 domain_id, uint64 seq_no);
|
2013-03-11 16:02:40 +01:00
|
|
|
int write_to_iocache(IO_CACHE *dest);
|
|
|
|
int read_from_iocache(IO_CACHE *src);
|
|
|
|
uint32 count();
|
|
|
|
int get_gtid_list(rpl_gtid *gtid_list, uint32 list_size);
|
|
|
|
int get_most_recent_gtid_list(rpl_gtid **list, uint32 *size);
|
2013-05-22 17:36:48 +02:00
|
|
|
bool append_pos(String *str);
|
2013-08-23 14:02:13 +02:00
|
|
|
bool append_state(String *str);
|
2013-11-18 15:22:50 +01:00
|
|
|
rpl_gtid *find_nolock(uint32 domain_id, uint32 server_id);
|
2013-03-18 15:09:36 +01:00
|
|
|
rpl_gtid *find(uint32 domain_id, uint32 server_id);
|
2013-05-28 13:28:31 +02:00
|
|
|
rpl_gtid *find_most_recent(uint32 domain_id);
|
2013-03-11 16:02:40 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Represent the GTID state that a slave connection to a master requests
|
|
|
|
the master to start sending binlog events from.
|
|
|
|
*/
|
|
|
|
struct slave_connection_state
|
|
|
|
{
|
2013-08-16 15:10:25 +02:00
|
|
|
struct entry {
|
|
|
|
rpl_gtid gtid;
|
|
|
|
uint32 flags;
|
|
|
|
};
|
|
|
|
static const uint32 START_OWN_SLAVE_POS= 0x1;
|
|
|
|
static const uint32 START_ON_EMPTY_DOMAIN= 0x2;
|
|
|
|
|
|
|
|
/* Mapping from domain_id to the entry with GTID requested for that domain. */
|
2013-03-11 16:02:40 +01:00
|
|
|
HASH hash;
|
|
|
|
|
|
|
|
slave_connection_state();
|
|
|
|
~slave_connection_state();
|
|
|
|
|
MDEV-26: Global transaction ID.
Fix problems related to reconnect. When we need to reconnect (ie. explict
stop/start of just the IO thread by user, or automatic reconnect due to
loosing network connection with the master), it is a bit complex to correctly
resume at the right point without causing duplicate or missing events in the
relay log. The previous code had multiple problems in this regard.
With this patch, the problem is solved as follows. The IO thread keeps track
(in memory) of which GTID was last queued to the relay log. If it needs to
reconnect, it resumes at that GTID position. It also counts number of events
received within the last, possibly partial, event group, and skips the same
number of events after a reconnect, so that events already enqueued before the
reconnect are not duplicated.
(There is no need to keep any persistent state; whenever we restart slave
threads after both of them being stopped (such as after server restart), we
erase the relay logs and start over from the last GTID applied by SQL thread.
But while the SQL thread is running, this patch is needed to get correct relay
log).
2013-06-05 14:32:47 +02:00
|
|
|
void reset() { my_hash_reset(&hash); }
|
2013-03-11 16:02:40 +01:00
|
|
|
int load(char *slave_request, size_t len);
|
|
|
|
int load(const rpl_gtid *gtid_list, uint32 count);
|
MDEV-26: Global transaction ID.
Fix problems related to reconnect. When we need to reconnect (ie. explict
stop/start of just the IO thread by user, or automatic reconnect due to
loosing network connection with the master), it is a bit complex to correctly
resume at the right point without causing duplicate or missing events in the
relay log. The previous code had multiple problems in this regard.
With this patch, the problem is solved as follows. The IO thread keeps track
(in memory) of which GTID was last queued to the relay log. If it needs to
reconnect, it resumes at that GTID position. It also counts number of events
received within the last, possibly partial, event group, and skips the same
number of events after a reconnect, so that events already enqueued before the
reconnect are not duplicated.
(There is no need to keep any persistent state; whenever we restart slave
threads after both of them being stopped (such as after server restart), we
erase the relay logs and start over from the last GTID applied by SQL thread.
But while the SQL thread is running, this patch is needed to get correct relay
log).
2013-06-05 14:32:47 +02:00
|
|
|
int load(rpl_slave_state *state, rpl_gtid *extra_gtids, uint32 num_extra);
|
2013-03-11 16:02:40 +01:00
|
|
|
rpl_gtid *find(uint32 domain_id);
|
2013-08-16 15:10:25 +02:00
|
|
|
entry *find_entry(uint32 domain_id);
|
2013-03-11 16:02:40 +01:00
|
|
|
int update(const rpl_gtid *in_gtid);
|
|
|
|
void remove(const rpl_gtid *gtid);
|
2013-08-22 12:36:42 +02:00
|
|
|
void remove_if_present(const rpl_gtid *in_gtid);
|
2013-03-11 16:02:40 +01:00
|
|
|
ulong count() const { return hash.records; }
|
|
|
|
int to_string(String *out_str);
|
2013-05-15 19:52:21 +02:00
|
|
|
int append_to_string(String *out_str);
|
2013-08-22 12:36:42 +02:00
|
|
|
int get_gtid_list(rpl_gtid *gtid_list, uint32 list_size);
|
2015-03-04 13:10:37 +01:00
|
|
|
bool is_pos_reached();
|
2013-03-11 16:02:40 +01:00
|
|
|
};
|
|
|
|
|
2014-02-07 19:15:28 +01:00
|
|
|
|
2013-03-11 16:02:40 +01:00
|
|
|
/*
  Declaration only; implemented in another translation unit.
  NOTE(review): presumably appends the textual form of *gtid to dest, with
  *first tracking whether a separator is needed before it -- confirm the
  exact contract and the meaning of the return value in the implementation.
*/
extern bool rpl_slave_state_tostring_helper(String *dest, const rpl_gtid *gtid,
                                            bool *first);
|
2013-03-11 16:16:55 +01:00
|
|
|
/*
  Declaration only; implemented in another translation unit.
  NOTE(review): presumably validates the definition of the slave state table
  and returns non-zero on error -- confirm in the implementation.
*/
extern int gtid_check_rpl_slave_state_table(TABLE *table);
|
2013-08-23 14:02:13 +02:00
|
|
|
/*
  Declaration only; implemented in another translation unit.
  Takes a string given as pointer p plus length len (so it need not be
  NUL-terminated) and returns a pointer to rpl_gtid.
  NOTE(review): presumably the result is an allocated array whose element
  count is stored in *out_len, with NULL returned on error -- confirm the
  ownership and error contract in the implementation.
*/
extern rpl_gtid *gtid_parse_string_to_list(const char *p, size_t len,
                                           uint32 *out_len);
|
2013-03-11 16:02:40 +01:00
|
|
|
|
|
|
|
#endif /* RPL_GTID_H */
|