2012-03-28 01:04:46 +02:00
|
|
|
/* Copyright (c) 2006, 2012, Oracle and/or its affiliates.
|
2011-11-03 19:17:05 +01:00
|
|
|
Copyright (c) 2010, 2011, Monty Program Ab
|
2006-10-31 16:51:51 +01:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
2006-12-27 02:23:51 +01:00
|
|
|
the Free Software Foundation; version 2 of the License.
|
2006-10-31 16:51:51 +01:00
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
2011-06-30 17:46:53 +02:00
|
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
2006-10-31 16:51:51 +01:00
|
|
|
|
|
|
|
#include <my_global.h> // For HAVE_REPLICATION
|
2010-03-31 16:05:33 +02:00
|
|
|
#include "sql_priv.h"
|
2006-10-31 16:51:51 +01:00
|
|
|
#include <my_dir.h>
|
2010-03-31 16:05:33 +02:00
|
|
|
#include "unireg.h" // REQUIRED by other includes
|
2006-10-31 16:51:51 +01:00
|
|
|
#include "rpl_mi.h"
|
2010-03-31 16:05:33 +02:00
|
|
|
#include "slave.h" // SLAVE_MAX_HEARTBEAT_PERIOD
|
2006-10-31 16:51:51 +01:00
|
|
|
|
|
|
|
#ifdef HAVE_REPLICATION
|
|
|
|
|
2009-11-04 12:28:20 +00:00
|
|
|
/*
  Initial value of Master_info::connect_retry (see the constructor); it is
  also the value handed to init_intvar_from_file() when connect_retry is
  read back from master.info.
  NOTE(review): the unit is presumably seconds - confirm against the
  CHANGE MASTER documentation.
*/
#define DEFAULT_CONNECT_RETRY 60
|
2006-10-31 16:51:51 +01:00
|
|
|
|
BUG#11809016 - NO WAY TO DISCOVER AN INSTANCE IS NO LONGER A SLAVE FOLLOWING MYSQL BUG#28796
Before BUG#28796, an empty host was used to identify that an instance was no
longer a slave. However, BUG#28796 changed this behavior and one cannot set
an empty host. Besides, a RESET SLAVE only cleans up information on the next
event to retrieve from the master, disables ssl and resets heartbeat period.
So a call to SHOW SLAVE STATUS after issuing a RESET SLAVE still returns some
valid information, such as host, port, user and password.
To fix this problem, we have introduced the command RESET SLAVE ALL that does
what a regular RESET SLAVE does and also clears host, port, user and password
information thus allowing users to identify when an instance is no longer a
slave.
2011-07-18 18:18:03 +01:00
|
|
|
/*
  Forward declaration: Master_info::clear_in_memory_info() calls this
  function before its definition appears further down in this file.
*/
static void init_master_log_pos(Master_info* mi);
|
|
|
|
|
BUG#40337 Fsyncing master and relay log to disk after every event is too slow
NOTE: Backporting the patch to next-mr.
The fix proposed in BUG#35542 and BUG#31665 introduces a performance issue
when fsyncing the master.info, relay.info and relay-log.bin* after #th events.
Although such solution has been proposed to reduce the probability of corrupted
files due to a slave-crash, the performance penalty introduced by it has
made the approach impractical for highly intensive workloads.
In a nutshell, the option --sync-relay-log proposed in BUG#35542 and BUG#31665
simultaneously fsyncs master.info, relay-log.info and relay-log.bin*, and
this is the main source of the performance issues.
This patch introduces new options that give more control to the user on
what should be fsynced and how often:
1) (--sync-master-info, integer) which syncs the master.info after #th event;
2) (--sync-relay-log, integer) which syncs the relay-log.bin* after #th
events.
3) (--sync-relay-log-info, integer) which syncs the relay.info after #th
transactions.
To provide both performance and increased reliability, we recommend the following
setup:
1) --sync-master-info = 0 eventually the operating system will fsync it;
2) --sync-relay-log = 0 eventually the operating system will fsync it;
3) --sync-relay-log-info = 1 fsyncs it after every transaction;
Notice, that the previous setup does not reduce the probability of
corrupted master.info and relay-log.bin*. To overcome the issue, this patch also
introduces a recovery mechanism that right after restart throws away relay-log.bin*
retrieved from a master and updates the master.info based on the relay.info:
4) (--relay-log-recovery, boolean) which enables a recovery mechanism that
throws away relay-log.bin* after a crash.
However, it can only recover an incorrect binlog file and position in master.info;
if other information (host, port, password, etc.) is corrupted or incorrect,
this recovery mechanism will fail to work.
2009-09-29 15:40:52 +01:00
|
|
|
/**
  Build a Master_info in its "not yet initialised" state.

  All connection parameters are reset to their defaults (empty strings,
  MYSQL_PORT, DEFAULT_CONNECT_RETRY), the master.info descriptor is marked
  as closed (fd == -1), and every mutex / condition variable owned by the
  object is created.

  @param is_slave_recovery  forwarded unchanged to the constructor of the
                            embedded relay log info (rli)
*/
Master_info::Master_info(bool is_slave_recovery)
  : Slave_reporting_capability("I/O"),
    ssl(0),
    ssl_verify_server_cert(1),
    fd(-1),
    io_thd(0),
    rli(is_slave_recovery),
    port(MYSQL_PORT),
    checksum_alg_before_fd(BINLOG_CHECKSUM_ALG_UNDEF),
    connect_retry(DEFAULT_CONNECT_RETRY),
    inited(0),
    abort_slave(0),
    slave_running(0),
    slave_run_id(0),
    sync_counter(0),
    heartbeat_period(0),
    received_heartbeats(0),
    master_id(0)
{
  /* Connection credentials start out as empty strings */
  host[0]= '\0';
  user[0]= '\0';
  password[0]= '\0';

  /* So do all SSL related paths and settings */
  ssl_ca[0]= '\0';
  ssl_capath[0]= '\0';
  ssl_cert[0]= '\0';
  ssl_cipher[0]= '\0';
  ssl_key[0]= '\0';

  /* List of server ids whose events are to be ignored */
  my_init_dynamic_array(&ignore_server_ids, sizeof(::server_id), 16, 16);

  /* The IO_CACHE for master.info is zeroed until the file gets opened */
  bzero((char*) &file, sizeof(file));

  /* run_lock and data_lock are excluded from deadlock detection */
  mysql_mutex_init(key_master_info_run_lock, &run_lock, MY_MUTEX_INIT_FAST);
  mysql_mutex_setflags(&run_lock, MYF_NO_DEADLOCK_DETECTION);
  mysql_mutex_init(key_master_info_data_lock, &data_lock, MY_MUTEX_INIT_FAST);
  mysql_mutex_setflags(&data_lock, MYF_NO_DEADLOCK_DETECTION);
  mysql_mutex_init(key_master_info_sleep_lock, &sleep_lock,
                   MY_MUTEX_INIT_FAST);

  mysql_cond_init(key_master_info_data_cond, &data_cond, NULL);
  mysql_cond_init(key_master_info_start_cond, &start_cond, NULL);
  mysql_cond_init(key_master_info_stop_cond, &stop_cond, NULL);
  mysql_cond_init(key_master_info_sleep_cond, &sleep_cond, NULL);
}
|
|
|
|
|
2007-08-16 08:52:50 +02:00
|
|
|
/**
  Release everything the constructor created: the ignore_server_ids
  array and all mutexes and condition variables owned by this object.
*/
Master_info::~Master_info()
{
  delete_dynamic(&ignore_server_ids);

  /* Condition variables */
  mysql_cond_destroy(&data_cond);
  mysql_cond_destroy(&start_cond);
  mysql_cond_destroy(&stop_cond);
  mysql_cond_destroy(&sleep_cond);

  /* Mutexes */
  mysql_mutex_destroy(&run_lock);
  mysql_mutex_destroy(&data_lock);
  mysql_mutex_destroy(&sleep_lock);
}
|
|
|
|
|
2009-10-01 19:44:53 +03:00
|
|
|
/**
|
|
|
|
A comparison function to be supplied as argument to @c sort_dynamic()
|
|
|
|
and @c bsearch()
|
|
|
|
|
|
|
|
@return -1 if first argument is less, 0 if it equal to, 1 if it is greater
|
|
|
|
than the second
|
|
|
|
*/
|
|
|
|
int change_master_server_id_cmp(ulong *id1, ulong *id2)
|
|
|
|
{
|
|
|
|
return *id1 < *id2? -1 : (*id1 > *id2? 1 : 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
Reports if the s_id server has been configured to ignore events
|
|
|
|
it generates with
|
|
|
|
|
|
|
|
CHANGE MASTER IGNORE_SERVER_IDS= ( list of server ids )
|
|
|
|
|
|
|
|
Method is called from the io thread event receiver filtering.
|
|
|
|
|
|
|
|
@param s_id the master server identifier
|
|
|
|
|
|
|
|
@retval TRUE if s_id is in the list of ignored master servers,
|
|
|
|
@retval FALSE otherwise.
|
|
|
|
*/
|
|
|
|
bool Master_info::shall_ignore_server_id(ulong s_id)
|
|
|
|
{
|
|
|
|
if (likely(ignore_server_ids.elements == 1))
|
|
|
|
return (* (ulong*) dynamic_array_ptr(&ignore_server_ids, 0)) == s_id;
|
|
|
|
else
|
|
|
|
return bsearch((const ulong *) &s_id,
|
|
|
|
ignore_server_ids.buffer,
|
|
|
|
ignore_server_ids.elements, sizeof(ulong),
|
|
|
|
(int (*) (const void*, const void*)) change_master_server_id_cmp)
|
|
|
|
!= NULL;
|
|
|
|
}
|
2006-10-31 16:51:51 +01:00
|
|
|
|
BUG#11809016 - NO WAY TO DISCOVER AN INSTANCE IS NO LONGER A SLAVE FOLLOWING MYSQL BUG#28796
Before BUG#28796, an empty host was used to identify that an instance was no
longer a slave. However, BUG#28796 changed this behavior and one cannot set
an empty host. Besides, a RESET SLAVE only cleans up information on the next
event to retrieve from the master, disables ssl and resets heartbeat period.
So a call to SHOW SLAVE STATUS after issuing a RESET SLAVE still returns some
valid information, such as host, port, user and password.
To fix this problem, we have introduced the command RESET SLAVE ALL that does
what a regular RESET SLAVE does and also clears host, port, user and password
information thus allowing users to identify when an instance is no longer a
slave.
2011-07-18 18:18:03 +01:00
|
|
|
void Master_info::clear_in_memory_info(bool all)
|
|
|
|
{
|
|
|
|
init_master_log_pos(this);
|
|
|
|
if (all)
|
|
|
|
{
|
|
|
|
port= MYSQL_PORT;
|
|
|
|
host[0] = 0; user[0] = 0; password[0] = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-11-04 12:28:20 +00:00
|
|
|
/**
  Reset the master coordinates stored in @c mi to "start of the first
  binlog" and restore the defaults of the settings that go with them.

  @param mi  master info object to reset
*/
void init_master_log_pos(Master_info* mi)
{
  DBUG_ENTER("init_master_log_pos");

  /* No log file known yet; the position points right after the magic number */
  mi->master_log_pos= BIN_LOG_HEADER_SIZE;
  mi->master_log_name[0]= 0;

  /* Intentionally init ssl_verify_server_cert to 0, no option available */
  mi->ssl_verify_server_cert= 0;

  /*
    A heartbeat is always requested unless master_heartbeat_period has been
    set explicitly to zero. This is the default used when CHANGE MASTER did
    not specify a period: half of slave_net_timeout, capped at
    SLAVE_MAX_HEARTBEAT_PERIOD. Because of that cap the conversion to float
    cannot lose data.
  */
  mi->heartbeat_period= (float) min(SLAVE_MAX_HEARTBEAT_PERIOD,
                                    (slave_net_timeout/2.0));
  DBUG_ASSERT(mi->heartbeat_period == 0 ||
              mi->heartbeat_period > (float) 0.001);

  DBUG_VOID_RETURN;
}
|
|
|
|
|
|
|
|
|
2007-03-29 15:09:57 +02:00
|
|
|
/*
  Line numbers of the master.info file format. The first line of the file
  holds the number of lines in use, so each constant below doubles as the
  minimum line count a file must announce for that field to be present.
*/
enum {
  /* Files announcing at least this many lines carry the SSL settings */
  LINES_IN_MASTER_INFO_WITH_SSL= 14,

  /* master_ssl_verify_server_cert, added in 5.1.16 */
  LINE_FOR_MASTER_SSL_VERIFY_SERVER_CERT= 15,

  /* master_heartbeat_period, added in 6.0 */
  LINE_FOR_MASTER_HEARTBEAT_PERIOD= 16,

  /* master_bind, added by MySQL Cluster 6.3 */
  LINE_FOR_MASTER_BIND= 17,

  /* master_ignore_server_id list, added in 6.0 */
  LINE_FOR_REPLICATE_IGNORE_SERVER_IDS= 18,

  /* Line count written by the current version when saving master.info */
  LINES_IN_MASTER_INFO= LINE_FOR_REPLICATE_IGNORE_SERVER_IDS
};
|
2006-10-31 16:51:51 +01:00
|
|
|
|
2007-08-16 08:52:50 +02:00
|
|
|
/**
  Initialise @c mi from the master.info file, creating the file when it
  does not exist, then initialise the relay log info and write the
  (possibly upgraded) master.info back to disk.

  If @c mi is already initialised nothing is re-read; the only action is to
  rewind the current relay log when the SQL thread is about to be started.

  @param mi                            master info object to initialise
  @param master_info_fname             name of the master.info file; it is
                                       resolved against mysql_data_home
  @param slave_info_fname              file name passed on to
                                       init_relay_log_info()
  @param abort_if_no_master_info_file  when true and the file does not
                                       exist, return 0 without creating it
                                       and without initialising @c mi
  @param thread_mask                   only the SLAVE_SQL bit is inspected,
                                       and only when @c mi is already
                                       initialised

  @retval 0  success, or nothing to do
  @retval 1  the file could not be opened or parsed, or the relay log info
             could not be initialised
  @return    otherwise test() of the flush_master_info() result
             (NOTE(review): presumably 1 when flushing failed - confirm the
             definition of test())
*/
int init_master_info(Master_info* mi, const char* master_info_fname,
                     const char* slave_info_fname,
                     bool abort_if_no_master_info_file,
                     int thread_mask)
{
  int fd,error;
  char fname[FN_REFLEN+128];
  DBUG_ENTER("init_master_info");

  if (mi->inited)
  {
    /*
      We have to reset read position of relay-log-bin as we may have
      already been reading from 'hotlog' when the slave was stopped
      last time. If this case pos_in_file would be set and we would
      get a crash when trying to read the signature for the binary
      relay log.

      We only rewind the read position if we are starting the SQL
      thread. The handle_slave_sql thread assumes that the read
      position is at the beginning of the file, and will read the
      "signature" and then fast-forward to the last position read.
    */
    if (thread_mask & SLAVE_SQL)
    {
      bool hot_log= FALSE;
      /*
        my_b_seek does an implicit flush_io_cache, so we need to:

        1. check if this log is active (hot)
        2. if it is we keep log_lock until the seek ends, otherwise
           release it right away.

        If we did not take log_lock, SQL thread might race with IO
        thread for the IO_CACHE mutex.
      */
      mysql_mutex_t *log_lock= mi->rli.relay_log.get_log_lock();
      mysql_mutex_lock(log_lock);
      hot_log= mi->rli.relay_log.is_active(mi->rli.linfo.log_file_name);

      if (!hot_log)
        mysql_mutex_unlock(log_lock);

      my_b_seek(mi->rli.cur_log, (my_off_t) 0);

      if (hot_log)
        mysql_mutex_unlock(log_lock);
    }
    DBUG_RETURN(0);
  }

  mi->mysql=0;
  mi->file_id=1;
  fn_format(fname, master_info_fname, mysql_data_home, "", 4+32);

  /*
    We need a mutex while we are changing master info parameters to
    keep other threads from reading bogus info
  */
  mysql_mutex_lock(&mi->data_lock);
  fd = mi->fd;

  /* does master.info exist ? */
  if (access(fname,F_OK))
  {
    if (abort_if_no_master_info_file)
    {
      mysql_mutex_unlock(&mi->data_lock);
      DBUG_RETURN(0);
    }
    /*
      if someone removed the file from underneath our feet, just close
      the old descriptor and re-create the old file
    */
    if (fd >= 0)
      mysql_file_close(fd, MYF(MY_WME));
    if ((fd= mysql_file_open(key_file_master_info,
                             fname, O_CREAT|O_RDWR|O_BINARY, MYF(MY_WME))) < 0 )
    {
      sql_print_error("Failed to create a new master info file ("
                      "file '%s', errno %d)", fname, my_errno);
      goto err;
    }
    if (init_io_cache(&mi->file, fd, IO_SIZE*2, READ_CACHE, 0L,0,
                      MYF(MY_WME)))
    {
      sql_print_error("Failed to create a cache on master info file ("
                      "file '%s')", fname);
      goto err;
    }

    mi->fd = fd;
    /* Brand new file: start from default coordinates, keep host/user/etc. */
    mi->clear_in_memory_info(false);
  }
  else // file exists
  {
    if (fd >= 0)
      reinit_io_cache(&mi->file, READ_CACHE, 0L,0,0);
    else
    {
      if ((fd= mysql_file_open(key_file_master_info,
                               fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0 )
      {
        sql_print_error("Failed to open the existing master info file ("
                        "file '%s', errno %d)", fname, my_errno);
        goto err;
      }
      if (init_io_cache(&mi->file, fd, IO_SIZE*2, READ_CACHE, 0L,
                        0, MYF(MY_WME)))
      {
        sql_print_error("Failed to create a cache on master info file ("
                        "file '%s')", fname);
        goto err;
      }
    }

    mi->fd = fd;
    int port, connect_retry, master_log_pos, lines;
    int ssl= 0, ssl_verify_server_cert= 0;
    float master_heartbeat_period= 0.0;
    char *first_non_digit;
    char dummy_buf[HOSTNAME_LENGTH+1];

    /*
       Starting from 4.1.x master.info has new format. Now its
       first line contains number of lines in file. By reading this
       number we will be always distinguish to which version our
       master.info corresponds to. We can't simply count lines in
       file since versions before 4.1.x could generate files with more
       lines than needed.
       If first line doesn't contain a number or contain number less than
       LINES_IN_MASTER_INFO_WITH_SSL then such file is treated like file
       from pre 4.1.1 version.
       There is no ambiguity when reading an old master.info, as before
       4.1.1, the first line contained the binlog's name, which is either
       empty or has an extension (contains a '.'), so can't be confused
       with an integer.

       So we're just reading first line and trying to figure which version
       is this.
    */

    /*
       The first row is temporarily stored in mi->master_log_name,
       if it is line count and not binlog name (new format) it will be
       overwritten by the second row later.
    */
    if (init_strvar_from_file(mi->master_log_name,
                              sizeof(mi->master_log_name), &mi->file,
                              ""))
      goto errwithmsg;

    lines= strtoul(mi->master_log_name, &first_non_digit, 10);

    if (mi->master_log_name[0]!='\0' &&
        *first_non_digit=='\0' && lines >= LINES_IN_MASTER_INFO_WITH_SSL)
    {
      /* Seems to be new format => read master log name from next line */
      if (init_strvar_from_file(mi->master_log_name,
                                sizeof(mi->master_log_name), &mi->file, ""))
        goto errwithmsg;
    }
    else
      lines= 7;

    /*
      The password is bounded by the size of its destination buffer, just
      like host and user. The former hard-coded bound
      (SCRAMBLED_PASSWORD_CHAR_LENGTH+1) is unrelated to the buffer and
      would let a long line in master.info overrun mi->password whenever
      that buffer is smaller than the constant.
    */
    if (init_intvar_from_file(&master_log_pos, &mi->file, 4) ||
        init_strvar_from_file(mi->host, sizeof(mi->host), &mi->file, 0) ||
        init_strvar_from_file(mi->user, sizeof(mi->user), &mi->file, "test") ||
        init_strvar_from_file(mi->password, sizeof(mi->password),
                              &mi->file, 0) ||
        init_intvar_from_file(&port, &mi->file, MYSQL_PORT) ||
        init_intvar_from_file(&connect_retry, &mi->file,
                              DEFAULT_CONNECT_RETRY))
      goto errwithmsg;

    /*
      If file has ssl part use it even if we have server without
      SSL support. But these options will be ignored later when
      slave will try connect to master, so in this case warning
      is printed.
    */
    if (lines >= LINES_IN_MASTER_INFO_WITH_SSL)
    {
      if (init_intvar_from_file(&ssl, &mi->file, 0) ||
          init_strvar_from_file(mi->ssl_ca, sizeof(mi->ssl_ca),
                                &mi->file, 0) ||
          init_strvar_from_file(mi->ssl_capath, sizeof(mi->ssl_capath),
                                &mi->file, 0) ||
          init_strvar_from_file(mi->ssl_cert, sizeof(mi->ssl_cert),
                                &mi->file, 0) ||
          init_strvar_from_file(mi->ssl_cipher, sizeof(mi->ssl_cipher),
                                &mi->file, 0) ||
          init_strvar_from_file(mi->ssl_key, sizeof(mi->ssl_key),
                                &mi->file, 0))
        goto errwithmsg;

      /*
        Starting from 5.1.16 ssl_verify_server_cert might be
        in the file
      */
      if (lines >= LINE_FOR_MASTER_SSL_VERIFY_SERVER_CERT &&
          init_intvar_from_file(&ssl_verify_server_cert, &mi->file, 0))
        goto errwithmsg;
      /*
        Starting from 6.0 master_heartbeat_period might be
        in the file
      */
      if (lines >= LINE_FOR_MASTER_HEARTBEAT_PERIOD &&
          init_floatvar_from_file(&master_heartbeat_period, &mi->file, 0.0))
        goto errwithmsg;
      /*
        Starting from MySQL Cluster 6.3 master_bind might be in the file
        (this is just a reservation to avoid future upgrade problems)
      */
      if (lines >= LINE_FOR_MASTER_BIND &&
          init_strvar_from_file(dummy_buf, sizeof(dummy_buf), &mi->file, ""))
        goto errwithmsg;
      /*
        Starting from 6.0 list of server_id of ignorable servers might be
        in the file
      */
      if (lines >= LINE_FOR_REPLICATE_IGNORE_SERVER_IDS &&
          init_dynarray_intvar_from_file(&mi->ignore_server_ids, &mi->file))
      {
        sql_print_error("Failed to initialize master info ignore_server_ids");
        goto errwithmsg;
      }
    }

#ifndef HAVE_OPENSSL
    if (ssl)
      sql_print_warning("SSL information in the master info file "
                        "('%s') are ignored because this MySQL slave was "
                        "compiled without SSL support.", fname);
#endif /* HAVE_OPENSSL */

    /*
      This has to be handled here as init_intvar_from_file can't handle
      my_off_t types
    */
    mi->master_log_pos= (my_off_t) master_log_pos;
    mi->port= (uint) port;
    mi->connect_retry= (uint) connect_retry;
    mi->ssl= (my_bool) ssl;
    mi->ssl_verify_server_cert= ssl_verify_server_cert;
    mi->heartbeat_period= master_heartbeat_period;
  }
  DBUG_PRINT("master_info",("log_file_name: %s position: %ld",
                            mi->master_log_name,
                            (ulong) mi->master_log_pos));

  mi->rli.mi = mi;
  if (init_relay_log_info(&mi->rli, slave_info_fname))
    goto err;

  mi->inited = 1;
  mi->rli.is_relay_log_recovery= FALSE;
  // now change cache READ -> WRITE - must do this before flush_master_info
  reinit_io_cache(&mi->file, WRITE_CACHE, 0L, 0, 1);
  if ((error=test(flush_master_info(mi, TRUE, TRUE))))
    sql_print_error("Failed to flush master info file");
  mysql_mutex_unlock(&mi->data_lock);
  DBUG_RETURN(error);

errwithmsg:
  sql_print_error("Error reading master configuration");

err:
  /* Common failure exit: drop the descriptor and cache, release data_lock */
  if (fd >= 0)
  {
    mysql_file_close(fd, MYF(0));
    end_io_cache(&mi->file);
  }
  mi->fd= -1;
  mysql_mutex_unlock(&mi->data_lock);
  DBUG_RETURN(1);
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
RETURN
|
|
|
|
2 - flush relay log failed
|
|
|
|
1 - flush master info failed
|
|
|
|
0 - all ok
|
|
|
|
*/
|
2010-02-03 16:56:17 +00:00
|
|
|
int flush_master_info(Master_info* mi,
|
|
|
|
bool flush_relay_log_cache,
|
|
|
|
bool need_lock_relay_log)
|
2006-10-31 16:51:51 +01:00
|
|
|
{
|
|
|
|
IO_CACHE* file = &mi->file;
|
|
|
|
char lbuf[22];
|
2009-09-29 15:27:12 +01:00
|
|
|
int err= 0;
|
2007-03-29 15:09:57 +02:00
|
|
|
|
2006-10-31 16:51:51 +01:00
|
|
|
DBUG_ENTER("flush_master_info");
|
|
|
|
DBUG_PRINT("enter",("master_pos: %ld", (long) mi->master_log_pos));
|
|
|
|
|
|
|
|
/*
|
|
|
|
Flush the relay log to disk. If we don't do it, then the relay log while
|
|
|
|
have some part (its last kilobytes) in memory only, so if the slave server
|
|
|
|
dies now, with, say, from master's position 100 to 150 in memory only (not
|
|
|
|
on disk), and with position 150 in master.info, then when the slave
|
|
|
|
restarts, the I/O thread will fetch binlogs from 150, so in the relay log
|
|
|
|
we will have "[0, 100] U [150, infinity[" and nobody will notice it, so the
|
|
|
|
SQL thread will jump from 100 to 150, and replication will silently break.
|
|
|
|
|
|
|
|
When we come to this place in code, relay log may or not be initialized;
|
|
|
|
the caller is responsible for setting 'flush_relay_log_cache' accordingly.
|
|
|
|
*/
|
2009-09-29 15:27:12 +01:00
|
|
|
if (flush_relay_log_cache)
|
|
|
|
{
|
2010-02-03 17:19:58 +00:00
|
|
|
mysql_mutex_t *log_lock= mi->rli.relay_log.get_log_lock();
|
2009-09-29 15:27:12 +01:00
|
|
|
IO_CACHE *log_file= mi->rli.relay_log.get_log_file();
|
2010-02-03 16:56:17 +00:00
|
|
|
|
|
|
|
if (need_lock_relay_log)
|
2010-02-03 17:19:58 +00:00
|
|
|
mysql_mutex_lock(log_lock);
|
2010-02-03 16:56:17 +00:00
|
|
|
|
2010-02-03 17:19:58 +00:00
|
|
|
mysql_mutex_assert_owner(log_lock);
|
2010-02-03 16:56:17 +00:00
|
|
|
err= flush_io_cache(log_file);
|
|
|
|
|
|
|
|
if (need_lock_relay_log)
|
2010-02-03 17:19:58 +00:00
|
|
|
mysql_mutex_unlock(log_lock);
|
2010-02-03 16:56:17 +00:00
|
|
|
|
|
|
|
if (err)
|
2009-09-29 15:27:12 +01:00
|
|
|
DBUG_RETURN(2);
|
|
|
|
}
|
2009-10-01 19:44:53 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
produce a line listing the total number and all the ignored server_id:s
|
|
|
|
*/
|
|
|
|
char* ignore_server_ids_buf;
|
|
|
|
{
|
|
|
|
ignore_server_ids_buf=
|
|
|
|
(char *) my_malloc((sizeof(::server_id) * 3 + 1) *
|
|
|
|
(1 + mi->ignore_server_ids.elements), MYF(MY_WME));
|
|
|
|
if (!ignore_server_ids_buf)
|
|
|
|
DBUG_RETURN(1);
|
2010-07-09 09:28:51 -03:00
|
|
|
ulong cur_len= sprintf(ignore_server_ids_buf, "%u",
|
|
|
|
mi->ignore_server_ids.elements);
|
|
|
|
for (ulong i= 0; i < mi->ignore_server_ids.elements; i++)
|
2009-10-01 19:44:53 +03:00
|
|
|
{
|
|
|
|
ulong s_id;
|
|
|
|
get_dynamic(&mi->ignore_server_ids, (uchar*) &s_id, i);
|
2010-07-09 09:28:51 -03:00
|
|
|
cur_len+= sprintf(ignore_server_ids_buf + cur_len, " %lu", s_id);
|
2009-10-01 19:44:53 +03:00
|
|
|
}
|
|
|
|
}
|
2006-10-31 16:51:51 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
We flushed the relay log BEFORE the master.info file, because if we crash
|
|
|
|
now, we will get a duplicate event in the relay log at restart. If we
|
|
|
|
flushed in the other order, we would get a hole in the relay log.
|
|
|
|
And duplicate is better than hole (with a duplicate, in later versions we
|
|
|
|
can add detection and scrap one event; with a hole there's nothing we can
|
|
|
|
do).
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
In certain cases this code may create master.info files that seems
|
|
|
|
corrupted, because of extra lines filled with garbage in the end
|
|
|
|
file (this happens if new contents take less space than previous
|
|
|
|
contents of file). But because of number of lines in the first line
|
|
|
|
of file we don't care about this garbage.
|
|
|
|
*/
|
2009-09-29 14:16:23 +03:00
|
|
|
char heartbeat_buf[sizeof(mi->heartbeat_period) * 4]; // buffer to suffice always
|
2010-07-09 09:28:51 -03:00
|
|
|
sprintf(heartbeat_buf, "%.3f", mi->heartbeat_period);
|
2006-10-31 16:51:51 +01:00
|
|
|
my_b_seek(file, 0L);
|
2007-03-29 15:09:57 +02:00
|
|
|
my_b_printf(file,
|
2010-06-02 10:11:49 +02:00
|
|
|
"%u\n%s\n%s\n%s\n%s\n%s\n%d\n%d\n%d\n%s\n%s\n%s\n%s\n%s\n%d\n%s\n%s\n%s\n",
|
2007-03-29 15:09:57 +02:00
|
|
|
LINES_IN_MASTER_INFO,
|
2006-10-31 16:51:51 +01:00
|
|
|
mi->master_log_name, llstr(mi->master_log_pos, lbuf),
|
|
|
|
mi->host, mi->user,
|
|
|
|
mi->password, mi->port, mi->connect_retry,
|
|
|
|
(int)(mi->ssl), mi->ssl_ca, mi->ssl_capath, mi->ssl_cert,
|
2009-09-29 14:16:23 +03:00
|
|
|
mi->ssl_cipher, mi->ssl_key, mi->ssl_verify_server_cert,
|
2010-06-02 10:11:49 +02:00
|
|
|
heartbeat_buf, "", ignore_server_ids_buf);
|
Bug#34043: Server loops excessively in _checkchunk() when safemalloc is enabled
Essentially, the problem is that safemalloc is excruciatingly
slow as it checks all allocated blocks for overrun at each
memory management primitive, yielding a almost exponential
slowdown for the memory management functions (malloc, realloc,
free). The overrun check basically consists of verifying some
bytes of a block for certain magic keys, which catches some
simple forms of overrun. Another minor problem is violation
of aliasing rules and that its own internal list of blocks
is prone to corruption.
Another issue with safemalloc is rather the maintenance cost
as the tool has a significant impact on the server code.
Given the magnitude of memory debuggers available nowadays,
especially those that are provided with the platform malloc
implementation, maintenance of a in-house and largely obsolete
memory debugger becomes a burden that is not worth the effort
due to its slowness and lack of support for detecting more
common forms of heap corruption.
Since there are third-party tools that can provide the same
functionality at a lower or comparable performance cost, the
solution is to simply remove safemalloc. Third-party tools
can provide the same functionality at a lower or comparable
performance cost.
The removal of safemalloc also allows a simplification of the
malloc wrappers, removing quite a bit of kludge: redefinition
of my_malloc, my_free and the removal of the unused second
argument of my_free. Since free() always check whether the
supplied pointer is null, redudant checks are also removed.
Also, this patch adds unit testing for my_malloc and moves
my_realloc implementation into the same file as the other
memory allocation primitives.
client/mysqldump.c:
Pass my_free directly as its signature is compatible with the
callback type -- which wasn't the case for free_table_ent.
2010-07-08 18:20:08 -03:00
|
|
|
my_free(ignore_server_ids_buf);
|
2009-09-29 15:27:12 +01:00
|
|
|
err= flush_io_cache(file);
|
BUG#40337 Fsyncing master and relay log to disk after every event is too slow
NOTE: Backporting the patch to next-mr.
The fix proposed in BUG#35542 and BUG#31665 introduces a performance issue
when fsyncing the master.info, relay.info and relay-log.bin* after #th events.
Although such solution has been proposed to reduce the probability of corrupted
files due to a slave-crash, the performance penalty introduced by it has
made the approach impractical for highly intensive workloads.
In a nutshell, the option --syn-relay-log proposed in BUG#35542 and BUG#31665
simultaneously fsyncs master.info, relay-log.info and relay-log.bin* and
this is the main source of performance issues.
This patch introduces new options that give more control to the user on
what should be fsynced and how often:
1) (--sync-master-info, integer) which syncs the master.info after #th event;
2) (--sync-relay-log, integer) which syncs the relay-log.bin* after #th
events.
3) (--sync-relay-log-info, integer) which syncs the relay.info after #th
transactions.
To provide both performance and increased reliability, we recommend the following
setup:
1) --sync-master-info = 0 eventually the operating system will fsync it;
2) --sync-relay-log = 0 eventually the operating system will fsync it;
3) --sync-relay-log-info = 1 fsyncs it after every transaction;
Notice, that the previous setup does not reduce the probability of
corrupted master.info and relay-log.bin*. To overcome the issue, this patch also
introduces a recovery mechanism that right after restart throws away relay-log.bin*
retrieved from a master and updates the master.info based on the relay.info:
4) (--relay-log-recovery, boolean) which enables a recovery mechanism that
throws away relay-log.bin* after a crash.
However, it can only recover the incorrect binlog file and position in master.info,
if other informations (host, port password, etc) are corrupted or incorrect,
then this recovery mechanism will fail to work.
2009-09-29 15:40:52 +01:00
|
|
|
if (sync_masterinfo_period && !err &&
|
|
|
|
++(mi->sync_counter) >= sync_masterinfo_period)
|
|
|
|
{
|
2009-09-29 15:27:12 +01:00
|
|
|
err= my_sync(mi->fd, MYF(MY_WME));
|
BUG#40337 Fsyncing master and relay log to disk after every event is too slow
NOTE: Backporting the patch to next-mr.
The fix proposed in BUG#35542 and BUG#31665 introduces a performance issue
when fsyncing the master.info, relay.info and relay-log.bin* after #th events.
Although such solution has been proposed to reduce the probability of corrupted
files due to a slave-crash, the performance penalty introduced by it has
made the approach impractical for highly intensive workloads.
In a nutshell, the option --syn-relay-log proposed in BUG#35542 and BUG#31665
simultaneously fsyncs master.info, relay-log.info and relay-log.bin* and
this is the main source of performance issues.
This patch introduces new options that give more control to the user on
what should be fsynced and how often:
1) (--sync-master-info, integer) which syncs the master.info after #th event;
2) (--sync-relay-log, integer) which syncs the relay-log.bin* after #th
events.
3) (--sync-relay-log-info, integer) which syncs the relay.info after #th
transactions.
To provide both performance and increased reliability, we recommend the following
setup:
1) --sync-master-info = 0 eventually the operating system will fsync it;
2) --sync-relay-log = 0 eventually the operating system will fsync it;
3) --sync-relay-log-info = 1 fsyncs it after every transaction;
Notice, that the previous setup does not reduce the probability of
corrupted master.info and relay-log.bin*. To overcome the issue, this patch also
introduces a recovery mechanism that right after restart throws away relay-log.bin*
retrieved from a master and updates the master.info based on the relay.info:
4) (--relay-log-recovery, boolean) which enables a recovery mechanism that
throws away relay-log.bin* after a crash.
However, it can only recover the incorrect binlog file and position in master.info,
if other informations (host, port password, etc) are corrupted or incorrect,
then this recovery mechanism will fail to work.
2009-09-29 15:40:52 +01:00
|
|
|
mi->sync_counter= 0;
|
|
|
|
}
|
2009-09-29 15:27:12 +01:00
|
|
|
DBUG_RETURN(-err);
|
2006-10-31 16:51:51 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2007-08-16 08:52:50 +02:00
|
|
|
/**
  Shut down an initialised master info: end the relay log info, close the
  master.info cache and descriptor, and mark @c mi as not initialised.
  Does nothing if @c mi was never initialised.

  @param mi  master info object to shut down
*/
void end_master_info(Master_info* mi)
{
  DBUG_ENTER("end_master_info");

  if (mi->inited)
  {
    end_relay_log_info(&mi->rli);

    /* Only tear down the file when a descriptor is actually open */
    if (mi->fd >= 0)
    {
      end_io_cache(&mi->file);
      mysql_file_close(mi->fd, MYF(MY_WME));
      mi->fd= -1;
    }
    mi->inited= 0;
  }

  DBUG_VOID_RETURN;
}
|
|
|
|
|
|
|
|
|
|
|
|
#endif /* HAVE_REPLICATION */
|