mariadb/sql/repl_failsafe.cc
Davi Arnaut f56dd32bf7 Bug#34043: Server loops excessively in _checkchunk() when safemalloc is enabled
Essentially, the problem is that safemalloc is excruciatingly
slow as it checks all allocated blocks for overrun at each
memory management primitive, yielding a almost exponential
slowdown for the memory management functions (malloc, realloc,
free). The overrun check basically consists of verifying some
bytes of a block for certain magic keys, which catches some
simple forms of overrun. Another minor problem is violation
of aliasing rules and that its own internal list of blocks
is prone to corruption.

Another issue with safemalloc is rather the maintenance cost
as the tool has a significant impact on the server code.
Given the magnitude of memory debuggers available nowadays,
especially those that are provided with the platform malloc
implementation, maintenance of a in-house and largely obsolete
memory debugger becomes a burden that is not worth the effort
due to its slowness and lack of support for detecting more
common forms of heap corruption.

Since there are third-party tools that can provide the same
functionality at a lower or comparable performance cost, the
solution is to simply remove safemalloc. Third-party tools
can provide the same functionality at a lower or comparable
performance cost. 

The removal of safemalloc also allows a simplification of the
malloc wrappers, removing quite a bit of kludge: redefinition
of my_malloc, my_free and the removal of the unused second
argument of my_free. Since free() always check whether the
supplied pointer is null, redudant checks are also removed.

Also, this patch adds unit testing for my_malloc and moves
my_realloc implementation into the same file as the other
memory allocation primitives.

client/mysqldump.c:
  Pass my_free directly as its signature is compatible with the
  callback type -- which wasn't the case for free_table_ent.
2010-07-08 18:20:08 -03:00

744 lines
19 KiB
C++

/* Copyright (C) 2001-2006 MySQL AB & Sasha, 2008-2009 Sun Microsystems, Inc
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/**
@file
All of the functions defined in this file which are not used (the ones to
handle failsafe) are not used; their code has not been updated for more
than one year now so should be considered as BADLY BROKEN. Do not enable
it. The used functions (to handle LOAD DATA FROM MASTER, plus some small
functions like register_slave()) are working.
*/
#include "sql_priv.h"
#include "sql_parse.h" // check_access
#ifdef HAVE_REPLICATION
#include "repl_failsafe.h"
#include "sql_acl.h" // REPL_SLAVE_ACL
#include "sql_repl.h"
#include "slave.h"
#include "rpl_mi.h"
#include "rpl_filter.h"
#include "log_event.h"
#include "sql_db.h" // mysql_create_db
#include <mysql.h>
#define SLAVE_LIST_CHUNK 128
#define SLAVE_ERRMSG_SIZE (FN_REFLEN+64)
RPL_STATUS rpl_status=RPL_NULL;
mysql_mutex_t LOCK_rpl_status;
mysql_cond_t COND_rpl_status;
HASH slave_list;
const char *rpl_role_type[] = {"MASTER","SLAVE",NullS};
TYPELIB rpl_role_typelib = {array_elements(rpl_role_type)-1,"",
rpl_role_type, NULL};
const char* rpl_status_type[]=
{
"AUTH_MASTER","IDLE_SLAVE","ACTIVE_SLAVE","LOST_SOLDIER","TROOP_SOLDIER",
"RECOVERY_CAPTAIN","NULL",NullS
};
TYPELIB rpl_status_typelib= {array_elements(rpl_status_type)-1,"",
rpl_status_type, NULL};
static Slave_log_event* find_slave_event(IO_CACHE* log,
const char* log_file_name,
char* errmsg);
/*
All of the functions defined in this file which are not used (the ones to
handle failsafe) are not used; their code has not been updated for more than
one year now so should be considered as BADLY BROKEN. Do not enable it.
The used functions (to handle LOAD DATA FROM MASTER, plus some small
functions like register_slave()) are working.
*/
#if NOT_USED
static int init_failsafe_rpl_thread(THD* thd)
{
DBUG_ENTER("init_failsafe_rpl_thread");
thd->system_thread = SYSTEM_THREAD_DELAYED_INSERT;
/*
thd->bootstrap is to report errors barely to stderr; if this code is
enable again one day, one should check if bootstrap is still needed (maybe
this thread has no other error reporting method).
*/
thd->bootstrap = 1;
thd->security_ctx->skip_grants();
my_net_init(&thd->net, 0);
thd->net.read_timeout = slave_net_timeout;
thd->max_client_packet_length=thd->net.max_packet;
mysql_mutex_lock(&LOCK_thread_count);
thd->thread_id= thd->variables.pseudo_thread_id= thread_id++;
mysql_mutex_unlock(&LOCK_thread_count);
if (init_thr_lock() || thd->store_globals())
{
/* purecov: begin inspected */
close_connection(thd, ER_OUT_OF_RESOURCES, 1); // is this needed?
statistic_increment(aborted_connects,&LOCK_status);
one_thread_per_connection_end(thd,0);
DBUG_RETURN(-1);
/* purecov: end */
}
thd->mem_root->free= thd->mem_root->used= 0;
thd_proc_info(thd, "Thread initialized");
thd->set_time();
DBUG_RETURN(0);
}
#endif
void change_rpl_status(RPL_STATUS from_status, RPL_STATUS to_status)
{
mysql_mutex_lock(&LOCK_rpl_status);
if (rpl_status == from_status || rpl_status == RPL_ANY)
rpl_status = to_status;
mysql_cond_signal(&COND_rpl_status);
mysql_mutex_unlock(&LOCK_rpl_status);
}
#define get_object(p, obj, msg) \
{\
uint len = (uint)*p++; \
if (p + len > p_end || len >= sizeof(obj)) \
{\
errmsg= msg;\
goto err; \
}\
strmake(obj,(char*) p,len); \
p+= len; \
}\
static inline int cmp_master_pos(Slave_log_event* sev, LEX_MASTER_INFO* mi)
{
return cmp_master_pos(sev->master_log, sev->master_pos, mi->log_file_name,
mi->pos);
}
void unregister_slave(THD* thd, bool only_mine, bool need_mutex)
{
if (thd->server_id)
{
if (need_mutex)
mysql_mutex_lock(&LOCK_slave_list);
SLAVE_INFO* old_si;
if ((old_si = (SLAVE_INFO*)my_hash_search(&slave_list,
(uchar*)&thd->server_id, 4)) &&
(!only_mine || old_si->thd == thd))
my_hash_delete(&slave_list, (uchar*)old_si);
if (need_mutex)
mysql_mutex_unlock(&LOCK_slave_list);
}
}
/**
Register slave in 'slave_list' hash table.
@return
0 ok
@return
1 Error. Error message sent to client
*/
int register_slave(THD* thd, uchar* packet, uint packet_length)
{
int res;
SLAVE_INFO *si;
uchar *p= packet, *p_end= packet + packet_length;
const char *errmsg= "Wrong parameters to function register_slave";
if (check_access(thd, REPL_SLAVE_ACL, any_db, NULL, NULL, 0, 0))
return 1;
if (!(si = (SLAVE_INFO*)my_malloc(sizeof(SLAVE_INFO), MYF(MY_WME))))
goto err2;
thd->server_id= si->server_id= uint4korr(p);
p+= 4;
get_object(p,si->host, "Failed to register slave: too long 'report-host'");
get_object(p,si->user, "Failed to register slave: too long 'report-user'");
get_object(p,si->password, "Failed to register slave; too long 'report-password'");
if (p+10 > p_end)
goto err;
si->port= uint2korr(p);
p += 2;
/*
We need to by pass the bytes used in the fake rpl_recovery_rank
variable. It was removed in patch for BUG#13963. But this would
make a server with that patch unable to connect to an old master.
See: BUG#49259
*/
// si->rpl_recovery_rank= uint4korr(p);
p += 4;
if (!(si->master_id= uint4korr(p)))
si->master_id= server_id;
si->thd= thd;
mysql_mutex_lock(&LOCK_slave_list);
unregister_slave(thd,0,0);
res= my_hash_insert(&slave_list, (uchar*) si);
mysql_mutex_unlock(&LOCK_slave_list);
return res;
err:
my_free(si);
my_message(ER_UNKNOWN_ERROR, errmsg, MYF(0)); /* purecov: inspected */
err2:
return 1;
}
extern "C" uint32
*slave_list_key(SLAVE_INFO* si, size_t *len,
my_bool not_used __attribute__((unused)))
{
*len = 4;
return &si->server_id;
}
extern "C" void slave_info_free(void *s)
{
my_free(s);
}
#ifdef HAVE_PSI_INTERFACE
static PSI_mutex_key key_LOCK_slave_list;
static PSI_mutex_info all_slave_list_mutexes[]=
{
{ &key_LOCK_slave_list, "LOCK_slave_list", PSI_FLAG_GLOBAL}
};
static void init_all_slave_list_mutexes(void)
{
const char* category= "sql";
int count;
if (PSI_server == NULL)
return;
count= array_elements(all_slave_list_mutexes);
PSI_server->register_mutex(category, all_slave_list_mutexes, count);
}
#endif /* HAVE_PSI_INTERFACE */
void init_slave_list()
{
#ifdef HAVE_PSI_INTERFACE
init_all_slave_list_mutexes();
#endif
my_hash_init(&slave_list, system_charset_info, SLAVE_LIST_CHUNK, 0, 0,
(my_hash_get_key) slave_list_key,
(my_hash_free_key) slave_info_free, 0);
mysql_mutex_init(key_LOCK_slave_list, &LOCK_slave_list, MY_MUTEX_INIT_FAST);
}
void end_slave_list()
{
/* No protection by a mutex needed as we are only called at shutdown */
if (my_hash_inited(&slave_list))
{
my_hash_free(&slave_list);
mysql_mutex_destroy(&LOCK_slave_list);
}
}
static int find_target_pos(LEX_MASTER_INFO *mi, IO_CACHE *log, char *errmsg)
{
my_off_t log_pos = (my_off_t) mi->pos;
uint32 target_server_id = mi->server_id;
for (;;)
{
Log_event* ev;
if (!(ev= Log_event::read_log_event(log, (mysql_mutex_t*) 0, 0)))
{
if (log->error > 0)
strmov(errmsg, "Binary log truncated in the middle of event");
else if (log->error < 0)
strmov(errmsg, "I/O error reading binary log");
else
strmov(errmsg, "Could not find target event in the binary log");
return 1;
}
if (ev->log_pos >= log_pos && ev->server_id == target_server_id)
{
delete ev;
mi->pos = my_b_tell(log);
return 0;
}
delete ev;
}
/* Impossible */
}
/**
@details
Before 4.0.15 we had a member of THD called log_pos, it was meant for
failsafe replication code in repl_failsafe.cc which is disabled until
it is reworked. Event's log_pos used to be preserved through
log-slave-updates to make code in repl_failsafe.cc work (this
function, SHOW NEW MASTER); but on the other side it caused unexpected
values in Exec_Master_Log_Pos in A->B->C replication setup,
synchronization problems in master_pos_wait(), ... So we
(Dmitri & Guilhem) removed it.
So for now this function is broken.
*/
int translate_master(THD* thd, LEX_MASTER_INFO* mi, char* errmsg)
{
LOG_INFO linfo;
char last_log_name[FN_REFLEN];
IO_CACHE log;
File file = -1, last_file = -1;
mysql_mutex_t *log_lock;
const char* errmsg_p;
Slave_log_event* sev = 0;
my_off_t last_pos = 0;
int error = 1;
int cmp_res;
LINT_INIT(cmp_res);
DBUG_ENTER("translate_master");
if (!mysql_bin_log.is_open())
{
strmov(errmsg,"Binary log is not open");
DBUG_RETURN(1);
}
if (!server_id_supplied)
{
strmov(errmsg, "Misconfigured master - server id was not set");
DBUG_RETURN(1);
}
if (mysql_bin_log.find_log_pos(&linfo, NullS, 1))
{
strmov(errmsg,"Could not find first log");
DBUG_RETURN(1);
}
thd->current_linfo = &linfo;
bzero((char*) &log,sizeof(log));
log_lock = mysql_bin_log.get_log_lock();
mysql_mutex_lock(log_lock);
for (;;)
{
if ((file=open_binlog(&log, linfo.log_file_name, &errmsg_p)) < 0)
{
strmov(errmsg, errmsg_p);
goto err;
}
if (!(sev = find_slave_event(&log, linfo.log_file_name, errmsg)))
goto err;
cmp_res = cmp_master_pos(sev, mi);
delete sev;
if (!cmp_res)
{
/* Copy basename */
fn_format(mi->log_file_name, linfo.log_file_name, "","",1);
mi->pos = my_b_tell(&log);
goto mi_inited;
}
else if (cmp_res > 0)
{
if (!last_pos)
{
strmov(errmsg,
"Slave event in first log points past the target position");
goto err;
}
end_io_cache(&log);
mysql_file_close(file, MYF(MY_WME));
if (init_io_cache(&log, (file = last_file), IO_SIZE, READ_CACHE, 0, 0,
MYF(MY_WME)))
{
errmsg[0] = 0;
goto err;
}
break;
}
strmov(last_log_name, linfo.log_file_name);
last_pos = my_b_tell(&log);
switch (mysql_bin_log.find_next_log(&linfo, 1)) {
case LOG_INFO_EOF:
if (last_file >= 0)
mysql_file_close(last_file, MYF(MY_WME));
last_file = -1;
goto found_log;
case 0:
break;
default:
strmov(errmsg, "Error reading log index");
goto err;
}
end_io_cache(&log);
if (last_file >= 0)
mysql_file_close(last_file, MYF(MY_WME));
last_file = file;
}
found_log:
my_b_seek(&log, last_pos);
if (find_target_pos(mi,&log,errmsg))
goto err;
fn_format(mi->log_file_name, last_log_name, "","",1); /* Copy basename */
mi_inited:
error = 0;
err:
mysql_mutex_unlock(log_lock);
end_io_cache(&log);
mysql_mutex_lock(&LOCK_thread_count);
thd->current_linfo = 0;
mysql_mutex_unlock(&LOCK_thread_count);
if (file >= 0)
mysql_file_close(file, MYF(MY_WME));
if (last_file >= 0 && last_file != file)
mysql_file_close(last_file, MYF(MY_WME));
DBUG_RETURN(error);
}
/**
Caller must delete result when done.
*/
static Slave_log_event* find_slave_event(IO_CACHE* log,
const char* log_file_name,
char* errmsg)
{
Log_event* ev;
int i;
bool slave_event_found = 0;
LINT_INIT(ev);
for (i = 0; i < 2; i++)
{
if (!(ev= Log_event::read_log_event(log, (mysql_mutex_t*)0, 0)))
{
my_snprintf(errmsg, SLAVE_ERRMSG_SIZE,
"Error reading event in log '%s'",
(char*)log_file_name);
return 0;
}
if (ev->get_type_code() == SLAVE_EVENT)
{
slave_event_found = 1;
break;
}
delete ev;
}
if (!slave_event_found)
{
my_snprintf(errmsg, SLAVE_ERRMSG_SIZE,
"Could not find slave event in log '%s'",
(char*)log_file_name);
return 0;
}
return (Slave_log_event*)ev;
}
/**
This function is broken now.
@seealso translate_master()
*/
bool show_new_master(THD* thd)
{
Protocol *protocol= thd->protocol;
DBUG_ENTER("show_new_master");
List<Item> field_list;
char errmsg[SLAVE_ERRMSG_SIZE];
LEX_MASTER_INFO* lex_mi= &thd->lex->mi;
errmsg[0]=0; // Safety
if (translate_master(thd, lex_mi, errmsg))
{
if (errmsg[0])
my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0),
"SHOW NEW MASTER", errmsg);
DBUG_RETURN(TRUE);
}
else
{
field_list.push_back(new Item_empty_string("Log_name", 20));
field_list.push_back(new Item_return_int("Log_pos", 10,
MYSQL_TYPE_LONGLONG));
if (protocol->send_result_set_metadata(&field_list,
Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
DBUG_RETURN(TRUE);
protocol->prepare_for_resend();
protocol->store(lex_mi->log_file_name, &my_charset_bin);
protocol->store((ulonglong) lex_mi->pos);
if (protocol->write())
DBUG_RETURN(TRUE);
my_eof(thd);
DBUG_RETURN(FALSE);
}
}
/**
Asks the master for the list of its other connected slaves.
This is for failsafe replication:
in order for failsafe replication to work, the servers involved in
replication must know of each other. We accomplish this by having each
slave report to the master how to reach it, and on connection, each
slave receives information about where the other slaves are.
@param mysql pre-existing connection to the master
@param mi master info
@note
mi is used only to give detailed error messages which include the
hostname/port of the master, the username used by the slave to connect to
the master.
If the user used by the slave to connect to the master does not have the
REPLICATION SLAVE privilege, it will pop in this function because
SHOW SLAVE HOSTS will fail on the master.
@retval
1 error
@retval
0 success
*/
int update_slave_list(MYSQL* mysql, Master_info* mi)
{
MYSQL_RES* res=0;
MYSQL_ROW row;
const char* error=0;
bool have_auth_info;
int port_ind;
DBUG_ENTER("update_slave_list");
if (mysql_real_query(mysql, STRING_WITH_LEN("SHOW SLAVE HOSTS")) ||
!(res = mysql_store_result(mysql)))
{
error= mysql_error(mysql);
goto err;
}
switch (mysql_num_fields(res)) {
case 5:
have_auth_info = 0;
port_ind=2;
break;
case 7:
have_auth_info = 1;
port_ind=4;
break;
default:
error= "the master returned an invalid number of fields for SHOW SLAVE \
HOSTS";
goto err;
}
mysql_mutex_lock(&LOCK_slave_list);
while ((row= mysql_fetch_row(res)))
{
uint32 log_server_id;
SLAVE_INFO* si, *old_si;
log_server_id = atoi(row[0]);
if ((old_si= (SLAVE_INFO*)my_hash_search(&slave_list,
(uchar*)&log_server_id,4)))
si = old_si;
else
{
if (!(si = (SLAVE_INFO*)my_malloc(sizeof(SLAVE_INFO), MYF(MY_WME))))
{
error= "the slave is out of memory";
mysql_mutex_unlock(&LOCK_slave_list);
goto err;
}
si->server_id = log_server_id;
if (my_hash_insert(&slave_list, (uchar*)si))
{
error= "the slave is out of memory";
mysql_mutex_unlock(&LOCK_slave_list);
goto err;
}
}
strmake(si->host, row[1], sizeof(si->host)-1);
si->port = atoi(row[port_ind]);
si->rpl_recovery_rank = atoi(row[port_ind+1]);
si->master_id = atoi(row[port_ind+2]);
if (have_auth_info)
{
strmake(si->user, row[2], sizeof(si->user)-1);
strmake(si->password, row[3], sizeof(si->password)-1);
}
}
mysql_mutex_unlock(&LOCK_slave_list);
err:
if (res)
mysql_free_result(res);
if (error)
{
sql_print_error("While trying to obtain the list of slaves from the master "
"'%s:%d', user '%s' got the following error: '%s'",
mi->host, mi->port, mi->user, error);
DBUG_RETURN(1);
}
DBUG_RETURN(0);
}
#if NOT_USED
int find_recovery_captain(THD* thd, MYSQL* mysql)
{
return 0;
}
#endif
#if NOT_USED
pthread_handler_t handle_failsafe_rpl(void *arg)
{
DBUG_ENTER("handle_failsafe_rpl");
THD *thd = new THD;
thd->thread_stack = (char*)&thd;
MYSQL* recovery_captain = 0;
const char* msg;
pthread_detach_this_thread();
if (init_failsafe_rpl_thread(thd) || !(recovery_captain=mysql_init(0)))
{
sql_print_error("Could not initialize failsafe replication thread");
goto err;
}
mysql_mutex_lock(&LOCK_rpl_status);
msg= thd->enter_cond(&COND_rpl_status,
&LOCK_rpl_status, "Waiting for request");
while (!thd->killed && !abort_loop)
{
bool break_req_chain = 0;
mysql_cond_wait(&COND_rpl_status, &LOCK_rpl_status);
thd_proc_info(thd, "Processing request");
while (!break_req_chain)
{
switch (rpl_status) {
case RPL_LOST_SOLDIER:
if (find_recovery_captain(thd, recovery_captain))
rpl_status=RPL_TROOP_SOLDIER;
else
rpl_status=RPL_RECOVERY_CAPTAIN;
break_req_chain=1; /* for now until other states are implemented */
break;
default:
break_req_chain=1;
break;
}
}
}
thd->exit_cond(msg);
err:
if (recovery_captain)
mysql_close(recovery_captain);
delete thd;
DBUG_LEAVE; // Must match DBUG_ENTER()
my_thread_end();
pthread_exit(0);
return 0; // Avoid compiler warnings
}
#endif
/**
Execute a SHOW SLAVE HOSTS statement.
@param thd Pointer to THD object for the client thread executing the
statement.
@retval FALSE success
@retval TRUE failure
*/
bool show_slave_hosts(THD* thd)
{
List<Item> field_list;
Protocol *protocol= thd->protocol;
DBUG_ENTER("show_slave_hosts");
field_list.push_back(new Item_return_int("Server_id", 10,
MYSQL_TYPE_LONG));
field_list.push_back(new Item_empty_string("Host", 20));
if (opt_show_slave_auth_info)
{
field_list.push_back(new Item_empty_string("User",20));
field_list.push_back(new Item_empty_string("Password",20));
}
field_list.push_back(new Item_return_int("Port", 7, MYSQL_TYPE_LONG));
field_list.push_back(new Item_return_int("Master_id", 10,
MYSQL_TYPE_LONG));
if (protocol->send_result_set_metadata(&field_list,
Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
DBUG_RETURN(TRUE);
mysql_mutex_lock(&LOCK_slave_list);
for (uint i = 0; i < slave_list.records; ++i)
{
SLAVE_INFO* si = (SLAVE_INFO*) my_hash_element(&slave_list, i);
protocol->prepare_for_resend();
protocol->store((uint32) si->server_id);
protocol->store(si->host, &my_charset_bin);
if (opt_show_slave_auth_info)
{
protocol->store(si->user, &my_charset_bin);
protocol->store(si->password, &my_charset_bin);
}
protocol->store((uint32) si->port);
protocol->store((uint32) si->master_id);
if (protocol->write())
{
mysql_mutex_unlock(&LOCK_slave_list);
DBUG_RETURN(TRUE);
}
}
mysql_mutex_unlock(&LOCK_slave_list);
my_eof(thd);
DBUG_RETURN(FALSE);
}
#endif /* HAVE_REPLICATION */