mirror of
https://github.com/MariaDB/server.git
synced 2025-01-29 02:05:57 +01:00
BUG#36197: flush tables (or little table cache) can cause crash on slave
When flushing tables, there were a slight chance that the flush was occuring between processing of two table map events. Since the tables are opened one by one, it might result in that the tables were not valid and that sub- sequent locking of tables would cause the slave to crash. The problem is solved by opening and locking all tables at once using simple_open_n_lock_tables(). Also, the patch contain a change to open_tables() so that pre-locking only takes place when the trg_event_map is not zero, which was not the case before (this caused the lock to be placed in thd->locked_tables instead of thd->lock since the assumption was that triggers would be called later and therefore the tables should be pre-locked).
This commit is contained in:
parent
2d7104e0c0
commit
812fa8b24c
8 changed files with 113 additions and 212 deletions
31
mysql-test/include/wait_for_slave_sql_to_start.inc
Normal file
31
mysql-test/include/wait_for_slave_sql_to_start.inc
Normal file
|
@ -0,0 +1,31 @@
|
|||
###################################################
|
||||
#Author: Mats (based on file written by Jeb)
|
||||
#Date: 2008-05-06
|
||||
#Purpose: To wait for slave SQL thread to start
|
||||
#Details:
|
||||
# 1) Fill in and setup variables
|
||||
# 2) loop through looking for both
|
||||
# io and sql threads to start
|
||||
# 3) If loops too long die.
|
||||
####################################################
|
||||
connection slave;
|
||||
let $row_number= 1;
|
||||
let $run= 1;
|
||||
let $counter= 300;
|
||||
|
||||
while ($run)
|
||||
{
|
||||
let $sql_result= query_get_value("SHOW SLAVE STATUS", Slave_SQL_Running, $row_number);
|
||||
if (`SELECT '$sql_result' = 'Yes'`){
|
||||
let $run= 0;
|
||||
}
|
||||
sleep 0.1;
|
||||
if (!$counter){
|
||||
--echo "Failed while waiting for slave SQL to start"
|
||||
query_vertical SHOW SLAVE STATUS;
|
||||
exit;
|
||||
}
|
||||
dec $counter;
|
||||
}
|
||||
|
||||
|
|
@ -226,7 +226,7 @@ sect test count
|
|||
2 6 0
|
||||
2 6 183
|
||||
2 7 0
|
||||
DROP TABLE t1, logtbl;
|
||||
DROP TABLE t1, t2, logtbl;
|
||||
DROP PROCEDURE just_log;
|
||||
DROP PROCEDURE log_me;
|
||||
DROP PROCEDURE log_me_inner;
|
||||
|
|
|
@ -37,7 +37,7 @@ Replicate_Ignore_Table #
|
|||
Replicate_Wild_Do_Table
|
||||
Replicate_Wild_Ignore_Table
|
||||
Last_Errno 1146
|
||||
Last_Error Error 'Table 'test.t1' doesn't exist' on opening table `test`.`t1`
|
||||
Last_Error Error 'Table 'test.t1' doesn't exist' on opening tables
|
||||
Skip_Counter 0
|
||||
Exec_Master_Log_Pos 941
|
||||
Relay_Log_Space #
|
||||
|
@ -55,5 +55,5 @@ Master_SSL_Verify_Server_Cert No
|
|||
Last_IO_Errno #
|
||||
Last_IO_Error #
|
||||
Last_SQL_Errno 1146
|
||||
Last_SQL_Error Error 'Table 'test.t1' doesn't exist' on opening table `test`.`t1`
|
||||
Last_SQL_Error Error 'Table 'test.t1' doesn't exist' on opening tables
|
||||
drop table t1, t2;
|
||||
|
|
|
@ -247,7 +247,7 @@ sync_slave_with_master;
|
|||
SELECT * FROM logtbl WHERE sect = 2 ORDER BY sect,test;
|
||||
|
||||
connection master;
|
||||
DROP TABLE t1, logtbl;
|
||||
DROP TABLE t1, t2, logtbl;
|
||||
DROP PROCEDURE just_log;
|
||||
DROP PROCEDURE log_me;
|
||||
DROP PROCEDURE log_me_inner;
|
||||
|
|
|
@ -36,15 +36,16 @@ connection slave;
|
|||
# 4. Restart slave without privileges
|
||||
# (slave.err will contain access denied error for this START SLAVE command)
|
||||
stop slave;
|
||||
source include/wait_for_slave_to_stop.inc;
|
||||
start slave;
|
||||
source include/wait_for_slave_sql_to_start.inc;
|
||||
|
||||
# 5. Make sure Slave_IO_Running = No
|
||||
--replace_result $MASTER_MYPORT MASTER_MYPORT
|
||||
# Column 1 is replaced, since the output can be either
|
||||
# "Connecting to master" or "Waiting for master update"
|
||||
--replace_column 1 # 7 # 8 # 9 # 22 # 23 # 35 # 36 #
|
||||
--vertical_results
|
||||
show slave status;
|
||||
query_vertical show slave status;
|
||||
|
||||
# Cleanup (Note that slave IO thread is not running)
|
||||
connection slave;
|
||||
|
|
173
sql/log_event.cc
173
sql/log_event.cc
|
@ -6425,15 +6425,29 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
|
|||
*/
|
||||
if (!thd->lock)
|
||||
{
|
||||
bool need_reopen= 1; /* To execute the first lap of the loop below */
|
||||
|
||||
/*
|
||||
lock_tables() reads the contents of thd->lex, so they must be
|
||||
initialized. Contrary to in
|
||||
Table_map_log_event::do_apply_event() we don't call
|
||||
mysql_init_query() as that may reset the binlog format.
|
||||
Lock_tables() reads the contents of thd->lex, so they must be
|
||||
initialized.
|
||||
|
||||
We also call the mysql_reset_thd_for_next_command(), since this
|
||||
is the logical start of the next "statement". Note that this
|
||||
call might reset the value of current_stmt_binlog_row_based, so
|
||||
we need to do any changes to that value after this function.
|
||||
*/
|
||||
lex_start(thd);
|
||||
mysql_reset_thd_for_next_command(thd);
|
||||
|
||||
/*
|
||||
Check if the slave is set to use SBR. If so, it should switch
|
||||
to using RBR until the end of the "statement", i.e., next
|
||||
STMT_END_F or next error.
|
||||
*/
|
||||
if (!thd->current_stmt_binlog_row_based &&
|
||||
mysql_bin_log.is_open() && (thd->options & OPTION_BIN_LOG))
|
||||
{
|
||||
thd->set_current_stmt_binlog_row_based();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
There are a few flags that are replicated with each row event.
|
||||
|
@ -6452,72 +6466,23 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
|
|||
/* A small test to verify that objects have consistent types */
|
||||
DBUG_ASSERT(sizeof(thd->options) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS));
|
||||
|
||||
|
||||
while ((error= lock_tables(thd, rli->tables_to_lock,
|
||||
rli->tables_to_lock_count, &need_reopen)))
|
||||
if (simple_open_n_lock_tables(thd, rli->tables_to_lock))
|
||||
{
|
||||
if (!need_reopen)
|
||||
uint actual_error= thd->main_da.sql_errno();
|
||||
if (thd->is_slave_error || thd->is_fatal_error)
|
||||
{
|
||||
if (thd->is_slave_error || thd->is_fatal_error)
|
||||
{
|
||||
/*
|
||||
Error reporting borrowed from Query_log_event with many excessive
|
||||
simplifications (we don't honour --slave-skip-errors)
|
||||
*/
|
||||
uint actual_error= thd->main_da.sql_errno();
|
||||
rli->report(ERROR_LEVEL, actual_error,
|
||||
"Error '%s' in %s event: when locking tables",
|
||||
(actual_error ? thd->main_da.message():
|
||||
"unexpected success or fatal error"),
|
||||
get_type_str());
|
||||
thd->is_fatal_error= 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
rli->report(ERROR_LEVEL, error,
|
||||
"Error in %s event: when locking tables",
|
||||
get_type_str());
|
||||
}
|
||||
const_cast<Relay_log_info*>(rli)->clear_tables_to_lock();
|
||||
DBUG_RETURN(error);
|
||||
}
|
||||
|
||||
/*
|
||||
So we need to reopen the tables.
|
||||
|
||||
We need to flush the pending RBR event, since it keeps a
|
||||
pointer to an open table.
|
||||
|
||||
ALTERNATIVE SOLUTION (not implemented): Extract a pointer to
|
||||
the pending RBR event and reset the table pointer after the
|
||||
tables has been reopened.
|
||||
|
||||
NOTE: For this new scheme there should be no pending event:
|
||||
need to add code to assert that is the case.
|
||||
*/
|
||||
thd->binlog_flush_pending_rows_event(false);
|
||||
TABLE_LIST *tables= rli->tables_to_lock;
|
||||
close_tables_for_reopen(thd, &tables);
|
||||
|
||||
uint tables_count= rli->tables_to_lock_count;
|
||||
if ((error= open_tables(thd, &tables, &tables_count, 0)))
|
||||
{
|
||||
if (thd->is_slave_error || thd->is_fatal_error)
|
||||
{
|
||||
/*
|
||||
Error reporting borrowed from Query_log_event with many excessive
|
||||
simplifications (we don't honour --slave-skip-errors)
|
||||
*/
|
||||
uint actual_error= thd->main_da.sql_errno();
|
||||
rli->report(ERROR_LEVEL, actual_error,
|
||||
"Error '%s' on reopening tables",
|
||||
(actual_error ? thd->main_da.message() :
|
||||
"unexpected success or fatal error"));
|
||||
thd->is_slave_error= 1;
|
||||
}
|
||||
const_cast<Relay_log_info*>(rli)->clear_tables_to_lock();
|
||||
DBUG_RETURN(error);
|
||||
/*
|
||||
Error reporting borrowed from Query_log_event with many excessive
|
||||
simplifications (we don't honour --slave-skip-errors)
|
||||
*/
|
||||
rli->report(ERROR_LEVEL, actual_error,
|
||||
"Error '%s' on opening tables",
|
||||
(actual_error ? thd->main_da.message() :
|
||||
"unexpected success or fatal error"));
|
||||
thd->is_slave_error= 1;
|
||||
}
|
||||
const_cast<Relay_log_info*>(rli)->clear_tables_to_lock();
|
||||
DBUG_RETURN(actual_error);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -6570,6 +6535,8 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli)
|
|||
table=
|
||||
m_table= const_cast<Relay_log_info*>(rli)->m_table_map.get_table(m_table_id);
|
||||
|
||||
DBUG_PRINT("debug", ("m_table: 0x%lx, m_table_id: %lu", (ulong) m_table, m_table_id));
|
||||
|
||||
if (table)
|
||||
{
|
||||
/*
|
||||
|
@ -7293,71 +7260,7 @@ int Table_map_log_event::do_apply_event(Relay_log_info const *rli)
|
|||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
open_tables() reads the contents of thd->lex, so they must be
|
||||
initialized, so we should call lex_start(); to be even safer, we
|
||||
call mysql_init_query() which does a more complete set of inits.
|
||||
*/
|
||||
lex_start(thd);
|
||||
mysql_reset_thd_for_next_command(thd);
|
||||
/*
|
||||
Check if the slave is set to use SBR. If so, it should switch
|
||||
to using RBR until the end of the "statement", i.e., next
|
||||
STMT_END_F or next error.
|
||||
*/
|
||||
if (!thd->current_stmt_binlog_row_based &&
|
||||
mysql_bin_log.is_open() && (thd->options & OPTION_BIN_LOG))
|
||||
{
|
||||
thd->set_current_stmt_binlog_row_based();
|
||||
}
|
||||
|
||||
/*
|
||||
Open the table if it is not already open and add the table to
|
||||
table map. Note that for any table that should not be
|
||||
replicated, a filter is needed.
|
||||
|
||||
The creation of a new TABLE_LIST is used to up-cast the
|
||||
table_list consisting of RPL_TABLE_LIST items. This will work
|
||||
since the only case where the argument to open_tables() is
|
||||
changed, is when thd->lex->query_tables == table_list, i.e.,
|
||||
when the statement requires prelocking. Since this is not
|
||||
executed when a statement is executed, this case will not occur.
|
||||
As a precaution, an assertion is added to ensure that the bad
|
||||
case is not a fact.
|
||||
|
||||
Either way, the memory in the list is *never* released
|
||||
internally in the open_tables() function, hence we take a copy
|
||||
of the pointer to make sure that it's not lost.
|
||||
*/
|
||||
uint count;
|
||||
DBUG_ASSERT(thd->lex->query_tables != table_list);
|
||||
TABLE_LIST *tmp_table_list= table_list;
|
||||
if ((error= open_tables(thd, &tmp_table_list, &count, 0)))
|
||||
{
|
||||
if (thd->is_slave_error || thd->is_fatal_error)
|
||||
{
|
||||
/*
|
||||
Error reporting borrowed from Query_log_event with many excessive
|
||||
simplifications (we don't honour --slave-skip-errors)
|
||||
*/
|
||||
uint actual_error= thd->main_da.sql_errno();
|
||||
rli->report(ERROR_LEVEL, actual_error,
|
||||
"Error '%s' on opening table `%s`.`%s`",
|
||||
(actual_error ? thd->main_da.message() :
|
||||
"unexpected success or fatal error"),
|
||||
table_list->db, table_list->table_name);
|
||||
thd->is_slave_error= 1;
|
||||
}
|
||||
goto err;
|
||||
}
|
||||
|
||||
m_table= table_list->table;
|
||||
|
||||
/*
|
||||
This will fail later otherwise, the 'in_use' field should be
|
||||
set to the current thread.
|
||||
*/
|
||||
DBUG_ASSERT(m_table->in_use);
|
||||
|
||||
/*
|
||||
Use placement new to construct the table_def instance in the
|
||||
|
@ -7383,10 +7286,6 @@ int Table_map_log_event::do_apply_event(Relay_log_info const *rli)
|
|||
}
|
||||
|
||||
DBUG_RETURN(error);
|
||||
|
||||
err:
|
||||
my_free(memory, MYF(MY_WME));
|
||||
DBUG_RETURN(error);
|
||||
}
|
||||
|
||||
Log_event::enum_skip_reason
|
||||
|
|
|
@ -53,81 +53,46 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, const Relay_log_info
|
|||
*/
|
||||
if (!thd->lock)
|
||||
{
|
||||
bool need_reopen= 1; /* To execute the first lap of the loop below */
|
||||
|
||||
/*
|
||||
lock_tables() reads the contents of thd->lex, so they must be
|
||||
initialized. Contrary to in
|
||||
Table_map_log_event::do_apply_event() we don't call
|
||||
mysql_init_query() as that may reset the binlog format.
|
||||
Lock_tables() reads the contents of thd->lex, so they must be
|
||||
initialized.
|
||||
|
||||
We also call the mysql_reset_thd_for_next_command(), since this
|
||||
is the logical start of the next "statement". Note that this
|
||||
call might reset the value of current_stmt_binlog_row_based, so
|
||||
we need to do any changes to that value after this function.
|
||||
*/
|
||||
lex_start(thd);
|
||||
mysql_reset_thd_for_next_command(thd);
|
||||
|
||||
while ((error= lock_tables(thd, rli->tables_to_lock,
|
||||
rli->tables_to_lock_count, &need_reopen)))
|
||||
/*
|
||||
Check if the slave is set to use SBR. If so, it should switch
|
||||
to using RBR until the end of the "statement", i.e., next
|
||||
STMT_END_F or next error.
|
||||
*/
|
||||
if (!thd->current_stmt_binlog_row_based &&
|
||||
mysql_bin_log.is_open() && (thd->options & OPTION_BIN_LOG))
|
||||
{
|
||||
if (!need_reopen)
|
||||
thd->set_current_stmt_binlog_row_based();
|
||||
}
|
||||
|
||||
if (simple_open_n_lock_tables(thd, rli->tables_to_lock))
|
||||
{
|
||||
uint actual_error= thd->main_da.sql_errno();
|
||||
if (thd->is_slave_error || thd->is_fatal_error)
|
||||
{
|
||||
if (thd->is_slave_error || thd->is_fatal_error)
|
||||
{
|
||||
/*
|
||||
Error reporting borrowed from Query_log_event with many excessive
|
||||
simplifications (we don't honour --slave-skip-errors)
|
||||
*/
|
||||
uint actual_error= thd->main_da.sql_errno();
|
||||
rli->report(ERROR_LEVEL, actual_error,
|
||||
"Error '%s' in %s event: when locking tables",
|
||||
(actual_error ? thd->main_da.message() :
|
||||
"unexpected success or fatal error"),
|
||||
ev->get_type_str());
|
||||
thd->is_fatal_error= 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
rli->report(ERROR_LEVEL, error,
|
||||
"Error in %s event: when locking tables",
|
||||
ev->get_type_str());
|
||||
}
|
||||
const_cast<Relay_log_info*>(rli)->clear_tables_to_lock();
|
||||
DBUG_RETURN(error);
|
||||
}
|
||||
|
||||
/*
|
||||
So we need to reopen the tables.
|
||||
|
||||
We need to flush the pending RBR event, since it keeps a
|
||||
pointer to an open table.
|
||||
|
||||
ALTERNATIVE SOLUTION (not implemented): Extract a pointer to
|
||||
the pending RBR event and reset the table pointer after the
|
||||
tables has been reopened.
|
||||
|
||||
NOTE: For this new scheme there should be no pending event:
|
||||
need to add code to assert that is the case.
|
||||
*/
|
||||
thd->binlog_flush_pending_rows_event(false);
|
||||
TABLE_LIST *tables= rli->tables_to_lock;
|
||||
close_tables_for_reopen(thd, &tables);
|
||||
|
||||
uint tables_count= rli->tables_to_lock_count;
|
||||
if ((error= open_tables(thd, &tables, &tables_count, 0)))
|
||||
{
|
||||
if (thd->is_slave_error || thd->is_fatal_error)
|
||||
{
|
||||
/*
|
||||
Error reporting borrowed from Query_log_event with many excessive
|
||||
simplifications (we don't honour --slave-skip-errors)
|
||||
*/
|
||||
uint actual_error= thd->main_da.sql_errno();
|
||||
rli->report(ERROR_LEVEL, actual_error,
|
||||
"Error '%s' on reopening tables",
|
||||
(actual_error ? thd->main_da.message() :
|
||||
"unexpected success or fatal error"));
|
||||
thd->is_slave_error= 1;
|
||||
}
|
||||
const_cast<Relay_log_info*>(rli)->clear_tables_to_lock();
|
||||
DBUG_RETURN(error);
|
||||
/*
|
||||
Error reporting borrowed from Query_log_event with many excessive
|
||||
simplifications (we don't honour --slave-skip-errors)
|
||||
*/
|
||||
rli->report(ERROR_LEVEL, actual_error,
|
||||
"Error '%s' on opening tables",
|
||||
(actual_error ? thd->main_da.message() :
|
||||
"unexpected success or fatal error"));
|
||||
thd->is_slave_error= 1;
|
||||
}
|
||||
const_cast<Relay_log_info*>(rli)->clear_tables_to_lock();
|
||||
DBUG_RETURN(actual_error);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -4367,6 +4367,11 @@ bool fix_merge_after_open(TABLE_LIST *old_child_list, TABLE_LIST **old_last,
|
|||
prelocking it won't do such precaching and will simply reuse table list
|
||||
which is already built.
|
||||
|
||||
If any table has a trigger and start->trg_event_map is non-zero
|
||||
the final lock will end up in thd->locked_tables, otherwise, the
|
||||
lock will be placed in thd->lock. See also comments in
|
||||
st_lex::set_trg_event_type_for_tables().
|
||||
|
||||
RETURN
|
||||
0 - OK
|
||||
-1 - error
|
||||
|
@ -4579,7 +4584,7 @@ int open_tables(THD *thd, TABLE_LIST **start, uint *counter, uint flags)
|
|||
process its triggers since they never will be activated.
|
||||
*/
|
||||
if (!thd->prelocked_mode && !thd->lex->requires_prelocking() &&
|
||||
tables->table->triggers &&
|
||||
tables->trg_event_map && tables->table->triggers &&
|
||||
tables->lock_type >= TL_WRITE_ALLOW_WRITE)
|
||||
{
|
||||
if (!query_tables_last_own)
|
||||
|
|
Loading…
Add table
Reference in a new issue