Merge MDEV-7888 and MDEV-7929 into 10.1.

This commit is contained in:
Kristian Nielsen 2015-04-08 11:04:24 +02:00
commit 48c10fb5f7
7 changed files with 231 additions and 3 deletions

View file

@ -1549,6 +1549,64 @@ a b
99 99
include/stop_slave.inc
SET GLOBAL slave_transaction_retries= @old_retries;
SET GLOBAL slave_parallel_threads=10;
include/start_slave.inc
*** MDEV-7888: ANALYZE TABLE does wakeup_subsequent_commits(), causing wrong binlog order and parallel replication hang ***
include/stop_slave.inc
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug= '+d,inject_analyze_table_sleep';
SET @old_dbug= @@SESSION.debug_dbug;
SET SESSION debug_dbug="+d,binlog_force_commit_id";
SET @commit_id= 10000;
ANALYZE TABLE t2;
Table Op Msg_type Msg_text
test.t2 analyze status OK
INSERT INTO t3 VALUES (120, 0);
SET @commit_id= 10001;
INSERT INTO t3 VALUES (121, 0);
SET SESSION debug_dbug=@old_dbug;
SELECT * FROM t3 WHERE a >= 120 ORDER BY a;
a b
120 0
121 0
include/save_master_gtid.inc
include/start_slave.inc
include/sync_with_master_gtid.inc
SELECT * FROM t3 WHERE a >= 120 ORDER BY a;
a b
120 0
121 0
include/stop_slave.inc
SET GLOBAL debug_dbug= @old_debug;
include/start_slave.inc
*** MDEV-7929: record_gtid() for non-transactional event group calls wakeup_subsequent_commits() too early, causing slave hang. ***
include/stop_slave.inc
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug= '+d,inject_record_gtid_serverid_100_sleep';
SET @old_dbug= @@SESSION.debug_dbug;
SET SESSION debug_dbug="+d,binlog_force_commit_id";
SET @old_server_id= @@SESSION.server_id;
SET SESSION server_id= 100;
SET @commit_id= 10010;
ALTER TABLE t1 COMMENT "Hulubulu!";
SET SESSION server_id= @old_server_id;
INSERT INTO t3 VALUES (130, 0);
SET @commit_id= 10011;
INSERT INTO t3 VALUES (131, 0);
SET SESSION debug_dbug=@old_dbug;
SELECT * FROM t3 WHERE a >= 130 ORDER BY a;
a b
130 0
131 0
include/save_master_gtid.inc
include/start_slave.inc
include/sync_with_master_gtid.inc
SELECT * FROM t3 WHERE a >= 130 ORDER BY a;
a b
130 0
131 0
include/stop_slave.inc
SET GLOBAL debug_dbug= @old_debug;
include/start_slave.inc
include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads;

View file

@ -2158,6 +2158,100 @@ SELECT * FROM t8 ORDER BY a;
--source include/stop_slave.inc
SET GLOBAL slave_transaction_retries= @old_retries;
SET GLOBAL slave_parallel_threads=10;
--source include/start_slave.inc
--echo *** MDEV-7888: ANALYZE TABLE does wakeup_subsequent_commits(), causing wrong binlog order and parallel replication hang ***
--connection server_2
--source include/stop_slave.inc
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug= '+d,inject_analyze_table_sleep';
--connection server_1
# Inject two group commits. The bug was that ANALYZE TABLE would call
# wakeup_subsequent_commits() too early, allowing the following transaction
# in the same group to run ahead and binlog and free the GCO. Then we get
# wrong binlog order and later access freed GCO, which causes lost wakeup
# of following GCO and thus replication hang.
# We injected a small sleep in ANALYZE to make the race easier to hit (this
# can only cause false negatives in versions with the bug, not false positives,
# so sleep is ok here. And it's in general not possible to trigger reliably
# the race with debug_sync, since the bugfix makes the race impossible).
SET @old_dbug= @@SESSION.debug_dbug;
SET SESSION debug_dbug="+d,binlog_force_commit_id";
# Group commit with cid=10000, two event groups.
SET @commit_id= 10000;
ANALYZE TABLE t2;
INSERT INTO t3 VALUES (120, 0);
# Group commit with cid=10001, one event group.
SET @commit_id= 10001;
INSERT INTO t3 VALUES (121, 0);
SET SESSION debug_dbug=@old_dbug;
SELECT * FROM t3 WHERE a >= 120 ORDER BY a;
--source include/save_master_gtid.inc
--connection server_2
--source include/start_slave.inc
--source include/sync_with_master_gtid.inc
SELECT * FROM t3 WHERE a >= 120 ORDER BY a;
--source include/stop_slave.inc
SET GLOBAL debug_dbug= @old_debug;
--source include/start_slave.inc
--echo *** MDEV-7929: record_gtid() for non-transactional event group calls wakeup_subsequent_commits() too early, causing slave hang. ***
--connection server_2
--source include/stop_slave.inc
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug= '+d,inject_record_gtid_serverid_100_sleep';
--connection server_1
# Inject two group commits. The bug was that record_gtid for a
# non-transactional event group would commit its own transaction, which would
# cause ha_commit_trans() to call wakeup_subsequent_commits() too early. This
# in turn lead to access to freed group_commit_orderer object, losing a wakeup
# and causing slave threads to hang.
# We inject a small sleep in the corresponding record_gtid() to make the race
# easier to hit.
SET @old_dbug= @@SESSION.debug_dbug;
SET SESSION debug_dbug="+d,binlog_force_commit_id";
# Group commit with cid=10010, two event groups.
SET @old_server_id= @@SESSION.server_id;
SET SESSION server_id= 100;
SET @commit_id= 10010;
ALTER TABLE t1 COMMENT "Hulubulu!";
SET SESSION server_id= @old_server_id;
INSERT INTO t3 VALUES (130, 0);
# Group commit with cid=10011, one event group.
SET @commit_id= 10011;
INSERT INTO t3 VALUES (131, 0);
SET SESSION debug_dbug=@old_dbug;
SELECT * FROM t3 WHERE a >= 130 ORDER BY a;
--source include/save_master_gtid.inc
--connection server_2
--source include/start_slave.inc
--source include/sync_with_master_gtid.inc
SELECT * FROM t3 WHERE a >= 130 ORDER BY a;
--source include/stop_slave.inc
SET GLOBAL debug_dbug= @old_debug;
--source include/start_slave.inc

View file

@ -5932,6 +5932,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
if (direct)
{
int res;
uint64 commit_id= 0;
DBUG_PRINT("info", ("direct is set"));
if ((res= thd->wait_for_prior_commit()))
DBUG_RETURN(res);
@ -5939,7 +5940,16 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
my_org_b_tell= my_b_tell(file);
mysql_mutex_lock(&LOCK_log);
prev_binlog_id= current_binlog_id;
if (write_gtid_event(thd, true, using_trans, 0))
DBUG_EXECUTE_IF("binlog_force_commit_id",
{
const LEX_STRING name= { C_STRING_WITH_LEN("commit_id") };
bool null_value;
user_var_entry *entry=
(user_var_entry*) my_hash_search(&thd->user_vars,
(uchar*) name.str, name.length);
commit_id= entry->val_int(&null_value);
});
if (write_gtid_event(thd, true, using_trans, commit_id))
goto err;
}
else

View file

@ -517,6 +517,7 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
element *elem;
ulonglong thd_saved_option= thd->variables.option_bits;
Query_tables_list lex_backup;
wait_for_commit* suspended_wfc;
DBUG_ENTER("record_gtid");
if (unlikely(!loaded))
@ -540,6 +541,28 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
DBUG_RETURN(1);
} );
/*
If we are applying a non-transactional event group, we will be committing
here a transaction, but that does not imply that the event group has
completed or has been binlogged. So we should not trigger
wakeup_subsequent_commits() here.
Note: An alternative here could be to put a call to mark_start_commit() in
stmt_done() before the call to record_and_update_gtid(). This would
prevent later calling mark_start_commit() after we have run
wakeup_subsequent_commits() from committing the GTID update transaction
(which must be avoided to avoid accessing freed group_commit_orderer
object). It would also allow following event groups to start slightly
earlier. And in the cases where record_gtid() is called without an active
transaction, the current statement should have been binlogged already, so
binlog order is preserved.
But this is rather subtle, and potentially fragile. And it does not really
seem worth it; non-transactional loads are unlikely to benefit much from
parallel replication in any case. So for now, we go with the simple
suspend/resume of wakeup_subsequent_commits() here in record_gtid().
*/
suspended_wfc= thd->suspend_subsequent_commits();
thd->lex->reset_n_backup_query_tables_list(&lex_backup);
tlist.init_one_table(STRING_WITH_LEN("mysql"),
rpl_gtid_slave_state_table_name.str,
@ -691,6 +714,12 @@ end:
}
thd->lex->restore_backup_query_tables_list(&lex_backup);
thd->variables.option_bits= thd_saved_option;
thd->resume_subsequent_commits(suspended_wfc);
DBUG_EXECUTE_IF("inject_record_gtid_serverid_100_sleep",
{
if (gtid->server_id == 100)
my_sleep(500000);
});
DBUG_RETURN(err);
}

View file

@ -170,8 +170,24 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id,
/* Now free any GCOs in which all transactions have committed. */
group_commit_orderer *tmp_gco= rgi->gco;
while (tmp_gco &&
(!tmp_gco->next_gco || tmp_gco->last_sub_id > sub_id))
(!tmp_gco->next_gco || tmp_gco->last_sub_id > sub_id ||
tmp_gco->next_gco->wait_count > entry->count_committing_event_groups))
{
/*
We must not free a GCO before the wait_count of the following GCO has
been reached and wakeup has been sent. Otherwise we will lose the
wakeup and hang (there were several such bugs in the past).
The intention is that this is ensured already since we only free when
the last event group in the GCO has committed
(tmp_gco->last_sub_id <= sub_id). However, if we have a bug, we have
extra check on next_gco->wait_count to hopefully avoid hanging; we
have here an assertion in debug builds that this check does not in
fact trigger.
*/
DBUG_ASSERT(!tmp_gco->next_gco || tmp_gco->last_sub_id > sub_id);
tmp_gco= tmp_gco->prev_gco;
}
while (tmp_gco)
{
group_commit_orderer *prev_gco= tmp_gco->prev_gco;

View file

@ -320,6 +320,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
int result_code;
int compl_result_code;
bool need_repair_or_alter= 0;
wait_for_commit* suspended_wfc;
DBUG_ENTER("mysql_admin_table");
DBUG_PRINT("enter", ("extra_open_options: %u", extra_open_options));
@ -337,6 +338,13 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
DBUG_RETURN(TRUE);
/*
This function calls trans_commit() during its operation, but that does not
imply that the operation is complete or binlogged. So we have to suspend
temporarily the wakeup_subsequent_commits() calls (if used).
*/
suspended_wfc= thd->suspend_subsequent_commits();
mysql_ha_rm_tables(thd, tables);
/*
@ -464,7 +472,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
if (!table->table->part_info)
{
my_error(ER_PARTITION_MGMT_ON_NONPARTITIONED, MYF(0));
DBUG_RETURN(TRUE);
goto err2;
}
if (set_part_state(alter_info, table->table->part_info, PART_ADMIN))
{
@ -1045,6 +1053,8 @@ send_result_message:
}
my_eof(thd);
thd->resume_subsequent_commits(suspended_wfc);
DBUG_EXECUTE_IF("inject_analyze_table_sleep", my_sleep(500000););
DBUG_RETURN(FALSE);
err:
@ -1058,6 +1068,8 @@ err:
}
close_thread_tables(thd); // Shouldn't be needed
thd->mdl_context.release_transactional_locks();
err2:
thd->resume_subsequent_commits(suspended_wfc);
DBUG_RETURN(TRUE);
}

View file

@ -3774,6 +3774,15 @@ public:
if (wait_for_commit_ptr)
wait_for_commit_ptr->wakeup_subsequent_commits(wakeup_error);
}
wait_for_commit *suspend_subsequent_commits() {
wait_for_commit *suspended= wait_for_commit_ptr;
wait_for_commit_ptr= NULL;
return suspended;
}
void resume_subsequent_commits(wait_for_commit *suspended) {
DBUG_ASSERT(!wait_for_commit_ptr);
wait_for_commit_ptr= suspended;
}
private: