mirror of
https://github.com/MariaDB/server.git
synced 2025-01-22 14:54:20 +01:00
branches/zip
rb://130 Enable Group Commit functionality that was broken in 5.0 when distributed transactions were introduced. Reviewed by: Heikki
This commit is contained in:
parent
b667060360
commit
bfa7cf72aa
3 changed files with 43 additions and 34 deletions
|
@ -2449,7 +2449,12 @@ retry:
|
||||||
trx->mysql_log_file_name = mysql_bin_log_file_name();
|
trx->mysql_log_file_name = mysql_bin_log_file_name();
|
||||||
trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
|
trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
|
||||||
|
|
||||||
|
/* Don't do write + flush right now. For group commit
|
||||||
|
to work we want to do the flush after releasing the
|
||||||
|
prepare_commit_mutex. */
|
||||||
|
trx->flush_log_later = TRUE;
|
||||||
innobase_commit_low(trx);
|
innobase_commit_low(trx);
|
||||||
|
trx->flush_log_later = FALSE;
|
||||||
|
|
||||||
if (innobase_commit_concurrency > 0) {
|
if (innobase_commit_concurrency > 0) {
|
||||||
pthread_mutex_lock(&commit_cond_m);
|
pthread_mutex_lock(&commit_cond_m);
|
||||||
|
@ -2463,6 +2468,8 @@ retry:
|
||||||
pthread_mutex_unlock(&prepare_commit_mutex);
|
pthread_mutex_unlock(&prepare_commit_mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Now do a write + flush of logs. */
|
||||||
|
trx_commit_complete_for_mysql(trx);
|
||||||
trx->active_trans = 0;
|
trx->active_trans = 0;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
@ -8934,33 +8941,6 @@ innobase_xa_prepare(
|
||||||
|
|
||||||
DBUG_ASSERT(hton == innodb_hton_ptr);
|
DBUG_ASSERT(hton == innodb_hton_ptr);
|
||||||
|
|
||||||
if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
|
|
||||||
(all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
|
|
||||||
{
|
|
||||||
|
|
||||||
/* For ibbackup to work the order of transactions in binlog
|
|
||||||
and InnoDB must be the same. Consider the situation
|
|
||||||
|
|
||||||
thread1> prepare; write to binlog; ...
|
|
||||||
<context switch>
|
|
||||||
thread2> prepare; write to binlog; commit
|
|
||||||
thread1> ... commit
|
|
||||||
|
|
||||||
To ensure this will not happen we're taking the mutex on
|
|
||||||
prepare, and releasing it on commit.
|
|
||||||
|
|
||||||
Note: only do it for normal commits, done via ha_commit_trans.
|
|
||||||
If 2pc protocol is executed by external transaction
|
|
||||||
coordinator, it will be just a regular MySQL client
|
|
||||||
executing XA PREPARE and XA COMMIT commands.
|
|
||||||
In this case we cannot know how many minutes or hours
|
|
||||||
will be between XA PREPARE and XA COMMIT, and we don't want
|
|
||||||
to block for an undefined period of time.
|
|
||||||
*/
|
|
||||||
pthread_mutex_lock(&prepare_commit_mutex);
|
|
||||||
trx->active_trans = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* we use support_xa value as it was seen at transaction start
|
/* we use support_xa value as it was seen at transaction start
|
||||||
time, not the current session variable value. Any possible changes
|
time, not the current session variable value. Any possible changes
|
||||||
to the session variable take effect only in the next transaction */
|
to the session variable take effect only in the next transaction */
|
||||||
|
@ -9013,6 +8993,33 @@ innobase_xa_prepare(
|
||||||
|
|
||||||
srv_active_wake_master_thread();
|
srv_active_wake_master_thread();
|
||||||
|
|
||||||
|
if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
|
||||||
|
(all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
|
||||||
|
{
|
||||||
|
|
||||||
|
/* For ibbackup to work the order of transactions in binlog
|
||||||
|
and InnoDB must be the same. Consider the situation
|
||||||
|
|
||||||
|
thread1> prepare; write to binlog; ...
|
||||||
|
<context switch>
|
||||||
|
thread2> prepare; write to binlog; commit
|
||||||
|
thread1> ... commit
|
||||||
|
|
||||||
|
To ensure this will not happen we're taking the mutex on
|
||||||
|
prepare, and releasing it on commit.
|
||||||
|
|
||||||
|
Note: only do it for normal commits, done via ha_commit_trans.
|
||||||
|
If 2pc protocol is executed by external transaction
|
||||||
|
coordinator, it will be just a regular MySQL client
|
||||||
|
executing XA PREPARE and XA COMMIT commands.
|
||||||
|
In this case we cannot know how many minutes or hours
|
||||||
|
will be between XA PREPARE and XA COMMIT, and we don't want
|
||||||
|
to block for an undefined period of time.
|
||||||
|
*/
|
||||||
|
pthread_mutex_lock(&prepare_commit_mutex);
|
||||||
|
trx->active_trans = 2;
|
||||||
|
}
|
||||||
|
|
||||||
return(error);
|
return(error);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -497,10 +497,12 @@ struct trx_struct{
|
||||||
FALSE, one can save CPU time and about
|
FALSE, one can save CPU time and about
|
||||||
150 bytes in the undo log size as then
|
150 bytes in the undo log size as then
|
||||||
we skip XA steps */
|
we skip XA steps */
|
||||||
unsigned flush_log_later:1;/* when we commit the transaction
|
unsigned flush_log_later:1;/* In 2PC, we hold the
|
||||||
in MySQL's binlog write, we will
|
prepare_commit mutex across
|
||||||
flush the log to disk later in
|
both phases. In that case, we
|
||||||
a separate call */
|
defer flush of the logs to disk
|
||||||
|
until after we release the
|
||||||
|
mutex. */
|
||||||
unsigned must_flush_log_later:1;/* this flag is set to TRUE in
|
unsigned must_flush_log_later:1;/* this flag is set to TRUE in
|
||||||
trx_commit_off_kernel() if
|
trx_commit_off_kernel() if
|
||||||
flush_log_later was TRUE, and there
|
flush_log_later was TRUE, and there
|
||||||
|
|
|
@ -891,11 +891,11 @@ trx_commit_off_kernel(
|
||||||
there are > 2 users in the database. Then at least 2 users can
|
there are > 2 users in the database. Then at least 2 users can
|
||||||
gather behind one doing the physical log write to disk.
|
gather behind one doing the physical log write to disk.
|
||||||
|
|
||||||
If we are calling trx_commit() under MySQL's binlog mutex, we
|
If we are calling trx_commit() under prepare_commit_mutex, we
|
||||||
will delay possible log write and flush to a separate function
|
will delay possible log write and flush to a separate function
|
||||||
trx_commit_complete_for_mysql(), which is only called when the
|
trx_commit_complete_for_mysql(), which is only called when the
|
||||||
thread has released the binlog mutex. This is to make the
|
thread has released the mutex. This is to make the
|
||||||
group commit algorithm work. Otherwise, the MySQL binlog
|
group commit algorithm work. Otherwise, the prepare_commit
|
||||||
mutex would serialize all commits and prevent a group of
|
mutex would serialize all commits and prevent a group of
|
||||||
transactions from gathering. */
|
transactions from gathering. */
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue