branches/zip: Enabled group commit functionality with XA.

Reviewed by: Heikki
This commit is contained in:
inaam 2007-09-10 17:06:44 +00:00
parent 7c680d2f3e
commit 24741ad8f7
3 changed files with 43 additions and 32 deletions

View file

@ -1911,7 +1911,12 @@ retry:
trx->mysql_log_file_name = mysql_bin_log_file_name();
trx->mysql_log_offset = (ib_longlong) mysql_bin_log_file_pos();
/* Don't do write + flush right now. For group commit
to work we want to do the flush after releasing the
prepare_commit_mutex. */
trx->flush_log_later = TRUE;
innobase_commit_low(trx);
trx->flush_log_later = FALSE;
if (srv_commit_concurrency > 0) {
pthread_mutex_lock(&commit_cond_m);
@ -1925,6 +1930,8 @@ retry:
pthread_mutex_unlock(&prepare_commit_mutex);
}
/* Now do a write + flush of logs. */
trx_commit_complete_for_mysql(trx);
trx->active_trans = 0;
} else {
@ -7733,32 +7740,7 @@ innobase_xa_prepare(
int error = 0;
trx_t* trx = check_trx_exists(thd);
if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
(all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
{
/* For ibbackup to work the order of transactions in binlog
and InnoDB must be the same. Consider the situation
thread1> prepare; write to binlog; ...
<context switch>
thread2> prepare; write to binlog; commit
thread1> ... commit
To ensure this will not happen we're taking the mutex on
prepare, and releasing it on commit.
Note: only do it for normal commits, done via ha_commit_trans.
If 2pc protocol is executed by external transaction
coordinator, it will be just a regular MySQL client
executing XA PREPARE and XA COMMIT commands.
In this case we cannot know how many minutes or hours
will be between XA PREPARE and XA COMMIT, and we don't want
to block for undefined period of time.
*/
pthread_mutex_lock(&prepare_commit_mutex);
trx->active_trans = 2;
}
if (!THDVAR(thd, support_xa)) {
@ -7811,6 +7793,33 @@ innobase_xa_prepare(
srv_active_wake_master_thread();
if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
(all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
{
/* For ibbackup to work the order of transactions in binlog
and InnoDB must be the same. Consider the situation
thread1> prepare; write to binlog; ...
<context switch>
thread2> prepare; write to binlog; commit
thread1> ... commit
To ensure this will not happen we're taking the mutex on
prepare, and releasing it on commit.
Note: only do it for normal commits, done via ha_commit_trans.
If 2pc protocol is executed by external transaction
coordinator, it will be just a regular MySQL client
executing XA PREPARE and XA COMMIT commands.
In this case we cannot know how many minutes or hours
will be between XA PREPARE and XA COMMIT, and we don't want
to block for undefined period of time.
*/
pthread_mutex_lock(&prepare_commit_mutex);
trx->active_trans = 2;
}
return(error);
}

View file

@ -461,10 +461,12 @@ struct trx_struct{
FALSE, one can save CPU time and about
150 bytes in the undo log size as then
we skip XA steps */
unsigned flush_log_later:1;/* when we commit the transaction
in MySQL's binlog write, we will
flush the log to disk later in
a separate call */
unsigned flush_log_later:1;/* In 2PC, we hold the
prepare_commit mutex across
both phases. In that case, we
defer flush of the logs to disk
until after we release the
mutex. */
unsigned must_flush_log_later:1;/* this flag is set to TRUE in
trx_commit_off_kernel() if
flush_log_later was TRUE, and there

View file

@ -842,11 +842,11 @@ trx_commit_off_kernel(
there are > 2 users in the database. Then at least 2 users can
gather behind one doing the physical log write to disk.
If we are calling trx_commit() under MySQL's binlog mutex, we
If we are calling trx_commit() under prepare_commit_mutex, we
will delay possible log write and flush to a separate function
trx_commit_complete_for_mysql(), which is only called when the
thread has released the binlog mutex. This is to make the
group commit algorithm to work. Otherwise, the MySQL binlog
thread has released the mutex. This is to make the
group commit algorithm to work. Otherwise, the prepare_commit
mutex would serialize all commits and prevent a group of
transactions from gathering. */