diff --git a/handler/ha_innodb.cc b/handler/ha_innodb.cc index faab76010fc..aa54e0fa30e 100644 --- a/handler/ha_innodb.cc +++ b/handler/ha_innodb.cc @@ -1911,12 +1911,7 @@ retry: trx->mysql_log_file_name = mysql_bin_log_file_name(); trx->mysql_log_offset = (ib_longlong) mysql_bin_log_file_pos(); - /* Don't do write + flush right now. For group commit - to work we want to do the flush after releasing the - prepare_commit_mutex. */ - trx->flush_log_later = TRUE; innobase_commit_low(trx); - trx->flush_log_later = FALSE; if (srv_commit_concurrency > 0) { pthread_mutex_lock(&commit_cond_m); @@ -1930,8 +1925,6 @@ retry: pthread_mutex_unlock(&prepare_commit_mutex); } - /* Now do a write + flush of logs. */ - trx_commit_complete_for_mysql(trx); trx->active_trans = 0; } else { @@ -7740,7 +7733,32 @@ innobase_xa_prepare( int error = 0; trx_t* trx = check_trx_exists(thd); + if (thd_sql_command(thd) != SQLCOM_XA_PREPARE && + (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) + { + /* For ibbackup to work the order of transactions in binlog + and InnoDB must be the same. Consider the situation + + thread1> prepare; write to binlog; ... + + thread2> prepare; write to binlog; commit + thread1> ... commit + + To ensure this will not happen we're taking the mutex on + prepare, and releasing it on commit. + + Note: only do it for normal commits, done via ha_commit_trans. + If 2pc protocol is executed by external transaction + coordinator, it will be just a regular MySQL client + executing XA PREPARE and XA COMMIT commands. + In this case we cannot know how many minutes or hours + will be between XA PREPARE and XA COMMIT, and we don't want + to block for undefined period of time. + */ + pthread_mutex_lock(&prepare_commit_mutex); + trx->active_trans = 2; + } if (!THDVAR(thd, support_xa)) { @@ -7793,33 +7811,6 @@ innobase_xa_prepare( srv_active_wake_master_thread(); - if (thd_sql_command(thd) != SQLCOM_XA_PREPARE && - (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) - { - - /* For ibbackup to work the order of transactions in binlog - and InnoDB must be the same. Consider the situation - - thread1> prepare; write to binlog; ... - - thread2> prepare; write to binlog; commit - thread1> ... commit - - To ensure this will not happen we're taking the mutex on - prepare, and releasing it on commit. - - Note: only do it for normal commits, done via ha_commit_trans. - If 2pc protocol is executed by external transaction - coordinator, it will be just a regular MySQL client - executing XA PREPARE and XA COMMIT commands. - In this case we cannot know how many minutes or hours - will be between XA PREPARE and XA COMMIT, and we don't want - to block for undefined period of time. - */ - pthread_mutex_lock(&prepare_commit_mutex); - trx->active_trans = 2; - } - return(error); } diff --git a/include/trx0trx.h b/include/trx0trx.h index 505e540638a..7b107348f2b 100644 --- a/include/trx0trx.h +++ b/include/trx0trx.h @@ -461,12 +461,10 @@ struct trx_struct{ FALSE, one can save CPU time and about 150 bytes in the undo log size as then we skip XA steps */ - unsigned flush_log_later:1;/* In 2PC, we hold the - prepare_commit mutex across - both phases. In that case, we - defer flush of the logs to disk - until after we release the - mutex. */ + unsigned flush_log_later:1;/* when we commit the transaction + in MySQL's binlog write, we will + flush the log to disk later in + a separate call */ unsigned must_flush_log_later:1;/* this flag is set to TRUE in trx_commit_off_kernel() if flush_log_later was TRUE, and there diff --git a/trx/trx0trx.c b/trx/trx0trx.c index 6d29de92683..1213852fc5a 100644 --- a/trx/trx0trx.c +++ b/trx/trx0trx.c @@ -842,11 +842,11 @@ trx_commit_off_kernel( there are > 2 users in the database. Then at least 2 users can gather behind one doing the physical log write to disk. - If we are calling trx_commit() under prepare_commit_mutex, we + If we are calling trx_commit() under MySQL's binlog mutex, we will delay possible log write and flush to a separate function trx_commit_complete_for_mysql(), which is only called when the - thread has released the mutex. This is to make the - group commit algorithm to work. Otherwise, the prepare_commit + thread has released the binlog mutex. This is to make the + group commit algorithm to work. Otherwise, the MySQL binlog mutex would serialize all commits and prevent a group of transactions from gathering. */