mirror of
https://github.com/MariaDB/server.git
synced 2025-01-29 02:05:57 +01:00
MWL#116: Efficient group commit
Tweak the commit_ordered() semantics. Now it is only called for transactions that go through 2-phase commit. This avoids forcing engines to make commits visible before they are durable. Also take the LOCK_commit_ordered mutex around START TRANSACTION WITH CONSISTENT SNAPSHOT, to get a truly consistent snapshot.
This commit is contained in:
parent
498f10a2be
commit
8bc445360e
4 changed files with 117 additions and 98 deletions
|
@ -1251,32 +1251,7 @@ int ha_commit_one_phase(THD *thd, bool all)
|
|||
enclosing 'all' transaction is rolled back.
|
||||
*/
|
||||
bool is_real_trans=all || thd->transaction.all.ha_list == 0;
|
||||
Ha_trx_info *ha_info= trans->ha_list;
|
||||
DBUG_ENTER("ha_commit_one_phase");
|
||||
#ifdef USING_TRANSACTIONS
|
||||
if (ha_info)
|
||||
{
|
||||
if (is_real_trans)
|
||||
{
|
||||
bool locked= false;
|
||||
for (; ha_info; ha_info= ha_info->next())
|
||||
{
|
||||
handlerton *ht= ha_info->ht();
|
||||
if (ht->commit_ordered)
|
||||
{
|
||||
if (ha_info->is_trx_read_write() && !locked)
|
||||
{
|
||||
pthread_mutex_lock(&LOCK_commit_ordered);
|
||||
locked= 1;
|
||||
}
|
||||
ht->commit_ordered(ht, thd, all);
|
||||
}
|
||||
}
|
||||
if (locked)
|
||||
pthread_mutex_unlock(&LOCK_commit_ordered);
|
||||
}
|
||||
}
|
||||
#endif /* USING_TRANSACTIONS */
|
||||
DBUG_RETURN(commit_one_phase_2(thd, all, trans, is_real_trans));
|
||||
}
|
||||
|
||||
|
@ -1901,7 +1876,13 @@ int ha_start_consistent_snapshot(THD *thd)
|
|||
{
|
||||
bool warn= true;
|
||||
|
||||
/*
|
||||
Holding the LOCK_commit_ordered mutex ensures that for any transaction
|
||||
we either see it committed in all engines, or in none.
|
||||
*/
|
||||
pthread_mutex_lock(&LOCK_commit_ordered);
|
||||
plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
|
||||
pthread_mutex_unlock(&LOCK_commit_ordered);
|
||||
|
||||
/*
|
||||
Same idea as when one wants to CREATE TABLE in one engine which does not
|
||||
|
|
|
@ -667,6 +667,11 @@ struct handlerton
|
|||
full transaction is committed, not for each commit of statement
|
||||
transaction in a multi-statement transaction.
|
||||
|
||||
Note that, like prepare(), commit_ordered() is only called when 2-phase
|
||||
commit takes place. I.e. when no binary log and only a single engine
|
||||
participates in a transaction, one commit() is called, no
|
||||
commit_ordered(). So engines must be prepared for this.
|
||||
|
||||
The calls to commit_ordered() in multiple parallel transactions are
|
||||
guaranteed to happen in the same order in every participating
|
||||
handler. This can be used to ensure the same commit order among multiple
|
||||
|
@ -684,11 +689,9 @@ struct handlerton
|
|||
doing any time-consuming or blocking operations in commit_ordered() will
|
||||
limit scalability.
|
||||
|
||||
Handlers can rely on commit_ordered() calls for transactions that updated
|
||||
data to be serialised (no two calls can run in parallel, so no extra
|
||||
locking on the handler part is required to ensure this). However, calls
|
||||
for SELECT-only transactions are not serialised, so can occur in parallel
|
||||
with each other and with at most one write-transaction.
|
||||
Handlers can rely on commit_ordered() calls to be serialised (no two
|
||||
calls can run in parallel, so no extra locking on the handler part is
|
||||
required to ensure this).
|
||||
|
||||
Note that commit_ordered() can be called from a different thread than the
|
||||
one handling the transaction! So it can not do anything that depends on
|
||||
|
@ -700,7 +703,8 @@ struct handlerton
|
|||
must be saved and returned from the commit() method instead.
|
||||
|
||||
The commit_ordered method is optional, and can be left unset if not
|
||||
needed in a particular handler.
|
||||
needed in a particular handler (then there will be no ordering guarantees
|
||||
wrt. other engines and binary log).
|
||||
*/
|
||||
void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
|
||||
int (*rollback)(handlerton *hton, THD *thd, bool all);
|
||||
|
|
|
@ -1700,10 +1700,10 @@ innobase_query_caching_of_table_permitted(
|
|||
/* The call of row_search_.. will start a new transaction if it is
|
||||
not yet started */
|
||||
|
||||
if (trx->active_trans == 0) {
|
||||
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0) {
|
||||
|
||||
innobase_register_trx_and_stmt(innodb_hton_ptr, thd);
|
||||
trx->active_trans = 1;
|
||||
trx->active_trans |= TRX_ACTIVE_IN_MYSQL;
|
||||
}
|
||||
|
||||
if (row_search_check_if_query_cache_permitted(trx, norm_name)) {
|
||||
|
@ -1973,11 +1973,11 @@ ha_innobase::init_table_handle_for_HANDLER(void)
|
|||
|
||||
/* Set the MySQL flag to mark that there is an active transaction */
|
||||
|
||||
if (prebuilt->trx->active_trans == 0) {
|
||||
if ((prebuilt->trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0) {
|
||||
|
||||
innobase_register_trx_and_stmt(ht, user_thd);
|
||||
|
||||
prebuilt->trx->active_trans = 1;
|
||||
prebuilt->trx->active_trans |= TRX_ACTIVE_IN_MYSQL;
|
||||
}
|
||||
|
||||
/* We did the necessary inits in this function, no need to repeat them
|
||||
|
@ -2704,58 +2704,21 @@ innobase_start_trx_and_assign_read_view(
|
|||
|
||||
/* Set the MySQL flag to mark that there is an active transaction */
|
||||
|
||||
if (trx->active_trans == 0) {
|
||||
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0) {
|
||||
innobase_register_trx_and_stmt(hton, thd);
|
||||
trx->active_trans = 1;
|
||||
trx->active_trans |= TRX_ACTIVE_IN_MYSQL;
|
||||
}
|
||||
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Perform the first, fast part of InnoDB commit.
|
||||
|
||||
Doing it in this call ensures that we get the same commit order here
|
||||
as in binlog and any other participating transactional storage engines.
|
||||
|
||||
Note that we want to do as little as really needed here, as we run
|
||||
under a global mutex. The expensive fsync() is done later, in
|
||||
innobase_commit(), without a lock so group commit can take place.
|
||||
|
||||
Note also that this method can be called from a different thread than
|
||||
the one handling the rest of the transaction. */
|
||||
static
|
||||
void
|
||||
innobase_commit_ordered(
|
||||
innobase_commit_ordered_2(
|
||||
/*============*/
|
||||
handlerton *hton, /*!< in: Innodb handlerton */
|
||||
THD* thd, /*!< in: MySQL thread handle of the user for whom
|
||||
the transaction should be committed */
|
||||
bool all) /*!< in: TRUE - commit transaction
|
||||
FALSE - the current SQL statement ended */
|
||||
trx_t* trx) /*!< in: Innodb transaction */
|
||||
{
|
||||
trx_t* trx;
|
||||
DBUG_ENTER("innobase_commit_ordered");
|
||||
DBUG_ASSERT(hton == innodb_hton_ptr);
|
||||
|
||||
trx = check_trx_exists(thd);
|
||||
|
||||
if (trx->active_trans == 0
|
||||
&& trx->conc_state != TRX_NOT_STARTED) {
|
||||
/* We cannot throw error here; instead we will catch this error
|
||||
again in innobase_commit() and report it from there. */
|
||||
DBUG_VOID_RETURN;
|
||||
}
|
||||
/* Since we will reserve the kernel mutex, we have to release
|
||||
the search system latch first to obey the latching order. */
|
||||
|
||||
if (trx->has_search_latch) {
|
||||
trx_search_latch_release_if_reserved(trx);
|
||||
}
|
||||
|
||||
/* commit_ordered is only called when committing the whole transaction
|
||||
(or an SQL statement when autocommit is on). */
|
||||
DBUG_ASSERT(all || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
|
||||
|
||||
/* We need current binlog position for ibbackup to work.
|
||||
Note, the position is current because commit_ordered is guaranteed
|
||||
|
@ -2807,6 +2770,60 @@ retry:
|
|||
DBUG_VOID_RETURN;
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Perform the first, fast part of InnoDB commit.
|
||||
|
||||
Doing it in this call ensures that we get the same commit order here
|
||||
as in binlog and any other participating transactional storage engines.
|
||||
|
||||
Note that we want to do as little as really needed here, as we run
|
||||
under a global mutex. The expensive fsync() is done later, in
|
||||
innobase_commit(), without a lock so group commit can take place.
|
||||
|
||||
Note also that this method can be called from a different thread than
|
||||
the one handling the rest of the transaction. */
|
||||
static
|
||||
void
|
||||
innobase_commit_ordered(
|
||||
/*============*/
|
||||
handlerton *hton, /*!< in: Innodb handlerton */
|
||||
THD* thd, /*!< in: MySQL thread handle of the user for whom
|
||||
the transaction should be committed */
|
||||
bool all) /*!< in: TRUE - commit transaction
|
||||
FALSE - the current SQL statement ended */
|
||||
{
|
||||
trx_t* trx;
|
||||
DBUG_ENTER("innobase_commit_ordered");
|
||||
DBUG_ASSERT(hton == innodb_hton_ptr);
|
||||
|
||||
trx = check_trx_exists(thd);
|
||||
|
||||
/* Since we will reserve the kernel mutex, we have to release
|
||||
the search system latch first to obey the latching order. */
|
||||
|
||||
if (trx->has_search_latch) {
|
||||
trx_search_latch_release_if_reserved(trx);
|
||||
}
|
||||
|
||||
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0
|
||||
&& trx->conc_state != TRX_NOT_STARTED) {
|
||||
/* We cannot throw error here; instead we will catch this error
|
||||
again in innobase_commit() and report it from there. */
|
||||
DBUG_VOID_RETURN;
|
||||
}
|
||||
|
||||
/* commit_ordered is only called when committing the whole transaction
|
||||
(or an SQL statement when autocommit is on). */
|
||||
DBUG_ASSERT(all ||
|
||||
(!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
|
||||
|
||||
innobase_commit_ordered_2(trx);
|
||||
|
||||
trx->active_trans |= TRX_ACTIVE_COMMIT_ORDERED;
|
||||
|
||||
DBUG_VOID_RETURN;
|
||||
}
|
||||
|
||||
/*****************************************************************//**
|
||||
Commits a transaction in an InnoDB database or marks an SQL statement
|
||||
ended.
|
||||
|
@ -2829,7 +2846,15 @@ innobase_commit(
|
|||
|
||||
trx = check_trx_exists(thd);
|
||||
|
||||
/* The flag trx->active_trans is set to 1 in
|
||||
/* Since we will reserve the kernel mutex, we have to release
|
||||
the search system latch first to obey the latching order. */
|
||||
|
||||
if (trx->has_search_latch &&
|
||||
(trx->active_trans & TRX_ACTIVE_COMMIT_ORDERED) == 0) {
|
||||
trx_search_latch_release_if_reserved(trx);
|
||||
}
|
||||
|
||||
/* The flag TRX_ACTIVE_IN_MYSQL in trx->active_trans is set in
|
||||
|
||||
1. ::external_lock(),
|
||||
2. ::start_stmt(),
|
||||
|
@ -2839,20 +2864,26 @@ innobase_commit(
|
|||
6. innobase_start_trx_and_assign_read_view(),
|
||||
7. ::transactional_table_lock()
|
||||
|
||||
and it is only set to 0 in a commit or a rollback. If it is 0 we know
|
||||
and it is only cleared in a commit or a rollback. If it is unset we know
|
||||
there cannot be resources to be freed and we could return immediately.
|
||||
For the time being, we play safe and do the cleanup though there should
|
||||
be nothing to clean up. */
|
||||
|
||||
if (trx->active_trans == 0
|
||||
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0
|
||||
&& trx->conc_state != TRX_NOT_STARTED) {
|
||||
|
||||
sql_print_error("trx->active_trans == 0, but"
|
||||
" trx->conc_state != TRX_NOT_STARTED");
|
||||
}
|
||||
|
||||
if (all
|
||||
|| (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
|
||||
|
||||
/* Run the fast part of commit if we did not already. */
|
||||
if ((trx->active_trans & TRX_ACTIVE_COMMIT_ORDERED) == 0) {
|
||||
innobase_commit_ordered_2(trx);
|
||||
}
|
||||
|
||||
/* We were instructed to commit the whole transaction, or
|
||||
this is an SQL statement end and autocommit is on */
|
||||
|
||||
|
@ -3076,7 +3107,7 @@ innobase_savepoint(
|
|||
innobase_release_stat_resources(trx);
|
||||
|
||||
/* cannot happen outside of transaction */
|
||||
DBUG_ASSERT(trx->active_trans);
|
||||
DBUG_ASSERT(trx->active_trans & TRX_ACTIVE_IN_MYSQL);
|
||||
|
||||
/* TODO: use provided savepoint data area to store savepoint data */
|
||||
char name[64];
|
||||
|
@ -3106,7 +3137,7 @@ innobase_close_connection(
|
|||
|
||||
ut_a(trx);
|
||||
|
||||
if (trx->active_trans == 0
|
||||
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0
|
||||
&& trx->conc_state != TRX_NOT_STARTED) {
|
||||
|
||||
sql_print_error("trx->active_trans == 0, but"
|
||||
|
@ -5021,10 +5052,9 @@ no_commit:
|
|||
no need to re-acquire locks on it. */
|
||||
|
||||
/* Altering to InnoDB format */
|
||||
innobase_commit_ordered(ht, user_thd, 1);
|
||||
innobase_commit(ht, user_thd, 1);
|
||||
/* Note that this transaction is still active. */
|
||||
prebuilt->trx->active_trans = 1;
|
||||
prebuilt->trx->active_trans |= TRX_ACTIVE_IN_MYSQL;
|
||||
/* We will need an IX lock on the destination table. */
|
||||
prebuilt->sql_stat_start = TRUE;
|
||||
} else {
|
||||
|
@ -5038,10 +5068,9 @@ no_commit:
|
|||
|
||||
/* Commit the transaction. This will release the table
|
||||
locks, so they have to be acquired again. */
|
||||
innobase_commit_ordered(ht, user_thd, 1);
|
||||
innobase_commit(ht, user_thd, 1);
|
||||
/* Note that this transaction is still active. */
|
||||
prebuilt->trx->active_trans = 1;
|
||||
prebuilt->trx->active_trans |= TRX_ACTIVE_IN_MYSQL;
|
||||
/* Re-acquire the table lock on the source table. */
|
||||
row_lock_table_for_mysql(prebuilt, src_table, mode);
|
||||
/* We will need an IX lock on the destination table. */
|
||||
|
@ -8929,10 +8958,10 @@ ha_innobase::start_stmt(
|
|||
trx->detailed_error[0] = '\0';
|
||||
|
||||
/* Set the MySQL flag to mark that there is an active transaction */
|
||||
if (trx->active_trans == 0) {
|
||||
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0) {
|
||||
|
||||
innobase_register_trx_and_stmt(ht, thd);
|
||||
trx->active_trans = 1;
|
||||
trx->active_trans |= TRX_ACTIVE_IN_MYSQL;
|
||||
} else {
|
||||
innobase_register_stmt(ht, thd);
|
||||
}
|
||||
|
@ -9030,10 +9059,10 @@ ha_innobase::external_lock(
|
|||
|
||||
/* Set the MySQL flag to mark that there is an active
|
||||
transaction */
|
||||
if (trx->active_trans == 0) {
|
||||
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0) {
|
||||
|
||||
innobase_register_trx_and_stmt(ht, thd);
|
||||
trx->active_trans = 1;
|
||||
trx->active_trans |= TRX_ACTIVE_IN_MYSQL;
|
||||
} else if (trx->n_mysql_tables_in_use == 0) {
|
||||
innobase_register_stmt(ht, thd);
|
||||
}
|
||||
|
@ -9131,8 +9160,7 @@ ha_innobase::external_lock(
|
|||
prebuilt->used_in_HANDLER = FALSE;
|
||||
|
||||
if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
|
||||
if (trx->active_trans != 0) {
|
||||
innobase_commit_ordered(ht, thd, TRUE);
|
||||
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) != 0) {
|
||||
innobase_commit(ht, thd, TRUE);
|
||||
}
|
||||
} else {
|
||||
|
@ -9217,10 +9245,10 @@ ha_innobase::transactional_table_lock(
|
|||
/* MySQL is setting a new transactional table lock */
|
||||
|
||||
/* Set the MySQL flag to mark that there is an active transaction */
|
||||
if (trx->active_trans == 0) {
|
||||
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0) {
|
||||
|
||||
innobase_register_trx_and_stmt(ht, thd);
|
||||
trx->active_trans = 1;
|
||||
trx->active_trans |= TRX_ACTIVE_IN_MYSQL;
|
||||
}
|
||||
|
||||
if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) {
|
||||
|
@ -10272,7 +10300,8 @@ innobase_xa_prepare(
|
|||
|
||||
innobase_release_stat_resources(trx);
|
||||
|
||||
if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) {
|
||||
if ((trx->active_trans & TRX_ACTIVE_IN_MYSQL) == 0 &&
|
||||
trx->conc_state != TRX_NOT_STARTED) {
|
||||
|
||||
sql_print_error("trx->active_trans == 0, but trx->conc_state != "
|
||||
"TRX_NOT_STARTED");
|
||||
|
@ -10284,7 +10313,7 @@ innobase_xa_prepare(
|
|||
/* We were instructed to prepare the whole transaction, or
|
||||
this is an SQL statement end and autocommit is on */
|
||||
|
||||
ut_ad(trx->active_trans);
|
||||
ut_ad(trx->active_trans & TRX_ACTIVE_IN_MYSQL);
|
||||
|
||||
error = (int) trx_prepare_for_mysql(trx);
|
||||
} else {
|
||||
|
|
|
@ -511,9 +511,10 @@ struct trx_struct{
|
|||
in that case we must flush the log
|
||||
in trx_commit_complete_for_mysql() */
|
||||
ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
|
||||
ulint active_trans; /*!< 1 - if a transaction in MySQL
|
||||
is active. 2 - if prepare_commit_mutex
|
||||
was taken */
|
||||
ulint active_trans; /*!< TRX_ACTIVE_IN_MYSQL - set if a
|
||||
transaction in MySQL is active.
|
||||
TRX_ACTIVE_COMMIT_ORDERED - set if
|
||||
innobase_commit_ordered has run */
|
||||
ulint has_search_latch;
|
||||
/* TRUE if this trx has latched the
|
||||
search system latch in S-mode */
|
||||
|
@ -824,6 +825,10 @@ Multiple flags can be combined with bitwise OR. */
|
|||
#define TRX_SIG_OTHER_SESS 1 /* sent by another session (which
|
||||
must hold rights to this) */
|
||||
|
||||
/* Flag bits for trx_struct.active_trans */
|
||||
#define TRX_ACTIVE_IN_MYSQL (1<<0)
|
||||
#define TRX_ACTIVE_COMMIT_ORDERED (1<<1)
|
||||
|
||||
/** Commit node states */
|
||||
enum commit_node_state {
|
||||
COMMIT_NODE_SEND = 1, /*!< about to send a commit signal to
|
||||
|
|
Loading…
Add table
Reference in a new issue