From 0dbccdd2996b54a0f65da650a5d2c63f2e6a017d Mon Sep 17 00:00:00 2001 From: Kristian Nielsen <knielsen@knielsen-hq.org> Date: Thu, 8 Aug 2024 12:35:55 +0200 Subject: [PATCH] MDEV-34705: Binlog in Engine Skip prepare step in InnoDB when it handles the binlog, but re-enable InnoDB fsync at commit. Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org> --- sql/handler.cc | 4 ++++ storage/innobase/handler/ha_innodb.cc | 4 ++++ storage/innobase/include/trx0trx.h | 3 +++ storage/innobase/trx/trx0trx.cc | 5 ++++- 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/sql/handler.cc b/sql/handler.cc index 2792321386c..631c527b387 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1964,6 +1964,10 @@ int ha_commit_trans(THD *thd, bool all) */ if (! hi->is_trx_read_write()) continue; + /* We do not need to 2pc the binlog with the engine that implements it. */ + /* ToDo: This needs refinement, at least to handle the case when we are not binlogging. And maybe the logic could happen more elegantly in a different place, higher in the call stack? */ + if (ht == opt_binlog_engine_hton) + continue; /* Sic: we know that prepare() is not NULL since otherwise trans->no_2pc would have been set. 
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 78147fef7e0..ada368fd0ce 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -2810,6 +2810,7 @@ trx_deregister_from_2pc( { trx->is_registered= false; trx->active_commit_ordered= false; + trx->active_prepare= false; } /** @@ -17337,7 +17338,10 @@ innobase_xa_prepare( case TRX_STATE_ACTIVE: thd_get_xid(thd, &reinterpret_cast<MYSQL_XID&>(trx->xid)); if (prepare_trx) + { trx_prepare_for_mysql(trx); + trx->active_prepare= true; + } else { lock_unlock_table_autoinc(trx); diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 889437aa1d2..65b6a7580f0 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -823,7 +823,10 @@ public: is set to false after commit or rollback. */ /** whether this is holding the prepare mutex */ + /* ToDo: This needs a better mechanism. It is currently done to know that we did not do a prepare step before commit_ordered, due to binlog being stored in InnoDB; and therefore we need to do an fsync of the log in commit to make the commit durable. */ bool active_commit_ordered; + /** whether innobase_xa_prepare() was done. 
*/ + bool active_prepare; /*------------------------------*/ bool flush_log_later;/* In 2PC, we hold the prepare_commit mutex across diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 7aa57d91238..f8970bc3a1b 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -106,6 +106,8 @@ trx_init( trx->active_commit_ordered = false; + trx->active_prepare = false; + trx->isolation_level = TRX_ISO_REPEATABLE_READ; trx->check_foreigns = true; @@ -407,6 +409,7 @@ void trx_t::free() bulk_insert */); MEM_NOACCESS(&is_registered, sizeof is_registered); MEM_NOACCESS(&active_commit_ordered, sizeof active_commit_ordered); + MEM_NOACCESS(&active_prepare, sizeof active_prepare); MEM_NOACCESS(&flush_log_later, sizeof flush_log_later); MEM_NOACCESS(&duplicates, sizeof duplicates); MEM_NOACCESS(&dict_operation, sizeof dict_operation); @@ -1743,7 +1746,7 @@ void trx_commit_complete_for_mysql(trx_t *trx) case 0: return; case 1: - if (trx->active_commit_ordered) + if (trx->active_commit_ordered && trx->active_prepare) return; } trx_flush_log_if_needed(lsn, trx);