From 0dbccdd2996b54a0f65da650a5d2c63f2e6a017d Mon Sep 17 00:00:00 2001
From: Kristian Nielsen <knielsen@knielsen-hq.org>
Date: Thu, 8 Aug 2024 12:35:55 +0200
Subject: [PATCH] MDEV-34705: Binlog in Engine

Skip prepare step in InnoDB when it handles the binlog, but re-enable
InnoDB fsync at commit.

Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
---
 sql/handler.cc                        | 4 ++++
 storage/innobase/handler/ha_innodb.cc | 4 ++++
 storage/innobase/include/trx0trx.h    | 3 +++
 storage/innobase/trx/trx0trx.cc       | 5 ++++-
 4 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/sql/handler.cc b/sql/handler.cc
index 2792321386c..631c527b387 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -1964,6 +1964,10 @@ int ha_commit_trans(THD *thd, bool all)
     */
     if (! hi->is_trx_read_write())
       continue;
+    /* We do not need to 2pc the binlog with the engine that implements it. */
+    /* ToDo: This needs refinement, at least to handle the case when we are not binlogging. And maybe the logic could happen more elegantly in a different place, higher in the call stack? */
+    if (ht == opt_binlog_engine_hton)
+      continue;
     /*
       Sic: we know that prepare() is not NULL since otherwise
       trans->no_2pc would have been set.
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 78147fef7e0..ada368fd0ce 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -2810,6 +2810,7 @@ trx_deregister_from_2pc(
 {
   trx->is_registered= false;
   trx->active_commit_ordered= false;
+  trx->active_prepare= false;
 }
 
 /**
@@ -17337,7 +17338,10 @@ innobase_xa_prepare(
   case TRX_STATE_ACTIVE:
     thd_get_xid(thd, &reinterpret_cast<MYSQL_XID&>(trx->xid));
     if (prepare_trx)
+    {
       trx_prepare_for_mysql(trx);
+      trx->active_prepare= true;
+    }
     else
     {
       lock_unlock_table_autoinc(trx);
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index 889437aa1d2..65b6a7580f0 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -823,7 +823,10 @@ public:
 					is set to false  after commit or
 					rollback. */
 	/** whether this is holding the prepare mutex */
+        /* ToDo: The active_prepare flag below needs a better mechanism. It is currently used to know that we did not do a prepare step before commit_ordered, due to binlog being stored in InnoDB; and therefore we need to do an fsync of the log in commit to make the commit durable. */
 	bool		active_commit_ordered;
+	/** whether innobase_xa_prepare() actually prepared this transaction, i.e. called trx_prepare_for_mysql(). */
+	bool		active_prepare;
 	/*------------------------------*/
 	bool		flush_log_later;/* In 2PC, we hold the
 					prepare_commit mutex across
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 7aa57d91238..f8970bc3a1b 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -106,6 +106,8 @@ trx_init(
 
 	trx->active_commit_ordered = false;
 
+	trx->active_prepare = false;
+
 	trx->isolation_level = TRX_ISO_REPEATABLE_READ;
 
 	trx->check_foreigns = true;
@@ -407,6 +409,7 @@ void trx_t::free()
                                    bulk_insert */);
   MEM_NOACCESS(&is_registered, sizeof is_registered);
   MEM_NOACCESS(&active_commit_ordered, sizeof active_commit_ordered);
+  MEM_NOACCESS(&active_prepare, sizeof active_prepare);
   MEM_NOACCESS(&flush_log_later, sizeof flush_log_later);
   MEM_NOACCESS(&duplicates, sizeof duplicates);
   MEM_NOACCESS(&dict_operation, sizeof dict_operation);
@@ -1743,7 +1746,7 @@ void trx_commit_complete_for_mysql(trx_t *trx)
   case 0:
     return;
   case 1:
-    if (trx->active_commit_ordered)
+    if (trx->active_commit_ordered && trx->active_prepare)
       return;
   }
   trx_flush_log_if_needed(lsn, trx);