MDEV-38589: SELECT unnecessarily waits for log write

The design of "binlog group commit" involves carrying some state across
transaction boundaries. This includes trx_t::commit_lsn, which keeps track
of how much write-ahead log needs to be written. Unfortunately, this
field was not reset in a commit where a log write was elided. That would
cause an unnecessary wait in a subsequent read-only transaction that
happened to reuse the same transaction object.

trx_deregister_from_2pc(): Reset trx->commit_lsn so that
an earlier write that was executed in the same client connection
will not result in an unnecessary wait during a subsequent read
operation.

trx_commit_complete_for_mysql(): Unless we are inside a binlog
group commit, reset trx->commit_lsn.

unlock_and_close_files(): Reset trx->commit_lsn after durably
writing the log, and remove a redundant log write call from some
callers.

trx_t::rollback_finish(): Clear commit_lsn, because a rolled-back
transaction will not need to be durably written.

trx_t::clear_and_free(): Wrapper function to suppress a debug check
in trx_t::free().

Also, remove some redundant ut_ad(!trx->will_lock) that will be checked
in trx_t::free().

Reviewed by: Vladislav Vaintroub
This commit is contained in:
Marko Mäkelä 2026-01-28 13:24:09 +02:00
commit 7614f8fb5d
10 changed files with 39 additions and 25 deletions

View file

@ -1424,7 +1424,7 @@ err_exit:
ut_strerr(error));
trx->rollback();
row_mysql_unlock_data_dictionary(trx);
trx->free();
trx->clear_and_free();
srv_file_per_table= srv_file_per_table_backup;
return error;
}
@ -1463,7 +1463,7 @@ err_exit:
trx->commit();
row_mysql_unlock_data_dictionary(trx);
trx->free();
trx->clear_and_free();
srv_file_per_table= srv_file_per_table_backup;
lock(SRW_LOCK_CALL);

View file

@ -242,7 +242,7 @@ dberr_t dict_stats_save_defrag_summary(dict_index_t *index, THD *thd)
trx->rollback();
row_mysql_unlock_data_dictionary(trx);
trx->free();
trx->clear_and_free();
stats.close();
return ret;
@ -359,7 +359,7 @@ dict_stats_save_defrag_stats(
trx->rollback();
row_mysql_unlock_data_dictionary(trx);
trx->free();
trx->clear_and_free();
stats.close();
return ret;

View file

@ -2987,7 +2987,7 @@ free_and_exit:
trx->dict_operation_lock_mode = false;
dict_sys.unlock();
unlocked_free_and_exit:
trx->free();
trx->clear_and_free();
stats.close();
return ret;
}

View file

@ -2701,7 +2701,7 @@ func_exit:
}
}
trx->free();
trx->clear_and_free();
return(error);
}
@ -2777,7 +2777,7 @@ fts_update_sync_doc_id(
fts_sql_rollback(trx);
}
trx->free();
trx->clear_and_free();
}
return(error);
@ -2997,7 +2997,7 @@ fts_commit_table(
fts_sql_commit(trx);
trx->free();
trx->clear_and_free();
return(error);
}
@ -4211,7 +4211,7 @@ fts_sync_commit(
/* Avoid assertion in trx_t::free(). */
trx->dict_operation_lock_mode = false;
trx->free();
trx->clear_and_free();
return(error);
}
@ -4261,7 +4261,7 @@ fts_sync_rollback(
/* Avoid assertion in trx_t::free(). */
trx->dict_operation_lock_mode = false;
trx->free();
trx->clear_and_free();
}
/** Run SYNC on the table, i.e., write out data from the cache to the
@ -5937,7 +5937,7 @@ cleanup:
fts_sql_rollback(trx);
}
trx->free();
trx->clear_and_free();
}
if (!cache->stopword_info.cached_stopword) {

View file

@ -1736,7 +1736,7 @@ fts_optimize_free(
mem_heap_t* heap = static_cast<mem_heap_t*>(optim->self_heap->arg);
trx_commit_for_mysql(optim->trx);
optim->trx->free();
optim->trx->clear_and_free();
optim->trx = NULL;
fts_doc_ids_free(optim->to_delete);

View file

@ -1460,7 +1460,7 @@ static void innodb_drop_database(handlerton*, char *path)
trx->commit();
row_mysql_unlock_data_dictionary(trx);
trx->free();
trx->clear_and_free();
if (!stats_failed)
stats.close();
@ -2002,7 +2002,7 @@ fail:
all_fail:
mtr.commit();
trx->free();
trx->clear_and_free();
ut_free(pcur.old_rec_buf);
ut_d(purge_sys.resume_FTS());
}
@ -2793,6 +2793,7 @@ trx_deregister_from_2pc(
{
trx->is_registered= false;
trx->active_commit_ordered= false;
trx->commit_lsn= 0;
}
/**
@ -2922,7 +2923,6 @@ static int innobase_rollback_by_xid(handlerton*, XID *xid) noexcept
trx_deregister_from_2pc(trx);
THD* thd= trx->mysql_thd;
dberr_t err= trx_rollback_for_mysql(trx);
ut_ad(!trx->will_lock);
trx->free();
return convert_error_code_to_mysql(err, 0, thd);
}
@ -13304,7 +13304,7 @@ ha_innobase::create(const char *name, TABLE *form, HA_CREATE_INFO *create_info,
log_write_up_to(trx->commit_lsn, true);
info.table()->release();
}
trx->free();
trx->clear_and_free();
}
}
else if (!error && m_prebuilt)
@ -13742,6 +13742,7 @@ err_exit:
for (pfs_os_file_t d : deleted)
os_file_close(d);
log_write_up_to(trx->commit_lsn, true);
trx->commit_lsn= 0;
if (trx != parent_trx)
trx->free();
if (!fts)
@ -13906,7 +13907,7 @@ int ha_innobase::truncate()
m_prebuilt->stored_select_lock_type= stored_lock;
}
trx->free();
trx->clear_and_free();
#ifdef BTR_CUR_HASH_ADAPT
if (UT_LIST_GET_LEN(ib_table->freed_indexes))
@ -14066,7 +14067,7 @@ int ha_innobase::truncate()
}
}
trx->free();
trx->clear_and_free();
if (!stats_failed)
stats.close();
mem_heap_free(heap);
@ -14266,7 +14267,7 @@ ha_innobase::rename_table(
log_write_up_to(trx->commit_lsn, true);
}
trx->flush_log_later = false;
trx->free();
trx->clear_and_free();
if (!stats_fail) {
stats.close();
}
@ -17268,7 +17269,6 @@ innobase_commit_by_xid(
innobase_commit_low(trx);
ut_ad(trx->mysql_thd == NULL);
trx_deregister_from_2pc(trx);
ut_ad(!trx->will_lock); /* trx cache requirement */
trx->free();
return(XA_OK);

View file

@ -4388,8 +4388,11 @@ static void unlock_and_close_files(const std::vector<pfs_os_file_t> &deleted,
row_mysql_unlock_data_dictionary(trx);
for (pfs_os_file_t d : deleted)
os_file_close(d);
if (trx->commit_lsn)
log_write_up_to(trx->commit_lsn, true);
if (lsn_t lsn= trx->commit_lsn)
{
trx->commit_lsn= 0;
log_write_up_to(lsn, true);
}
}
/** Commit a DDL transaction and unlink any deleted files. */
@ -9335,6 +9338,7 @@ inline bool rollback_inplace_alter_table(Alter_inplace_info *ha_alter_info,
{
free_and_exit:
DBUG_ASSERT(ctx->prebuilt == prebuilt);
ut_d(ctx->trx->commit_lsn= 0);
ctx->trx->free();
ctx->trx= nullptr;

View file

@ -1097,8 +1097,10 @@ public:
bool has_stats_table_lock() const;
/** Free the memory to trx_pools */
void free();
void free() noexcept;
/** Clear commit_lsn and free the memory */
void clear_and_free() noexcept { ut_d(commit_lsn= 0;) free(); }
void assert_freed() const
{

View file

@ -61,6 +61,7 @@ bool trx_t::rollback_finish() noexcept
if (UNIV_LIKELY(error_state == DB_SUCCESS))
{
commit();
commit_lsn= 0;
return true;
}
@ -82,6 +83,7 @@ bool trx_t::rollback_finish() noexcept
undo= nullptr;
}
commit_low();
commit_lsn= 0;
return commit_cleanup();
}

View file

@ -357,7 +357,7 @@ trx_t *trx_create()
}
/** Free the memory to trx_pools */
void trx_t::free()
void trx_t::free() noexcept
{
autoinc_locks.fake_defined();
#ifdef HAVE_MEM_CHECK
@ -378,8 +378,10 @@ void trx_t::free()
ut_ad(magic_n == TRX_MAGIC_N);
ut_ad(!read_only);
ut_ad(!lock.wait_lock);
ut_ad(!commit_lsn);
dict_operation= false;
commit_lsn= 0;
trx_sys.deregister_trx(this);
check_unique_secondary= true;
check_foreigns= true;
@ -1255,6 +1257,8 @@ static void trx_flush_log_if_needed(lsn_t lsn, trx_t *trx)
if (log_sys.get_flushed_lsn(std::memory_order_relaxed) >= lsn)
return;
ut_ad(!trx->mysql_thd || !trx->mysql_thd->tx_read_only);
const bool flush=
(srv_file_flush_method != SRV_NOSYNC &&
(srv_flush_log_at_trx_commit & 1));
@ -1745,12 +1749,14 @@ void trx_commit_complete_for_mysql(trx_t *trx)
return;
switch (srv_flush_log_at_trx_commit) {
case 0:
return;
goto func_exit;
case 1:
if (trx->active_commit_ordered)
return;
}
trx_flush_log_if_needed(lsn, trx);
func_exit:
trx->commit_lsn= 0;
}
/**********************************************************************//**