MDEV-35281 SR transaction crashes with innodb_snapshot_isolation

Ignore snapshot isolation conflict during fragment removal, before
streaming transaction commits. This happens when a streaming
transaction creates a read view that precedes the INSERTion of
fragments into the streaming_log table. Fragments are INSERTed
using a different transaction. These fragments are then removed
as part of COMMIT of the streaming transaction. This fragment
removal operation could fail when the fragments were not part
of the transaction's read view, thus violating snapshot isolation.
This commit is contained in:
Daniele Sciascia 2024-10-29 10:47:20 +01:00
parent f5aed74573
commit e821c9fa7c
5 changed files with 74 additions and 4 deletions

View file

@ -0,0 +1,9 @@
connection node_2;
connection node_1;
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY);
SET SESSION wsrep_trx_fragment_size=1;
SET SESSION innodb_snapshot_isolation=ON;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
INSERT INTO t1 VALUES (1);
COMMIT;
DROP TABLE t1;

View file

@ -0,0 +1,37 @@
#
# MDEV-35281 - SR transaction crashes with innodb_snapshot_isolation
#
# Test outline: a simple SR transaction fails to remove
# its fragments from streaming_log table, with error
# HA_ERR_RECORD_CHANGED.
# This happens with the following sequence of events:
# 1. Start a streaming replication transaction
# 2. The transaction creates a read view in InnoDB
# (this must happen before a fragment is replicated)
# 3. The transaction replicates a fragment.
# Internally, a new transaction is created to INSERT
# a row representing the fragment into the streaming_log
# table and is committed immediately.
# 4. The streaming replication transaction COMMITs.
# Before committing, the transaction replicates
# a commit fragment and DELETEs its fragments that
# were created in the streaming_log table.
# If the bug is present, fragment removal from the
# streaming_log table violates snapshot isolation,
# thus the operation fails with HA_ERR_RECORD_CHANGED.
# (One or more records from the streaming_log table
# are removed, while these were not visible to
# the transaction).
# Shared Galera cluster test setup.
--source include/galera_cluster.inc
# Table written to by the streaming replication (SR) transaction below.
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY);
# Step 1: set a fragment size for this session so that the transaction
# below runs as a streaming replication transaction.
SET SESSION wsrep_trx_fragment_size=1;
# Enable InnoDB snapshot isolation, the setting under which the bug shows.
SET SESSION innodb_snapshot_isolation=ON;
# Step 2: create the read view before any fragment is replicated.
START TRANSACTION WITH CONSISTENT SNAPSHOT;
# Step 3: replicate a fragment. Its streaming_log row is INSERTed by a
# separate transaction, so it is not part of the read view created above.
INSERT INTO t1 VALUES (1);
# Step 4: COMMIT removes the fragments from streaming_log. With the bug
# present this removal failed with HA_ERR_RECORD_CHANGED.
COMMIT;
# Cleanup.
DROP TABLE t1;

View file

@ -5561,9 +5561,11 @@ public:
query_id_t wsrep_last_query_id;
XID wsrep_xid;
/** This flag denotes that record locking should be skipped during INSERT
and gap locking during SELECT. Only used by the streaming replication thread
that only modifies the wsrep_schema.SR table. */
/** This flag denotes that record locking should be skipped during INSERT,
gap locking during SELECT, and write-write conflict detection due to InnoDB
snapshot isolation during DELETE.
Only used by the streaming replication thread that only modifies the
mysql.wsrep_streaming_log table. */
my_bool wsrep_skip_locking;
mysql_cond_t COND_wsrep_thd;

View file

@ -183,6 +183,25 @@ private:
my_bool m_wsrep_ignore_table;
};
class wsrep_skip_locking
{
public:
wsrep_skip_locking(THD *thd)
: m_thd(thd)
, m_wsrep_skip_locking(thd->wsrep_skip_locking)
{
thd->wsrep_skip_locking= true;
}
~wsrep_skip_locking()
{
m_thd->wsrep_skip_locking= m_wsrep_skip_locking;
}
private:
THD *m_thd;
my_bool m_wsrep_skip_locking;
};
class thd_server_status
{
public:
@ -1235,6 +1254,7 @@ int Wsrep_schema::remove_fragments(THD* thd,
Wsrep_schema_impl::wsrep_ignore_table wsrep_ignore_table(thd);
Wsrep_schema_impl::binlog_off binlog_off(thd);
Wsrep_schema_impl::sql_safe_updates sql_safe_updates(thd);
Wsrep_schema_impl::wsrep_skip_locking skip_locking(thd);
Query_tables_list query_tables_list_backup;
Open_tables_backup open_tables_backup;

View file

@ -6375,7 +6375,9 @@ lock_clust_rec_read_check_and_lock(
&& trx->snapshot_isolation
&& trx->read_view.is_open()
&& !trx->read_view.changes_visible(
trx_read_trx_id(rec + row_trx_id_offset(rec, index)))) {
trx_read_trx_id(rec + row_trx_id_offset(rec, index)))
&& IF_WSREP(!(trx->is_wsrep()
&& wsrep_thd_skip_locking(trx->mysql_thd)), true)) {
return DB_RECORD_CHANGED;
}