Merge 10.6 into 10.8

This commit is contained in:
Marko Mäkelä 2023-02-28 10:36:17 +02:00
commit 6ac44ac3ab
26 changed files with 405 additions and 228 deletions

View file

@ -1,7 +1,11 @@
SET @save_frequency=@@GLOBAL.innodb_purge_rseg_truncate_frequency;
SET @save_dbug=@@GLOBAL.debug_dbug;
SET GLOBAL innodb_purge_rseg_truncate_frequency=1;
CREATE TABLE t1(f1 INT NOT NULL, f2 int not null,
f3 int generated always as (f2 * 2) VIRTUAL,
primary key(f1), INDEX (f3))ENGINE=InnoDB;
connect con1,localhost,root,,,;
InnoDB 0 transactions not purged
START TRANSACTION WITH CONSISTENT SNAPSHOT;
connection default;
INSERT INTO t1(f1, f2) VALUES(1,2);
@ -18,5 +22,6 @@ commit;
disconnect con1;
disconnect con2;
connection default;
set global debug_dbug=default;
SET GLOBAL innodb_purge_rseg_truncate_frequency=@save_frequency;
SET GLOBAL debug_dbug=@save_dbug;
DROP TABLE t1;

View file

@ -1,9 +1,14 @@
--source include/have_innodb.inc
--source include/have_debug.inc
SET @save_frequency=@@GLOBAL.innodb_purge_rseg_truncate_frequency;
SET @save_dbug=@@GLOBAL.debug_dbug;
SET GLOBAL innodb_purge_rseg_truncate_frequency=1;
CREATE TABLE t1(f1 INT NOT NULL, f2 int not null,
f3 int generated always as (f2 * 2) VIRTUAL,
primary key(f1), INDEX (f3))ENGINE=InnoDB;
connect(con1,localhost,root,,,);
--source ../innodb/include/wait_all_purged.inc
START TRANSACTION WITH CONSISTENT SNAPSHOT;
connection default;
@ -26,5 +31,6 @@ commit;
disconnect con1;
disconnect con2;
connection default;
set global debug_dbug=default;
SET GLOBAL innodb_purge_rseg_truncate_frequency=@save_frequency;
SET GLOBAL debug_dbug=@save_dbug;
DROP TABLE t1;

View file

@ -1,4 +1,7 @@
SET @save_freq=@@GLOBAL.innodb_purge_rseg_truncate_frequency;
SET GLOBAL innodb_purge_rseg_truncate_frequency=1;
CREATE TABLE t (a int PRIMARY KEY, b int NOT NULL UNIQUE) engine = InnoDB;
InnoDB 0 transactions not purged
connect prevent_purge,localhost,root,,;
start transaction with consistent snapshot;
connect con_del_1,localhost,root,,;
@ -34,3 +37,4 @@ disconnect con_del_2;
connection default;
SET DEBUG_SYNC = 'RESET';
DROP TABLE t;
SET GLOBAL innodb_purge_rseg_truncate_frequency=@save_freq;

View file

@ -7,6 +7,7 @@ SET GLOBAL innodb_purge_rseg_truncate_frequency = 1;
SET GLOBAL innodb_purge_rseg_truncate_frequency = 1;
CREATE TABLE t1(a INT PRIMARY KEY, b INT NOT NULL)
ROW_FORMAT=REDUNDANT ENGINE=InnoDB;
InnoDB 0 transactions not purged
connect prevent_purge,localhost,root;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
connection default;
@ -19,7 +20,11 @@ UPDATE t1 SET b=4 WHERE a=3;
disconnect prevent_purge;
connection default;
InnoDB 0 transactions not purged
connection con1;
ROLLBACK;
disconnect con1;
connection default;
InnoDB 0 transactions not purged
FLUSH TABLE t1 FOR EXPORT;
Clustered index root page contents:
N_RECS=3; LEVEL=0

View file

@ -3,6 +3,7 @@ SET GLOBAL innodb_purge_rseg_truncate_frequency=1;
CREATE TABLE t1(id INT PRIMARY key, val VARCHAR(16000)) ENGINE=InnoDB;
INSERT INTO t1 (id,val) SELECT 2*seq,'x' FROM seq_0_to_1023;
connect con1,localhost,root,,;
InnoDB 0 transactions not purged
START TRANSACTION WITH CONSISTENT SNAPSHOT;
connection default;
DELETE FROM t1 WHERE id=1788;

View file

@ -9,12 +9,10 @@ SET GLOBAL innodb_purge_rseg_truncate_frequency= 1;
CREATE PROCEDURE insert_n(start int, end int)
BEGIN
DECLARE i INT DEFAULT start;
START TRANSACTION;
WHILE i <= end do
INSERT INTO t1 VALUES (1, 2, 3) ON DUPLICATE KEY UPDATE c = i;
SET i = i + 1;
END WHILE;
COMMIT;
END~~
CREATE FUNCTION num_pages_get()
RETURNS INT
@ -30,6 +28,7 @@ END~~
#
CREATE TABLE t1 (a INT, b INT, c INT, PRIMARY KEY(a,b), KEY (b,c))
ENGINE=InnoDB STATS_PERSISTENT=0;
InnoDB 0 transactions not purged
BEGIN;
SELECT * FROM t1;
a b c
@ -38,20 +37,24 @@ a b c
#
connect con2, localhost, root,,;
connection con2;
BEGIN;
INSERT INTO t1 VALUES (1, 2, 3) ON DUPLICATE KEY UPDATE c = NULL;
CALL insert_n(1, 50);;
connect con3, localhost, root,,;
connection con3;
BEGIN;
CALL insert_n(51, 100);;
connection con2;
COMMIT;
connection con3;
INSERT INTO t1 VALUES (1, 2, 1) ON DUPLICATE KEY UPDATE c = NULL;
COMMIT;
connection default;
#
# Connect to default and record how many pages were accessed
# when selecting the record using the secondary key.
#
InnoDB 4 transactions not purged
InnoDB 2 transactions not purged
SET @num_pages_1 = num_pages_get();
SELECT * FROM t1 force index (b);
a b c

View file

@ -0,0 +1,34 @@
CREATE TABLE t (pk int PRIMARY KEY, c varchar(10)) ENGINE=InnoDB;
INSERT INTO t VALUES (10, "0123456789");
connection default;
BEGIN;
SELECT * FROM t WHERE c = 10 FOR UPDATE;
pk c
connect trx2, localhost,root,,;
BEGIN;
SET DEBUG_SYNC="lock_wait_start SIGNAL trx2_start_waiting";
SET DEBUG_SYNC="lock_wait_end SIGNAL trx2_wait_end WAIT_FOR trx2_cont_upd";
SET DEBUG_SYNC="lock_rec_store_on_page_infimum_end SIGNAL trx2_moved_locks WAIT_FOR trx2_cont";
UPDATE t SET c = NULL WHERE pk = 10;
connect trx3, localhost,root,,;
SET DEBUG_SYNC="now WAIT_FOR trx2_start_waiting";
SET innodb_lock_wait_timeout=1;
BEGIN;
SET DEBUG_SYNC="lock_wait_start SIGNAL trx3_start_waiting WAIT_FOR trx3_cont_waiting";
SET DEBUG_SYNC="lock_sys_t_cancel_enter SIGNAL trx3_cancel_enter WAIT_FOR trx3_cont_cancel_waiting";
UPDATE t SET c = "abcdefghij" WHERE pk = 10;
connection default;
SET DEBUG_SYNC="now WAIT_FOR trx3_start_waiting";
COMMIT;
SET DEBUG_SYNC="now WAIT_FOR trx2_wait_end";
SET DEBUG_SYNC="now SIGNAL trx3_cont_waiting";
SET DEBUG_SYNC="now WAIT_FOR trx3_cancel_enter";
SET DEBUG_SYNC="now SIGNAL trx2_cont_upd";
SET DEBUG_SYNC="now WAIT_FOR trx2_moved_locks";
SET DEBUG_SYNC="now SIGNAL trx3_cont_cancel_waiting";
SET DEBUG_SYNC="now SIGNAL trx2_cont";
disconnect trx2;
disconnect trx3;
connection default;
SET DEBUG_SYNC="RESET";
DROP TABLE t;

View file

@ -3,8 +3,11 @@
source include/have_debug.inc;
source include/have_debug_sync.inc;
SET @save_freq=@@GLOBAL.innodb_purge_rseg_truncate_frequency;
SET GLOBAL innodb_purge_rseg_truncate_frequency=1;
CREATE TABLE t (a int PRIMARY KEY, b int NOT NULL UNIQUE) engine = InnoDB;
--source include/wait_all_purged.inc
--connect(prevent_purge,localhost,root,,)
start transaction with consistent snapshot;
@ -80,4 +83,5 @@ INSERT INTO t VALUES(30, 20);
SET DEBUG_SYNC = 'RESET';
DROP TABLE t;
SET GLOBAL innodb_purge_rseg_truncate_frequency=@save_freq;
--source include/wait_until_count_sessions.inc

View file

@ -14,6 +14,7 @@ SET GLOBAL innodb_purge_rseg_truncate_frequency = 1;
CREATE TABLE t1(a INT PRIMARY KEY, b INT NOT NULL)
ROW_FORMAT=REDUNDANT ENGINE=InnoDB;
--source include/wait_all_purged.inc
--connect (prevent_purge,localhost,root)
START TRANSACTION WITH CONSISTENT SNAPSHOT;
@ -33,7 +34,12 @@ UPDATE t1 SET b=4 WHERE a=3;
# Initiate a full purge, which should reset the DB_TRX_ID except for a=3.
--source include/wait_all_purged.inc
# Initiate a ROLLBACK of the update, which should reset the DB_TRX_ID for a=3.
--connection con1
ROLLBACK;
--disconnect con1
--connection default
# Reset the DB_TRX_ID for the hidden ADD COLUMN metadata record.
--source include/wait_all_purged.inc
FLUSH TABLE t1 FOR EXPORT;
# The following is based on innodb.table_flags:

View file

@ -9,6 +9,7 @@ CREATE TABLE t1(id INT PRIMARY key, val VARCHAR(16000)) ENGINE=InnoDB;
INSERT INTO t1 (id,val) SELECT 2*seq,'x' FROM seq_0_to_1023;
connect(con1,localhost,root,,);
source include/wait_all_purged.inc;
# Prevent purge.
START TRANSACTION WITH CONSISTENT SNAPSHOT;
connection default;

View file

@ -13,12 +13,10 @@ DELIMITER ~~;
CREATE PROCEDURE insert_n(start int, end int)
BEGIN
DECLARE i INT DEFAULT start;
START TRANSACTION;
WHILE i <= end do
INSERT INTO t1 VALUES (1, 2, 3) ON DUPLICATE KEY UPDATE c = i;
SET i = i + 1;
END WHILE;
COMMIT;
END~~
CREATE FUNCTION num_pages_get()
@ -37,6 +35,7 @@ DELIMITER ;~~
--echo #
CREATE TABLE t1 (a INT, b INT, c INT, PRIMARY KEY(a,b), KEY (b,c))
ENGINE=InnoDB STATS_PERSISTENT=0;
--source include/wait_all_purged.inc
BEGIN;
SELECT * FROM t1;
@ -45,18 +44,22 @@ SELECT * FROM t1;
--echo #
connect (con2, localhost, root,,);
connection con2;
BEGIN;
INSERT INTO t1 VALUES (1, 2, 3) ON DUPLICATE KEY UPDATE c = NULL;
--send CALL insert_n(1, 50);
connect (con3, localhost, root,,);
connection con3;
BEGIN;
--send CALL insert_n(51, 100);
connection con2;
reap;
COMMIT;
connection con3;
reap;
INSERT INTO t1 VALUES (1, 2, 1) ON DUPLICATE KEY UPDATE c = NULL;
COMMIT;
connection default;
@ -64,7 +67,7 @@ connection default;
--echo # Connect to default and record how many pages were accessed
--echo # when selecting the record using the secondary key.
--echo #
--let $wait_all_purged=4
--let $wait_all_purged=2
--source include/wait_all_purged.inc
SET @num_pages_1 = num_pages_get();
SELECT * FROM t1 force index (b);

View file

@ -0,0 +1,58 @@
# Regression test for a race between two transactions waiting on the same
# record: trx2 moves the record locks (including the waiting lock of trx3)
# to the page infimum while trx3 is in the middle of cancelling its lock wait
# after a lock wait timeout. DEBUG_SYNC points are used to force this
# interleaving, so a debug build with debug_sync support is required.
--source include/have_innodb.inc
--source include/count_sessions.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
CREATE TABLE t (pk int PRIMARY KEY, c varchar(10)) ENGINE=InnoDB;
INSERT INTO t VALUES (10, "0123456789");
--connection default
BEGIN;
# NOTE(review): this locking read presumably acquires the record lock that
# makes the UPDATE statements of trx2 and trx3 wait until the COMMIT below;
# confirm.
SELECT * FROM t WHERE c = 10 FOR UPDATE;
--connect(trx2, localhost,root,,)
BEGIN;
SET DEBUG_SYNC="lock_wait_start SIGNAL trx2_start_waiting";
SET DEBUG_SYNC="lock_wait_end SIGNAL trx2_wait_end WAIT_FOR trx2_cont_upd";
SET DEBUG_SYNC="lock_rec_store_on_page_infimum_end SIGNAL trx2_moved_locks WAIT_FOR trx2_cont";
#################
# We need to update the clustered index record without changing ordering
# fields, but changing the size of a non-ordering field, to cause the locks
# to be moved from the deleted record to the infimum.
###
--send UPDATE t SET c = NULL WHERE pk = 10
--connect(trx3, localhost,root,,)
SET DEBUG_SYNC="now WAIT_FOR trx2_start_waiting";
#################
# The condition variable wait in lock_wait() must be ended by a timeout.
###
SET innodb_lock_wait_timeout=1;
BEGIN;
SET DEBUG_SYNC="lock_wait_start SIGNAL trx3_start_waiting WAIT_FOR trx3_cont_waiting";
SET DEBUG_SYNC="lock_sys_t_cancel_enter SIGNAL trx3_cancel_enter WAIT_FOR trx3_cont_cancel_waiting";
--send UPDATE t SET c = "abcdefghij" WHERE pk = 10
--connection default
# Drive trx2 and trx3 step by step through the sync points armed above.
SET DEBUG_SYNC="now WAIT_FOR trx3_start_waiting";
COMMIT;
SET DEBUG_SYNC="now WAIT_FOR trx2_wait_end";
SET DEBUG_SYNC="now SIGNAL trx3_cont_waiting";
SET DEBUG_SYNC="now WAIT_FOR trx3_cancel_enter";
SET DEBUG_SYNC="now SIGNAL trx2_cont_upd";
SET DEBUG_SYNC="now WAIT_FOR trx2_moved_locks";
#################
# If the bug is not fixed, there will be an assertion failure here, because
# trx2 moved the lock of trx3 from the deleted record to the infimum while
# trx3 was trying to cancel the lock.
###
SET DEBUG_SYNC="now SIGNAL trx3_cont_cancel_waiting";
SET DEBUG_SYNC="now SIGNAL trx2_cont";
# The results of the two sent UPDATE statements are not reaped; both
# connections are simply disconnected.
--disconnect trx2
--disconnect trx3
--connection default
SET DEBUG_SYNC="RESET";
DROP TABLE t;
--source include/wait_until_count_sessions.inc

View file

@ -31,5 +31,33 @@ set DEBUG_SYNC= 'now SIGNAL fts_drop_index';
connection con1;
drop table t1, t2;
connection default;
set DEBUG_SYNC=RESET;
SET @@GLOBAL.debug_dbug = @saved_dbug;
disconnect con1;
#
# MDEV-25984 Assertion `max_doc_id > 0' failed in fts_init_doc_id()
#
call mtr.add_suppression("InnoDB: \\(Lock wait timeout\\) while getting next doc id for table `test`.`t1`");
CREATE TABLE t1(f1 CHAR(100), f2 INT, fulltext(f1))ENGINE=InnoDB;
INSERT INTO t1 VALUES("mariadb", 1), ("innodb", 1);
# restart
SET DEBUG_SYNC='innodb_rollback_after_fts_lock SIGNAL insert_dml WAIT_FOR ddl_continue';
ALTER TABLE t1 ADD UNIQUE INDEX(f2);
connect con1,localhost,root,,,;
SET DEBUG_SYNC='now WAIT_FOR insert_dml';
SET DEBUG_SYNC='fts_cmp_set_sync_doc_id_retry SIGNAL ddl_continue WAIT_FOR dml_finish';
INSERT INTO t1 VALUES("index", 2);
connection default;
ERROR 23000: Duplicate entry '1' for key 'f2'
SET DEBUG_SYNC="now SIGNAL dml_finish";
connection con1;
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`f1` char(100) DEFAULT NULL,
`f2` int(11) DEFAULT NULL,
FULLTEXT KEY `f1` (`f1`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
connection default;
disconnect con1;
DROP TABLE t1;
set DEBUG_SYNC=RESET;

View file

@ -129,8 +129,9 @@ test
select * from t1 where a like "te_t";
a
test
select * from t1 where match a against ("te*" in boolean mode)+0;
select * from t1 where match a against ("te*" in boolean mode);
a
test
drop table t1;
#
# Bug #49734: Crash on EXPLAIN EXTENDED UNION ... ORDER BY

View file

@ -48,5 +48,33 @@ connection con1;
reap;
drop table t1, t2;
connection default;
set DEBUG_SYNC=RESET;
SET @@GLOBAL.debug_dbug = @saved_dbug;
disconnect con1;
--echo #
--echo # MDEV-25984 Assertion `max_doc_id > 0' failed in fts_init_doc_id()
--echo #
call mtr.add_suppression("InnoDB: \\(Lock wait timeout\\) while getting next doc id for table `test`.`t1`");
CREATE TABLE t1(f1 CHAR(100), f2 INT, fulltext(f1))ENGINE=InnoDB;
INSERT INTO t1 VALUES("mariadb", 1), ("innodb", 1);
--source include/restart_mysqld.inc
SET DEBUG_SYNC='innodb_rollback_after_fts_lock SIGNAL insert_dml WAIT_FOR ddl_continue';
SEND ALTER TABLE t1 ADD UNIQUE INDEX(f2);
connect(con1,localhost,root,,,);
SET DEBUG_SYNC='now WAIT_FOR insert_dml';
SET DEBUG_SYNC='fts_cmp_set_sync_doc_id_retry SIGNAL ddl_continue WAIT_FOR dml_finish';
send INSERT INTO t1 VALUES("index", 2);
connection default;
--error ER_DUP_ENTRY
reap;
SET DEBUG_SYNC="now SIGNAL dml_finish";
connection con1;
reap;
SHOW CREATE TABLE t1;
connection default;
disconnect con1;
DROP TABLE t1;
set DEBUG_SYNC=RESET;

View file

@ -152,10 +152,7 @@ insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
select * from t1 where a like "abc%";
select * from t1 where a like "test%";
select * from t1 where a like "te_t";
# InnoDB_FTS: we don't support the postfix "+0"
# Work around MDEV-29871 (FIXME: remove this)
--echo select * from t1 where match a against ("te*" in boolean mode)+0;
--echo a
select * from t1 where match a against ("te*" in boolean mode);
drop table t1;

View file

@ -2575,7 +2575,6 @@ fts_cmp_set_sync_doc_id(
que_t* graph = NULL;
fts_cache_t* cache = table->fts->cache;
char table_name[MAX_FULL_NAME_LEN];
retry:
ut_a(table->fts->doc_col != ULINT_UNDEFINED);
fts_table.suffix = "CONFIG";
@ -2583,7 +2582,8 @@ retry:
fts_table.type = FTS_COMMON_TABLE;
fts_table.table = table;
trx = trx_create();
trx= trx_create();
retry:
trx_start_internal(trx);
trx->op_info = "update the next FTS document id";
@ -2663,7 +2663,8 @@ func_exit:
"for table " << table->name;
fts_sql_rollback(trx);
if (error == DB_DEADLOCK) {
if (error == DB_DEADLOCK || error == DB_LOCK_WAIT_TIMEOUT) {
DEBUG_SYNC_C("fts_cmp_set_sync_doc_id_retry");
std::this_thread::sleep_for(FTS_DEADLOCK_RETRY_WAIT);
goto retry;
}

View file

@ -9035,6 +9035,7 @@ inline bool rollback_inplace_alter_table(Alter_inplace_info *ha_alter_info,
ut_a(!lock_table_for_trx(dict_sys.sys_fields, ctx->trx, LOCK_X));
}
innodb_lock_wait_timeout= save_timeout;
DEBUG_SYNC_C("innodb_rollback_after_fts_lock");
row_mysql_lock_data_dictionary(ctx->trx);
ctx->rollback_instant();
innobase_rollback_sec_index(ctx->old_table, table,

View file

@ -891,8 +891,8 @@ public:
/** Cancel a waiting lock request.
@tparam check_victim whether to check for DB_DEADLOCK
@param lock waiting lock request
@param trx active transaction
@param lock waiting lock request
@retval DB_SUCCESS if no lock existed
@retval DB_DEADLOCK if trx->lock.was_chosen_as_deadlock_victim was set
@retval DB_LOCK_WAIT if the lock was canceled */

View file

@ -65,53 +65,44 @@ struct alignas(CPU_LEVEL1_DCACHE_LINESIZE) trx_rseg_t
/** length of the TRX_RSEG_HISTORY list (number of transactions) */
uint32_t history_size;
/** Last known transaction that has not been purged yet,
or 0 if everything has been purged. */
trx_id_t needs_purge;
private:
/** Reference counter to track rseg allocated transactions,
with SKIP and NEEDS_PURGE flags. */
/** Reference counter to track is_persistent() transactions,
with SKIP flag. */
std::atomic<uint32_t> ref;
/** Whether undo tablespace truncation is pending */
static constexpr uint32_t SKIP= 1;
/** Whether the log segment needs purge */
static constexpr uint32_t NEEDS_PURGE= 2;
/** Transaction reference count multiplier */
static constexpr uint32_t REF= 4;
static constexpr uint32_t REF= 2;
uint32_t ref_load() const { return ref.load(std::memory_order_relaxed); }
/** Set a bit in ref */
template<bool needs_purge> void ref_set()
/** Set the SKIP bit */
void ref_set_skip()
{
static_assert(SKIP == 1U << 0, "compatibility");
static_assert(NEEDS_PURGE == 1U << 1, "compatibility");
static_assert(SKIP == 1U, "compatibility");
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
if (needs_purge)
__asm__ __volatile__("lock btsl $1, %0" : "+m" (ref));
else
__asm__ __volatile__("lock btsl $0, %0" : "+m" (ref));
__asm__ __volatile__("lock btsl $0, %0" : "+m" (ref));
#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
_interlockedbittestandset(reinterpret_cast<volatile long*>(&ref),
needs_purge);
_interlockedbittestandset(reinterpret_cast<volatile long*>(&ref), 0);
#else
ref.fetch_or(needs_purge ? NEEDS_PURGE : SKIP, std::memory_order_relaxed);
ref.fetch_or(SKIP, std::memory_order_relaxed);
#endif
}
/** Clear a bit in ref */
template<bool needs_purge> void ref_reset()
void ref_reset_skip()
{
static_assert(SKIP == 1U << 0, "compatibility");
static_assert(NEEDS_PURGE == 1U << 1, "compatibility");
static_assert(SKIP == 1U, "compatibility");
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
if (needs_purge)
__asm__ __volatile__("lock btrl $1, %0" : "+m" (ref));
else
__asm__ __volatile__("lock btrl $0, %0" : "+m" (ref));
__asm__ __volatile__("lock btrl $0, %0" : "+m" (ref));
#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
_interlockedbittestandreset(reinterpret_cast<volatile long*>(&ref),
needs_purge);
_interlockedbittestandreset(reinterpret_cast<volatile long*>(&ref), 0);
#else
ref.fetch_and(needs_purge ? ~NEEDS_PURGE : ~SKIP,
std::memory_order_relaxed);
ref.fetch_and(~SKIP, std::memory_order_relaxed);
#endif
}
@ -125,26 +116,20 @@ public:
void destroy();
/** Note that undo tablespace truncation was started. */
void set_skip_allocation() { ut_ad(is_persistent()); ref_set<false>(); }
void set_skip_allocation() { ut_ad(is_persistent()); ref_set_skip(); }
/** Note that undo tablespace truncation was completed. */
void clear_skip_allocation()
{
ut_ad(is_persistent());
#if defined DBUG_OFF
ref_reset<false>();
ref_reset_skip();
#else
ut_d(auto r=) ref.fetch_and(~SKIP, std::memory_order_relaxed);
ut_ad(r == SKIP);
#endif
}
/** Note that the rollback segment requires purge. */
void set_needs_purge() { ref_set<true>(); }
/** Note that the rollback segment will not require purge. */
void clear_needs_purge() { ref_reset<true>(); }
/** @return whether the segment is marked for undo truncation */
bool skip_allocation() const { return ref_load() & SKIP; }
/** @return whether the segment needs purge */
bool needs_purge() const { return ref_load() & NEEDS_PURGE; }
/** Increment the reference count */
void acquire()
{ ut_d(auto r=) ref.fetch_add(REF); ut_ad(!(r & SKIP)); }

View file

@ -246,12 +246,10 @@ trx_undo_free_at_shutdown(trx_t *trx);
@param[in,out] rseg rollback segment
@param[in] id rollback segment slot
@param[in] page_no undo log segment page number
@param[in,out] max_trx_id the largest observed transaction ID
@return the undo log
@retval nullptr on error */
trx_undo_t *
trx_undo_mem_create_at_db_start(trx_rseg_t *rseg, ulint id, uint32_t page_no,
trx_id_t &max_trx_id);
trx_undo_mem_create_at_db_start(trx_rseg_t *rseg, ulint id, uint32_t page_no);
#endif /* !UNIV_INNOCHECKSUM */
@ -493,6 +491,8 @@ or 0 if the transaction has not been committed */
/** Before MariaDB 10.3.1, when purge did not reset DB_TRX_ID of
surviving user records, this used to be called TRX_UNDO_DEL_MARKS.
This field is redundant; it is only being read by some debug assertions.
The value 1 indicates that purge needs to process the undo log segment.
The value 0 indicates that all of it has been processed, and
trx_purge_free_segment() has been invoked, so the log is not safe to access.

View file

@ -46,12 +46,12 @@ Created 5/7/1996 Heikki Tuuri
#include "srv0mon.h"
#include "que0que.h"
#include "scope.h"
#include <debug_sync.h>
#include <set>
#ifdef WITH_WSREP
#include <mysql/service_wsrep.h>
#include <debug_sync.h>
#endif /* WITH_WSREP */
/** The value of innodb_deadlock_detect */
@ -1882,6 +1882,7 @@ check_trx_error:
if (row_lock_wait)
lock_sys.wait_resume(trx->mysql_thd, suspend_time, my_hrtime_coarse());
/* Cache trx->lock.wait_lock to avoid unnecessary atomic variable load */
if (lock_t *lock= trx->lock.wait_lock)
{
lock_sys_t::cancel<false>(trx, lock);
@ -1905,6 +1906,12 @@ void lock_wait_end(trx_t *trx)
ut_d(const auto state= trx->state);
ut_ad(state == TRX_STATE_COMMITTED_IN_MEMORY || state == TRX_STATE_ACTIVE ||
state == TRX_STATE_PREPARED);
/* lock_wait() checks the trx->lock.was_chosen_as_deadlock_victim flag before
requesting lock_sys.wait_mutex, and if the flag is set, it returns an error,
which causes a transaction rollback, which can reset trx->lock.wait_thr before
deadlock resolution starts cancelling the victim's waiting lock. That is why we
do not check trx->lock.wait_thr here if the function was called from the
deadlock resolution function. */
ut_ad(from_deadlock || trx->lock.wait_thr);
if (trx->lock.was_chosen_as_deadlock_victim)
@ -3193,6 +3200,8 @@ lock_rec_store_on_page_infimum(
ut_ad(block->page.frame == page_align(rec));
const page_id_t id{block->page.id()};
ut_d(SCOPE_EXIT(
[]() { DEBUG_SYNC_C("lock_rec_store_on_page_infimum_end"); }));
LockGuard g{lock_sys.rec_hash, id};
lock_rec_move(g.cell(), *block, id, g.cell(), id,
@ -5770,17 +5779,30 @@ void lock_sys_t::cancel_lock_wait_for_trx(trx_t *trx)
/** Cancel a waiting lock request.
@tparam check_victim whether to check for DB_DEADLOCK
@param lock waiting lock request
@param trx active transaction
@param lock waiting lock request
@retval DB_SUCCESS if no lock existed
@retval DB_DEADLOCK if trx->lock.was_chosen_as_deadlock_victim was set
@retval DB_LOCK_WAIT if the lock was canceled */
template<bool check_victim>
dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock)
{
DEBUG_SYNC_C("lock_sys_t_cancel_enter");
mysql_mutex_assert_owner(&lock_sys.wait_mutex);
ut_ad(trx->lock.wait_lock == lock);
ut_ad(trx->state == TRX_STATE_ACTIVE);
/* trx->lock.wait_lock may be changed by other threads as long as
we are not holding lock_sys.latch.
So, trx->lock.wait_lock==lock does not necessarily hold, but both
pointers should be valid, because other threads cannot assign
trx->lock.wait_lock=nullptr (or invalidate *lock) while we are
holding lock_sys.wait_mutex. Also, the type of trx->lock.wait_lock
(record or table lock) cannot be changed by other threads. So, it is
safe to call lock->is_table() while not holding lock_sys.latch. If
we have to release and reacquire lock_sys.wait_mutex, we must reread
trx->lock.wait_lock. We must also reread trx->lock.wait_lock after
acquiring lock_sys.latch, as it can be changed to a different non-null
value by lock moving functions even if we hold lock_sys.wait_mutex. */
dberr_t err= DB_SUCCESS;
/* This would be too large for a memory transaction, except in the
DB_DEADLOCK case, which was already tested in lock_trx_handle_wait(). */
@ -5802,6 +5824,15 @@ dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock)
}
else
{
/* This function is invoked from the thread which executes the
transaction. Table locks are requested before record locks. Some other
transaction can't change trx->lock.wait_lock from table to record for the
current transaction at this point, because the current transaction has not
requested record locks yet. There is no need for other threads to move any
table locks. And trx->lock.wait_lock can't be set to null while we are
holding lock_sys.wait_mutex. That's why there is no need to reload
trx->lock.wait_lock here. */
ut_ad(lock == trx->lock.wait_lock);
resolve_table_lock:
dict_table_t *table= lock->un_member.tab_lock.table;
if (!table->lock_mutex_trylock())
@ -5812,6 +5843,7 @@ resolve_table_lock:
mysql_mutex_unlock(&lock_sys.wait_mutex);
table->lock_mutex_lock();
mysql_mutex_lock(&lock_sys.wait_mutex);
/* Cache trx->lock.wait_lock under the corresponding latches. */
lock= trx->lock.wait_lock;
if (!lock)
goto retreat;
@ -5821,6 +5853,10 @@ resolve_table_lock:
goto retreat;
}
}
else
/* Cache trx->lock.wait_lock under the corresponding latches if
it was not cached yet */
lock= trx->lock.wait_lock;
if (lock->is_waiting())
lock_cancel_waiting_and_release(lock);
/* Even if lock->is_waiting() did not hold above, we must return
@ -5844,6 +5880,7 @@ retreat:
mysql_mutex_unlock(&lock_sys.wait_mutex);
lock_sys.wr_lock(SRW_LOCK_CALL);
mysql_mutex_lock(&lock_sys.wait_mutex);
/* Cache trx->lock.wait_lock under the corresponding latches. */
lock= trx->lock.wait_lock;
/* Even if waiting lock was cancelled while lock_sys.wait_mutex was
unlocked, we need to return deadlock error if transaction was chosen
@ -5855,6 +5892,9 @@ retreat:
}
else
{
/* Cache trx->lock.wait_lock under the corresponding latches if
it was not cached yet */
lock= trx->lock.wait_lock;
resolve_record_lock:
if (lock->is_waiting())
lock_cancel_waiting_and_release(lock);
@ -5876,6 +5916,7 @@ resolve_record_lock:
void lock_sys_t::cancel(trx_t *trx)
{
mysql_mutex_lock(&lock_sys.wait_mutex);
/* Cache trx->lock.wait_lock to avoid unnecessary atomic variable load */
if (lock_t *lock= trx->lock.wait_lock)
{
/* Dictionary transactions must be immune to KILL, because they
@ -5943,6 +5984,7 @@ dberr_t lock_trx_handle_wait(trx_t *trx)
mysql_mutex_lock(&lock_sys.wait_mutex);
if (trx->lock.was_chosen_as_deadlock_victim)
err= DB_DEADLOCK;
/* Cache trx->lock.wait_lock to avoid unnecessary atomic variable load */
else if (lock_t *wait_lock= trx->lock.wait_lock)
err= lock_sys_t::cancel<true>(trx, wait_lock);
lock_sys.deadlock_check();

View file

@ -263,6 +263,7 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
+ undo->hdr_offset;
ut_ad(mach_read_from_2(undo_header + TRX_UNDO_NEEDS_PURGE) <= 1);
ut_ad(rseg->needs_purge > trx->id);
if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT
+ rseg_header->page.frame))) {
@ -356,7 +357,6 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
rseg->last_page_no = undo->hdr_page_no;
rseg->set_last_commit(undo->hdr_offset,
trx->rw_trx_hash_element->no);
rseg->set_needs_purge();
}
rseg->history_size++;
@ -387,24 +387,21 @@ static dberr_t trx_purge_remove_log_hdr(buf_block_t *rseg, buf_block_t* log,
MY_ATTRIBUTE((nonnull, warn_unused_result))
/** Free an undo log segment, and remove the header from the history list.
@param[in,out] mtr mini-transaction
@param[in,out] rseg rollback segment
@param[in] hdr_addr file address of log_hdr
@return error code */
static dberr_t trx_purge_free_segment(trx_rseg_t *rseg, fil_addr_t hdr_addr)
static dberr_t
trx_purge_free_segment(mtr_t &mtr, trx_rseg_t* rseg, fil_addr_t hdr_addr)
{
const page_id_t hdr_page_id{rseg->space->id, hdr_addr.page};
mtr_t mtr;
mtr.commit();
mtr.start();
/* We only need the latch to maintain rseg->curr_size. To follow the
latching order, we must acquire it before acquiring any related
page latch. */
rseg->latch.wr_lock(SRW_LOCK_CALL);
const page_id_t hdr_page_id{rseg->space->id, hdr_addr.page};
dberr_t err;
buf_block_t *rseg_hdr= rseg->get(&mtr, &err);
if (!rseg_hdr)
goto func_exit;
return err;
if (buf_block_t *block= buf_page_get_gen(hdr_page_id, 0, RW_X_LATCH,
nullptr, BUF_GET_POSSIBLY_FREED,
&mtr, &err))
@ -419,12 +416,10 @@ static dberr_t trx_purge_free_segment(trx_rseg_t *rseg, fil_addr_t hdr_addr)
while (!fseg_free_step_not_header(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER +
block->page.frame, &mtr))
{
rseg->latch.wr_unlock();
rseg_hdr->fix();
block->fix();
mtr.commit();
mtr.start();
rseg->latch.wr_lock(SRW_LOCK_CALL);
rseg_hdr->page.lock.x_lock();
block->page.lock.x_lock();
mtr.memo_push(rseg_hdr, MTR_MEMO_PAGE_X_FIX);
@ -443,13 +438,10 @@ static dberr_t trx_purge_free_segment(trx_rseg_t *rseg, fil_addr_t hdr_addr)
could become inaccessible garbage in the file space. */
err= trx_purge_remove_log_hdr(rseg_hdr, block, hdr_addr.boffset, &mtr);
if (UNIV_UNLIKELY(err != DB_SUCCESS))
goto func_exit;
return err;
byte *hist= TRX_RSEG + TRX_RSEG_HISTORY_SIZE + rseg_hdr->page.frame;
if (UNIV_UNLIKELY(mach_read_from_4(hist) < seg_size))
{
err= DB_CORRUPTION;
goto func_exit;
}
return DB_CORRUPTION;
mtr.write<4>(*rseg_hdr, hist, mach_read_from_4(hist) - seg_size);
/* Here we assume that a file segment with just the header page
@ -464,9 +456,6 @@ static dberr_t trx_purge_free_segment(trx_rseg_t *rseg, fil_addr_t hdr_addr)
rseg->curr_size -= seg_size;
}
func_exit:
rseg->latch.wr_unlock();
mtr.commit();
return err;
}
@ -484,8 +473,6 @@ trx_purge_truncate_rseg_history(
mtr_t mtr;
mtr.start();
ut_ad(rseg.is_persistent());
rseg.latch.wr_lock(SRW_LOCK_CALL);
dberr_t err;
buf_block_t* rseg_hdr = rseg.get(&mtr, &err);
@ -501,7 +488,6 @@ trx_purge_truncate_rseg_history(
loop:
if (hdr_addr.page == FIL_NULL) {
func_exit:
rseg.latch.wr_unlock();
mtr.commit();
return err;
}
@ -533,38 +519,30 @@ func_exit:
prev_hdr_addr.boffset = static_cast<uint16_t>(prev_hdr_addr.boffset
- TRX_UNDO_HISTORY_NODE);
if (mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_STATE
+ block->page.frame)
if (!rseg.is_referenced()
&& rseg.needs_purge <= (purge_sys.head.trx_no
? purge_sys.head.trx_no
: purge_sys.tail.trx_no)
&& mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_STATE
+ block->page.frame)
== TRX_UNDO_TO_PURGE
&& !mach_read_from_2(block->page.frame + hdr_addr.boffset
+ TRX_UNDO_NEXT_LOG)) {
/* We can free the whole log segment */
rseg.latch.wr_unlock();
mtr.commit();
/* calls the trx_purge_remove_log_hdr()
inside trx_purge_free_segment(). */
err = trx_purge_free_segment(&rseg, hdr_addr);
if (err != DB_SUCCESS) {
return err;
}
/* We can free the whole log segment.
This will call trx_purge_remove_log_hdr(). */
err = trx_purge_free_segment(mtr, &rseg, hdr_addr);
} else {
/* Remove the log hdr from the rseg history. */
rseg.history_size--;
err = trx_purge_remove_log_hdr(rseg_hdr, block,
hdr_addr.boffset, &mtr);
if (err != DB_SUCCESS) {
goto func_exit;
}
rseg.history_size--;
rseg.latch.wr_unlock();
mtr.commit();
}
mtr.commit();
if (err != DB_SUCCESS) {
return err;
}
mtr.start();
rseg.latch.wr_lock(SRW_LOCK_CALL);
hdr_addr = prev_hdr_addr;
@ -640,8 +618,13 @@ TRANSACTIONAL_TARGET static void trx_purge_truncate_history()
dberr_t err= DB_SUCCESS;
for (auto &rseg : trx_sys.rseg_array)
if (rseg.space)
{
ut_ad(rseg.is_persistent());
rseg.latch.wr_lock(SRW_LOCK_CALL);
if (dberr_t e= trx_purge_truncate_rseg_history(rseg, head))
err= e;
rseg.latch.wr_unlock();
}
if (err != DB_SUCCESS || srv_undo_tablespaces_active < 2)
return;
@ -693,43 +676,34 @@ TRANSACTIONAL_TARGET static void trx_purge_truncate_history()
{
if (rseg.space != &space)
continue;
#ifdef SUX_LOCK_GENERIC
rseg.latch.rd_lock(SRW_LOCK_CALL);
#else
transactional_shared_lock_guard<srw_spin_lock> g{rseg.latch};
#endif
ut_ad(rseg.skip_allocation());
if (rseg.is_referenced())
if (rseg.is_referenced() || rseg.needs_purge > head.trx_no)
{
not_free:
#ifdef SUX_LOCK_GENERIC
rseg.latch.rd_unlock();
#endif
return;
}
if (rseg.curr_size != 1)
ut_ad(UT_LIST_GET_LEN(rseg.undo_list) == 0);
/* Check if all segments are cached and safe to remove. */
ulint cached= 0;
for (const trx_undo_t *undo= UT_LIST_GET_FIRST(rseg.undo_cached); undo;
undo= UT_LIST_GET_NEXT(undo_list, undo))
{
/* Check if all segments are cached and safe to remove. */
ulint cached= 0;
for (trx_undo_t *undo= UT_LIST_GET_FIRST(rseg.undo_cached); undo;
undo= UT_LIST_GET_NEXT(undo_list, undo))
{
if (head.trx_no < undo->trx_id)
goto not_free;
else
cached+= undo->size;
}
ut_ad(rseg.curr_size > cached);
if (rseg.curr_size > cached + 1)
if (head.trx_no < undo->trx_id)
goto not_free;
else
cached+= undo->size;
}
#ifdef SUX_LOCK_GENERIC
ut_ad(rseg.curr_size > cached);
if (rseg.curr_size > cached + 1)
goto not_free;
rseg.latch.rd_unlock();
#endif
}
ib::info() << "Truncating " << file->name;
@ -848,7 +822,9 @@ not_free:
/* Skip rollback segments that do not belong to `space`. */
if (rseg.space != &space)
continue;
dberr_t err;
/* Debug checks: nothing references this rollback segment, and its
needs_purge value does not exceed head.trx_no. */
ut_ad(!rseg.is_referenced());
ut_ad(rseg.needs_purge <= head.trx_no);
/* Call trx_rseg_header_create() for this segment, passing its index
within trx_sys.rseg_array and the current maximum transaction id. */
buf_block_t *rblock= trx_rseg_header_create(&space,
&rseg - trx_sys.rseg_array,
trx_sys.get_max_trx_id(),
@ -883,10 +859,6 @@ not_free:
log_buffer_flush_to_disk();
DBUG_SUICIDE(););
/* Clear the skip-allocation flag of every rollback segment that
belongs to this tablespace. */
for (auto &rseg : trx_sys.rseg_array)
if (rseg.space == &space)
rseg.clear_skip_allocation();
ib::info() << "Truncated " << file->name;
/* Record the tablespace that was just processed as truncate.last. */
purge_sys.truncate.last= purge_sys.truncate.current;
ut_ad(&space == purge_sys.truncate.current);
@ -946,7 +918,6 @@ static void trx_purge_rseg_get_next_history_log(
/* Read the previous log header. */
mtr.start();
/* Both values start at zero and are filled in from the log header
inside the if-block below. */
byte needs_purge= 0;
trx_id_t trx_no= 0;
if (const buf_block_t* undo_page=
@ -957,7 +928,6 @@ static void trx_purge_rseg_get_next_history_log(
/* TRX_UNDO_TRX_NO is read as an 8-byte field. */
trx_no= mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);
/* The 2-byte TRX_UNDO_NEEDS_PURGE field is asserted to hold 0 or 1,
so only the byte at offset + 1 is fetched. */
ut_ad(mach_read_from_2(log_hdr + TRX_UNDO_NEEDS_PURGE) <= 1);
needs_purge= log_hdr[TRX_UNDO_NEEDS_PURGE + 1];
}
mtr.commit();
@ -969,11 +939,6 @@ static void trx_purge_rseg_get_next_history_log(
/* Point the rollback segment at the previous log and record its
offset together with the transaction number that was read. */
purge_sys.rseg->last_page_no= prev_log_addr.page;
purge_sys.rseg->set_last_commit(prev_log_addr.boffset, trx_no);
/* Mirror the flag that was read from the log header. */
if (needs_purge)
purge_sys.rseg->set_needs_purge();
else
purge_sys.rseg->clear_needs_purge();
/* Purge can also produce events, however these are already ordered
in the rollback segment and any user generated event will be greater
than the events that Purge produces. ie. Purge can never produce
@ -995,7 +960,7 @@ static void trx_purge_read_undo_rec()
/* Start from the last log recorded in the current rollback segment:
copy its offset and page number into purge_sys. */
purge_sys.hdr_offset = purge_sys.rseg->last_offset();
page_no = purge_sys.hdr_page_no = purge_sys.rseg->last_page_no;
/* NOTE(review): two forms of the same test follow, one calling
needs_purge() and one reading a needs_purge member; this text looks
like a rendered diff showing both -- confirm which one the real file
contains. */
if (purge_sys.rseg->needs_purge()) {
if (purge_sys.rseg->needs_purge) {
mtr_t mtr;
mtr.start();
const buf_block_t* undo_page;

View file

@ -399,7 +399,7 @@ void trx_rseg_t::reinit(uint32_t page)
}
/* The segment must not be referenced while it is being reset. */
ut_ad(!is_referenced());
/* Reset the in-memory bookkeeping: nothing to purge, no last commit,
no last page (FIL_NULL), and a curr_size of 1.
NOTE(review): clear_needs_purge() and needs_purge= 0 look like the old
and new spelling of the same reset -- confirm against the real file. */
clear_needs_purge();
needs_purge= 0;
last_commit_and_offset= 0;
last_page_no= FIL_NULL;
curr_size= 1;
@ -407,10 +407,9 @@ void trx_rseg_t::reinit(uint32_t page)
/** Read the undo log lists.
@param[in,out] rseg rollback segment
@param[in,out] max_trx_id maximum observed transaction identifier
@param[in] rseg_header rollback segment header
@return error code */
static dberr_t trx_undo_lists_init(trx_rseg_t *rseg, trx_id_t &max_trx_id,
static dberr_t trx_undo_lists_init(trx_rseg_t *rseg,
const buf_block_t *rseg_header)
{
/* Must not be reached when srv_force_recovery asks for the undo log
scan to be skipped. */
ut_ad(srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN);
@ -420,8 +419,8 @@ static dberr_t trx_undo_lists_init(trx_rseg_t *rseg, trx_id_t &max_trx_id,
/* Slot i of the rollback segment header holds either FIL_NULL or the
page number of an undo log. */
uint32_t page_no= trx_rsegf_get_nth_undo(rseg_header, i);
if (page_no != FIL_NULL)
{
/* Build the in-memory undo log object for this slot; a null result
is reported as DB_CORRUPTION. */
const trx_undo_t *undo= trx_undo_mem_create_at_db_start(rseg, i, page_no,
max_trx_id);
const trx_undo_t *undo=
trx_undo_mem_create_at_db_start(rseg, i, page_no);
if (!undo)
return DB_CORRUPTION;
/* Account for the size of the undo log in the rollback segment. */
rseg->curr_size+= undo->size;
@ -434,11 +433,9 @@ static dberr_t trx_undo_lists_init(trx_rseg_t *rseg, trx_id_t &max_trx_id,
/** Restore the state of a persistent rollback segment.
@param[in,out] rseg persistent rollback segment
@param[in,out] max_trx_id maximum observed transaction identifier
@param[in,out] mtr mini-transaction
@return error code */
static dberr_t trx_rseg_mem_restore(trx_rseg_t *rseg, trx_id_t &max_trx_id,
mtr_t *mtr)
static dberr_t trx_rseg_mem_restore(trx_rseg_t *rseg, mtr_t *mtr)
{
/* A rollback segment without a tablespace cannot be restored. */
if (!rseg->space)
return DB_TABLESPACE_NOT_FOUND;
@ -454,8 +451,8 @@ static dberr_t trx_rseg_mem_restore(trx_rseg_t *rseg, trx_id_t &max_trx_id,
/* Read the 8-byte TRX_RSEG_MAX_TRX_ID field of the rollback segment
header and keep the larger of it and the value tracked so far.
NOTE(review): the max_trx_id and rseg->needs_purge comparisons look
like the old and new form of the same update -- confirm. */
trx_id_t id= mach_read_from_8(TRX_RSEG + TRX_RSEG_MAX_TRX_ID +
rseg_hdr->page.frame);
if (id > max_trx_id)
max_trx_id= id;
if (id > rseg->needs_purge)
rseg->needs_purge= id;
const byte *binlog_name=
TRX_RSEG + TRX_RSEG_BINLOG_NAME + rseg_hdr->page.frame;
@ -491,7 +488,7 @@ static dberr_t trx_rseg_mem_restore(trx_rseg_t *rseg, trx_id_t &max_trx_id,
/* curr_size is the 4-byte TRX_RSEG_HISTORY_SIZE field plus one. */
rseg->curr_size = mach_read_from_4(TRX_RSEG + TRX_RSEG_HISTORY_SIZE +
rseg_hdr->page.frame) + 1;
err= trx_undo_lists_init(rseg, max_trx_id, rseg_hdr);
err= trx_undo_lists_init(rseg, rseg_hdr);
/* The empty statement is deliberate: on error the history list branch
below is not entered. */
if (err != DB_SUCCESS);
else if (auto len= flst_get_len(TRX_RSEG + TRX_RSEG_HISTORY +
rseg_hdr->page.frame))
@ -512,19 +509,16 @@ static dberr_t trx_rseg_mem_restore(trx_rseg_t *rseg, trx_id_t &max_trx_id,
/* Fold the TRX_UNDO_TRX_ID of the log at node_addr into the running
maximum. */
trx_id_t id= mach_read_from_8(block->page.frame + node_addr.boffset +
TRX_UNDO_TRX_ID);
if (id > max_trx_id)
max_trx_id= id;
if (id > rseg->needs_purge)
rseg->needs_purge= id;
/* Do the same for TRX_UNDO_TRX_NO, which is also recorded as the
last commit of the rollback segment. */
id= mach_read_from_8(block->page.frame + node_addr.boffset +
TRX_UNDO_TRX_NO);
if (id > max_trx_id)
max_trx_id= id;
if (id > rseg->needs_purge)
rseg->needs_purge= id;
rseg->set_last_commit(node_addr.boffset, id);
/* The 2-byte TRX_UNDO_NEEDS_PURGE field is expected to be 0 or 1. */
unsigned purge= mach_read_from_2(block->page.frame + node_addr.boffset +
TRX_UNDO_NEEDS_PURGE);
ut_ad(purge <= 1);
if (purge != 0)
rseg->set_needs_purge();
ut_ad(mach_read_from_2(block->page.frame + node_addr.boffset +
TRX_UNDO_NEEDS_PURGE) <= 1);
if (rseg->last_page_no != FIL_NULL)
/* There is no need to cover this operation by the purge
@ -597,9 +591,11 @@ dberr_t trx_rseg_array_init()
sys, rseg_id)),
page_no);
ut_ad(rseg.is_persistent());
/* NOTE(review): two calls to trx_rseg_mem_restore() appear here, one
taking max_trx_id and one not; they look like the old and new form of
the same call in a rendered diff -- confirm. */
if ((err = trx_rseg_mem_restore(
&rseg, max_trx_id, &mtr))
!= DB_SUCCESS) {
err = trx_rseg_mem_restore(&rseg, &mtr);
/* Fold the per-segment value into max_trx_id; this is done before
the error check, so it also happens when the restore failed. */
if (rseg.needs_purge > max_trx_id) {
max_trx_id = rseg.needs_purge;
}
/* On failure, commit the mini-transaction and leave the loop. */
if (err != DB_SUCCESS) {
mtr.commit();
break;
}

View file

@ -651,6 +651,7 @@ static dberr_t trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg,
uint64_t *rows_to_undo)
{
trx_state_t state;
/* Debug check: rseg->needs_purge is at least the transaction id
stored in the undo log being resurrected. */
ut_ad(rseg->needs_purge >= undo->trx_id);
/*
This is single-threaded startup code, we do not need the
protection of trx->mutex here.
@ -673,6 +674,7 @@ static dberr_t trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg,
return DB_SUCCESS;
}
/* NOTE(review): acquire() is called on the rollback segment both here
and further down via trx->rsegs.m_redo.rseg; these look like the new
and old position of one call in a rendered diff -- confirm that the
real file takes only one reference. */
rseg->acquire();
/* Create the transaction object and give it the state determined
earlier in this function. */
trx_t *trx= trx_create();
trx->state= state;
ut_d(trx->start_file= __FILE__);
@ -681,12 +683,6 @@ static dberr_t trx_resurrect(trx_undo_t *undo, trx_rseg_t *rseg,
/* Attach the undo log and rollback segment; undo_no is set to one
past the top undo number of the log. */
trx->rsegs.m_redo.undo= undo;
trx->undo_no= undo->top_undo_no + 1;
trx->rsegs.m_redo.rseg= rseg;
/*
For transactions with active data will not have rseg size = 1
or will not qualify for purge limit criteria. So it is safe to increment
this trx_ref_count w/o mutex protection.
*/
trx->rsegs.m_redo.rseg->acquire();
/* Copy the identity from the undo log and mark the transaction as
recovered. */
trx->xid= undo->xid;
trx->id= undo->trx_id;
trx->is_recovered= true;
@ -759,6 +755,7 @@ corrupted:
ut_ad(trx->is_recovered);
ut_ad(trx->rsegs.m_redo.rseg == &rseg);
ut_ad(rseg.is_referenced());
ut_ad(rseg.needs_purge);
trx->rsegs.m_redo.undo = undo;
if (undo->top_undo_no >= trx->undo_no) {
@ -794,20 +791,18 @@ corrupted:
/** Assign a persistent rollback segment in a round-robin fashion,
evenly distributed between 0 and innodb_undo_logs-1
@return persistent rollback segment
@retval NULL if innodb_read_only */
static trx_rseg_t* trx_assign_rseg_low()
@param trx transaction */
static void trx_assign_rseg_low(trx_t *trx)
{
/* NOTE(review): this read-only early exit returns NULL although the
signature above is void; it looks like a removed line shown next to
the new signature in a rendered diff -- confirm. */
if (high_level_read_only) {
ut_ad(!srv_available_undo_logs);
return(NULL);
}
/* Debug checks: the transaction has no redo rollback segment yet and
srv_available_undo_logs equals TRX_SYS_N_RSEGS. */
ut_ad(!trx->rsegs.m_redo.rseg);
ut_ad(srv_available_undo_logs == TRX_SYS_N_RSEGS);
/* The first slot is always assigned to the system tablespace. */
ut_ad(trx_sys.rseg_array[0].space == fil_system.sys_space);
/* Register the transaction as read-write; the assertion that follows
expects trx->id to be nonzero afterwards. */
trx_sys.register_rw(trx);
ut_ad(trx->id);
/* Choose a rollback segment evenly distributed between 0 and
innodb_undo_logs-1 in a round-robin fashion, skipping those
undo tablespaces that are scheduled for truncation. */
@ -821,7 +816,7 @@ static trx_rseg_t* trx_assign_rseg_low()
bool look_for_rollover = false;
#endif /* UNIV_DEBUG */
bool allocated = false;
bool allocated;
/* Repeat until acquire_if_available() succeeds on the chosen
rollback segment. */
do {
for (;;) {
@ -871,9 +866,7 @@ static trx_rseg_t* trx_assign_rseg_low()
allocated = rseg->acquire_if_available();
} while (!allocated);
ut_ad(rseg->is_referenced());
ut_ad(rseg->is_persistent());
return(rseg);
/* Hand the acquired rollback segment to the transaction. */
trx->rsegs.m_redo.rseg = rseg;
}
/** Assign a rollback segment for modifying temporary tables.
@ -956,15 +949,11 @@ trx_start_low(
/* Taken for a transaction that is not read-only and that either has
no mysql_thd, was started read-write, or is a dictionary operation. */
if (!trx->read_only
&& (!trx->mysql_thd || read_write || trx->dict_operation)) {
/* Temporary rseg is assigned only if the transaction
updates a temporary table */
/* NOTE(review): trx_assign_rseg_low() is shown both as a call that
returns the segment and as a call taking trx; these look like the old
and new form in a rendered diff -- confirm. */
trx->rsegs.m_redo.rseg = trx_assign_rseg_low();
ut_ad(trx->rsegs.m_redo.rseg != 0
|| srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
trx_sys.register_rw(trx);
/* The trx-taking form is only called when the server is not in
high-level read-only mode. */
if (!high_level_read_only) {
trx_assign_rseg_low(trx);
}
} else {
if (!trx->is_autocommit_non_locking()) {
@ -1055,25 +1044,21 @@ trx_write_serialisation_history(
/* NOTE(review): this fragment shows several statements twice (the
wr_lock()/wr_unlock() pair, trx_serialise(), MONITOR_INC()); it looks
like the old and new version of the function body interleaved in a
rendered diff -- confirm against the real file. */
trx_undo_t*& undo = trx->rsegs.m_redo.undo;
/* Nothing to do when the transaction has no redo undo log. */
if (!undo) {
return;
}
ut_ad(!trx->read_only);
ut_ad(!undo || undo->rseg == rseg);
rseg->latch.wr_lock(SRW_LOCK_CALL);
/* Assign the transaction serialisation number and add any
undo log to the purge queue. */
trx_serialise(trx);
if (undo) {
rseg->latch.wr_lock(SRW_LOCK_CALL);
ut_ad(undo->rseg == rseg);
trx_serialise(trx);
/* Unlink the undo log from rseg->undo_list and pass it to
trx_purge_add_undo_to_history(). */
UT_LIST_REMOVE(rseg->undo_list, undo);
trx_purge_add_undo_to_history(trx, undo, mtr);
MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
rseg->latch.wr_unlock();
}
rseg->latch.wr_unlock();
MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
/* Drop the reference that the transaction held on the rollback
segment. */
rseg->release();
}
/********************************************************************
@ -1309,10 +1294,6 @@ TRANSACTIONAL_INLINE inline void trx_t::commit_in_memory(const mtr_t *mtr)
release_locks();
}
/* Release the reference on the redo rollback segment, if the
transaction has one. */
if (trx_rseg_t *rseg= rsegs.m_redo.rseg)
/* This is safe due to us having detached the persistent undo log. */
rseg->release();
/* The remaining work applies only when a mini-transaction was passed. */
if (mtr)
{
if (trx_undo_t *&undo= rsegs.m_noredo.undo)
@ -1450,6 +1431,13 @@ TRANSACTIONAL_TARGET void trx_t::commit_low(mtr_t *mtr)
mtr->commit();
}
else if (trx_rseg_t *rseg= rsegs.m_redo.rseg)
{
/* A redo rollback segment is assigned but, as asserted below, no redo
undo log is attached: the transaction has a nonzero id, and the
reference on the segment is released here. */
ut_ad(id);
ut_ad(!rsegs.m_redo.undo);
rseg->release();
}
#ifdef ENABLED_DEBUG_SYNC
if (debug_sync)
DEBUG_SYNC_C("before_trx_state_committed_in_memory");
@ -2166,11 +2154,7 @@ trx_set_rw_mode(
return;
}
/* NOTE(review): trx_assign_rseg_low() appears both as a call returning
the segment (followed by register_rw()) and as a call taking trx; these
look like the old and new form in a rendered diff -- confirm. */
trx->rsegs.m_redo.rseg = trx_assign_rseg_low();
ut_ad(trx->rsegs.m_redo.rseg != 0);
trx_sys.register_rw(trx);
ut_ad(trx->id);
trx_assign_rseg_low(trx);
/* So that we can see our own changes. */
if (trx->read_view.is_open()) {

View file

@ -1011,12 +1011,10 @@ static void trx_undo_seg_free(const trx_undo_t *undo)
@param[in,out] rseg rollback segment
@param[in] id rollback segment slot
@param[in] page_no undo log segment page number
@param[in,out] max_trx_id the largest observed transaction ID
@return the undo log
@retval nullptr on error */
trx_undo_t *
trx_undo_mem_create_at_db_start(trx_rseg_t *rseg, ulint id, uint32_t page_no,
trx_id_t &max_trx_id)
trx_undo_mem_create_at_db_start(trx_rseg_t *rseg, ulint id, uint32_t page_no)
{
mtr_t mtr;
XID xid;
@ -1054,10 +1052,21 @@ corrupted_type:
/* Locate the undo log header inside the page frame and read the
2-byte segment state. */
const trx_ulogf_t* const undo_header = block->page.frame + offset;
uint16_t state = mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_STATE
+ block->page.frame);
/* Read the 8-byte transaction id; any bit set at or above bit 48 is
reported as corruption. */
const trx_id_t trx_id= mach_read_from_8(undo_header + TRX_UNDO_TRX_ID);
if (trx_id >> 48) {
sql_print_error("InnoDB: corrupted TRX_ID %llx", trx_id);
goto corrupted;
}
/* We will increment rseg->needs_purge, like trx_undo_reuse_cached()
would do it, to avoid trouble on rollback or XA COMMIT. */
trx_id_t trx_no = trx_id + 1;
switch (state) {
case TRX_UNDO_ACTIVE:
case TRX_UNDO_PREPARED:
/* For these two states, any type other than 1 takes
trx_id + 1 as trx_no and leaves the switch. */
if (UNIV_LIKELY(type != 1)) {
trx_no = trx_id + 1;
break;
}
sql_print_error("InnoDB: upgrade from older version than"
@ -1080,13 +1089,14 @@ corrupted_type:
goto corrupted_type;
}
read_trx_no:
/* NOTE(review): the TRX_UNDO_TRX_NO read and its range check appear
twice below (once into a local `id`, once into trx_no); they look like
the old and new form in a rendered diff -- confirm. */
/* Read the 8-byte TRX_UNDO_TRX_NO; any bit set at or above bit 48 is
reported as corruption. */
trx_id_t id = mach_read_from_8(TRX_UNDO_TRX_NO + undo_header);
if (id >> 48) {
sql_print_error("InnoDB: corrupted TRX_NO %llx", id);
trx_no = mach_read_from_8(TRX_UNDO_TRX_NO + undo_header);
if (trx_no >> 48) {
sql_print_error("InnoDB: corrupted TRX_NO %llx",
trx_no);
goto corrupted;
}
if (id > max_trx_id) {
max_trx_id = id;
/* trx_no is never allowed to be smaller than trx_id. */
if (trx_no < trx_id) {
trx_no = trx_id;
}
}
@ -1099,13 +1109,8 @@ corrupted_type:
xid.null();
}
trx_id_t trx_id = mach_read_from_8(undo_header + TRX_UNDO_TRX_ID);
if (trx_id >> 48) {
sql_print_error("InnoDB: corrupted TRX_ID %llx", trx_id);
goto corrupted;
}
if (trx_id > max_trx_id) {
max_trx_id = trx_id;
/* Raise rseg->needs_purge to trx_no when trx_no is larger. */
if (trx_no > rseg->needs_purge) {
rseg->needs_purge = trx_no;
}
trx_undo_t* undo = trx_undo_mem_create(
@ -1296,6 +1301,22 @@ buf_block_t*
trx_undo_reuse_cached(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** pundo,
mtr_t* mtr)
{
/* For a persistent rollback segment, which must already be
referenced, make sure needs_purge is above the id of this
transaction before any cached undo log is handed out. A
non-persistent segment is expected to be unreferenced. */
if (rseg->is_persistent()) {
ut_ad(rseg->is_referenced());
if (rseg->needs_purge <= trx->id) {
/* trx_purge_truncate_history() compares
rseg->needs_purge <= head.trx_no
so we need to compensate for that.
The rseg->needs_purge after crash
recovery would be at least trx->id + 1,
because that is the minimum possible value
assigned by trx_serialise() on commit. */
rseg->needs_purge = trx->id + 1;
}
} else {
ut_ad(!rseg->is_referenced());
}
/* Take the first entry of the cached list; return NULL when the
list is empty. */
trx_undo_t* undo = UT_LIST_GET_FIRST(rseg->undo_cached);
if (!undo) {
return NULL;
@ -1395,9 +1416,8 @@ buf_block_t*
trx_undo_assign_low(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo,
dberr_t* err, mtr_t* mtr)
{
/* NOTE(review): is_temp is declared twice and the rseg assertion is
given in two forms; these look like the old and new lines of a
rendered diff -- confirm against the real file. */
/* Debug-only: is_temp tells whether rseg is the m_noredo rollback
segment of the transaction. */
ut_d(const bool is_temp = rseg == trx->rsegs.m_noredo.rseg);
ut_ad(rseg == trx->rsegs.m_redo.rseg
|| rseg == trx->rsegs.m_noredo.rseg);
ut_d(const bool is_temp = rseg == trx->rsegs.m_noredo.rseg);
ut_ad(is_temp || rseg == trx->rsegs.m_redo.rseg);
/* The undo pointer passed in must be the slot that matches the kind
of rollback segment. */
ut_ad(undo == (is_temp
? &trx->rsegs.m_noredo.undo
: &trx->rsegs.m_redo.undo));
@ -1417,7 +1437,6 @@ trx_undo_assign_low(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo,
);
rseg->latch.wr_lock(SRW_LOCK_CALL);
buf_block_t* block = trx_undo_reuse_cached(trx, rseg, undo, mtr);
if (!block) {