2014-02-26 19:11:54 +01:00
|
|
|
/*****************************************************************************
|
|
|
|
|
2017-05-15 17:17:16 +03:00
|
|
|
Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
|
2021-06-21 12:34:07 +03:00
|
|
|
Copyright (c) 2017, 2021, MariaDB Corporation.
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under
|
|
|
|
the terms of the GNU General Public License as published by the Free Software
|
|
|
|
Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
|
|
|
this program; if not, write to the Free Software Foundation, Inc.,
|
2019-05-11 19:25:02 +03:00
|
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
/**************************************************//**
|
|
|
|
@file trx/trx0purge.cc
|
|
|
|
Purge old versions
|
|
|
|
|
|
|
|
Created 3/26/1996 Heikki Tuuri
|
|
|
|
*******************************************************/
|
|
|
|
|
|
|
|
#include "trx0purge.h"
|
|
|
|
#include "fsp0fsp.h"
|
|
|
|
#include "fut0fut.h"
|
2016-08-12 11:17:45 +03:00
|
|
|
#include "mach0data.h"
|
|
|
|
#include "mtr0log.h"
|
|
|
|
#include "os0thread.h"
|
2014-02-26 19:11:54 +01:00
|
|
|
#include "que0que.h"
|
|
|
|
#include "row0purge.h"
|
|
|
|
#include "row0upd.h"
|
2016-08-12 11:17:45 +03:00
|
|
|
#include "srv0mon.h"
|
2014-02-26 19:11:54 +01:00
|
|
|
#include "srv0srv.h"
|
|
|
|
#include "srv0start.h"
|
2016-08-12 11:17:45 +03:00
|
|
|
#include "sync0sync.h"
|
|
|
|
#include "trx0rec.h"
|
|
|
|
#include "trx0roll.h"
|
|
|
|
#include "trx0rseg.h"
|
|
|
|
#include "trx0trx.h"
|
2018-02-20 15:10:03 +02:00
|
|
|
#include <mysql/service_wsrep.h>
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2020-05-05 13:24:58 +03:00
|
|
|
#include <unordered_map>
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/** Maximum allowable purge history length. 0 means 'infinite'. */
|
2016-08-12 11:17:45 +03:00
|
|
|
ulong srv_max_purge_lag = 0;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/** Maximum delay of DML user threads, in microseconds. */
|
2016-08-12 11:17:45 +03:00
|
|
|
ulong srv_max_purge_lag_delay = 0;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/** The global data structure coordinating a purge */
|
2018-02-22 09:30:41 +02:00
|
|
|
purge_sys_t purge_sys;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/** A dummy undo record used as a return value when we have a whole undo log
|
|
|
|
which needs no purge */
|
2016-08-12 11:17:45 +03:00
|
|
|
trx_undo_rec_t trx_purge_dummy_rec;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
#ifdef UNIV_DEBUG
|
2016-08-12 11:17:45 +03:00
|
|
|
my_bool srv_purge_view_update_only_debug;
|
2014-02-26 19:11:54 +01:00
|
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
|
2016-08-12 11:17:45 +03:00
|
|
|
/** Sentinel value */
|
2018-02-21 18:04:25 +02:00
|
|
|
static const TrxUndoRsegs NullElement;
|
2016-08-12 11:17:45 +03:00
|
|
|
|
2018-02-21 18:04:25 +02:00
|
|
|
/** Default constructor */
|
2018-02-22 09:30:41 +02:00
|
|
|
TrxUndoRsegsIterator::TrxUndoRsegsIterator()
|
2018-02-21 18:04:25 +02:00
|
|
|
: m_rsegs(NullElement), m_iter(m_rsegs.begin())
|
2016-08-12 11:17:45 +03:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2017-03-09 20:40:48 +02:00
|
|
|
/** Sets the next rseg to purge in purge_sys.
|
2018-02-21 18:04:25 +02:00
|
|
|
Executed in the purge coordinator thread.
|
2017-03-09 22:06:22 +02:00
|
|
|
@return whether anything is to be purged */
|
2018-02-21 18:04:25 +02:00
|
|
|
inline bool TrxUndoRsegsIterator::set_next()
|
2016-08-12 11:17:45 +03:00
|
|
|
{
|
2018-02-22 09:30:41 +02:00
|
|
|
mutex_enter(&purge_sys.pq_mutex);
|
2016-08-12 11:17:45 +03:00
|
|
|
|
|
|
|
/* Only purge consumes events from the priority queue, user
|
|
|
|
threads only produce the events. */
|
|
|
|
|
|
|
|
/* Check if there are more rsegs to process in the
|
|
|
|
current element. */
|
2018-02-21 18:04:25 +02:00
|
|
|
if (m_iter != m_rsegs.end()) {
|
2016-08-12 11:17:45 +03:00
|
|
|
/* We are still processing rollback segment from
|
|
|
|
the same transaction and so expected transaction
|
2018-02-21 18:04:25 +02:00
|
|
|
number shouldn't increase. Undo the increment of
|
2018-02-21 19:02:32 +02:00
|
|
|
expected commit done by caller assuming rollback
|
2016-08-12 11:17:45 +03:00
|
|
|
segments from given transaction are done. */
|
2021-06-21 12:34:07 +03:00
|
|
|
purge_sys.tail.trx_no = (*m_iter)->last_trx_no();
|
2018-02-22 09:30:41 +02:00
|
|
|
} else if (!purge_sys.purge_queue.empty()) {
|
|
|
|
m_rsegs = purge_sys.purge_queue.top();
|
|
|
|
purge_sys.purge_queue.pop();
|
|
|
|
ut_ad(purge_sys.purge_queue.empty()
|
|
|
|
|| purge_sys.purge_queue.top() != m_rsegs);
|
2018-02-21 18:04:25 +02:00
|
|
|
m_iter = m_rsegs.begin();
|
2016-08-12 11:17:45 +03:00
|
|
|
} else {
|
|
|
|
/* Queue is empty, reset iterator. */
|
2018-02-22 09:30:41 +02:00
|
|
|
purge_sys.rseg = NULL;
|
|
|
|
mutex_exit(&purge_sys.pq_mutex);
|
2018-02-21 18:04:25 +02:00
|
|
|
m_rsegs = NullElement;
|
|
|
|
m_iter = m_rsegs.begin();
|
2017-03-09 22:06:22 +02:00
|
|
|
return false;
|
2016-08-12 11:17:45 +03:00
|
|
|
}
|
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
purge_sys.rseg = *m_iter++;
|
|
|
|
mutex_exit(&purge_sys.pq_mutex);
|
|
|
|
mutex_enter(&purge_sys.rseg->mutex);
|
2016-08-12 11:17:45 +03:00
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
ut_a(purge_sys.rseg->last_page_no != FIL_NULL);
|
2021-06-21 12:34:07 +03:00
|
|
|
ut_ad(purge_sys.rseg->last_trx_no() == m_rsegs.trx_no);
|
2016-08-12 11:17:45 +03:00
|
|
|
|
2017-05-23 11:09:47 +03:00
|
|
|
/* We assume in purge of externally stored fields that space id is
|
|
|
|
in the range of UNDO tablespace space ids */
|
2018-03-26 17:23:47 +03:00
|
|
|
ut_ad(purge_sys.rseg->space->id == TRX_SYS_SPACE
|
|
|
|
|| srv_is_undo_tablespace(purge_sys.rseg->space->id));
|
2016-08-12 11:17:45 +03:00
|
|
|
|
2021-06-21 12:34:07 +03:00
|
|
|
ut_a(purge_sys.tail.trx_no <= purge_sys.rseg->last_trx_no());
|
2016-08-12 11:17:45 +03:00
|
|
|
|
2021-06-21 12:34:07 +03:00
|
|
|
purge_sys.tail.trx_no = purge_sys.rseg->last_trx_no();
|
|
|
|
purge_sys.hdr_offset = purge_sys.rseg->last_offset();
|
2018-02-22 09:30:41 +02:00
|
|
|
purge_sys.hdr_page_no = purge_sys.rseg->last_page_no;
|
2016-08-12 11:17:45 +03:00
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
mutex_exit(&purge_sys.rseg->mutex);
|
2016-08-12 11:17:45 +03:00
|
|
|
|
2017-03-09 22:06:22 +02:00
|
|
|
return(true);
|
2016-08-12 11:17:45 +03:00
|
|
|
}
|
|
|
|
|
2017-03-09 20:40:48 +02:00
|
|
|
/** Build a purge 'query' graph. The actual purge is performed by executing
|
2014-02-26 19:11:54 +01:00
|
|
|
this query graph.
|
2016-08-12 11:17:45 +03:00
|
|
|
@return own: the query graph */
|
2014-02-26 19:11:54 +01:00
|
|
|
static
|
|
|
|
que_t*
|
2018-02-15 09:50:03 +02:00
|
|
|
purge_graph_build()
|
2014-02-26 19:11:54 +01:00
|
|
|
{
|
2017-03-09 20:40:48 +02:00
|
|
|
ut_a(srv_n_purge_threads > 0);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2018-03-29 22:38:26 +04:00
|
|
|
trx_t* trx = trx_create();
|
2018-02-15 09:50:03 +02:00
|
|
|
ut_ad(!trx->id);
|
2019-07-25 12:08:50 +03:00
|
|
|
trx->start_time = time(NULL);
|
|
|
|
trx->start_time_micro = microsecond_interval_timer();
|
2017-03-09 20:40:48 +02:00
|
|
|
trx->state = TRX_STATE_ACTIVE;
|
|
|
|
trx->op_info = "purge trx";
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2017-03-09 20:40:48 +02:00
|
|
|
mem_heap_t* heap = mem_heap_create(512);
|
|
|
|
que_fork_t* fork = que_fork_create(
|
|
|
|
NULL, NULL, QUE_FORK_PURGE, heap);
|
|
|
|
fork->trx = trx;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2020-06-03 16:19:13 +02:00
|
|
|
for (auto i = innodb_purge_threads_MAX; i; i--) {
|
2017-03-09 20:40:48 +02:00
|
|
|
que_thr_t* thr = que_thr_create(fork, heap, NULL);
|
2019-03-12 13:56:58 +02:00
|
|
|
thr->child = new(mem_heap_alloc(heap, sizeof(purge_node_t)))
|
2019-03-11 17:17:24 +02:00
|
|
|
purge_node_t(thr);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return(fork);
|
|
|
|
}
|
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
/** Initialise the purge system. */
|
|
|
|
void purge_sys_t::create()
|
2017-03-09 20:40:48 +02:00
|
|
|
{
|
2018-02-22 09:30:41 +02:00
|
|
|
ut_ad(this == &purge_sys);
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 15:42:50 +02:00
|
|
|
ut_ad(!heap);
|
2018-05-15 14:39:50 +03:00
|
|
|
ut_ad(!enabled());
|
|
|
|
m_paused= 0;
|
2018-02-22 09:30:41 +02:00
|
|
|
query= purge_graph_build();
|
|
|
|
next_stored= false;
|
|
|
|
rseg= NULL;
|
|
|
|
page_no= 0;
|
|
|
|
offset= 0;
|
|
|
|
hdr_page_no= 0;
|
|
|
|
hdr_offset= 0;
|
|
|
|
rw_lock_create(trx_purge_latch_key, &latch, SYNC_PURGE_LATCH);
|
|
|
|
mutex_create(LATCH_ID_PURGE_SYS_PQ, &pq_mutex);
|
2018-09-10 18:01:54 +03:00
|
|
|
truncate.current= NULL;
|
|
|
|
truncate.last= NULL;
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 15:42:50 +02:00
|
|
|
heap= mem_heap_create(4096);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
/** Close the purge subsystem on shutdown. */
|
|
|
|
void purge_sys_t::close()
|
2014-02-26 19:11:54 +01:00
|
|
|
{
|
2019-11-13 18:14:44 +01:00
|
|
|
ut_ad(this == &purge_sys);
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 15:42:50 +02:00
|
|
|
if (!heap)
|
2019-10-29 22:37:12 +01:00
|
|
|
return;
|
2018-05-15 14:39:50 +03:00
|
|
|
|
2018-12-27 18:32:09 +04:00
|
|
|
ut_ad(!enabled());
|
2018-05-15 14:39:50 +03:00
|
|
|
trx_t* trx = query->trx;
|
|
|
|
que_graph_free(query);
|
|
|
|
ut_ad(!trx->id);
|
|
|
|
ut_ad(trx->state == TRX_STATE_ACTIVE);
|
|
|
|
trx->state= TRX_STATE_NOT_STARTED;
|
2020-08-21 19:18:34 +03:00
|
|
|
trx->free();
|
2018-05-15 14:39:50 +03:00
|
|
|
rw_lock_free(&latch);
|
|
|
|
mutex_free(&pq_mutex);
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 15:42:50 +02:00
|
|
|
mem_heap_free(heap);
|
|
|
|
heap= nullptr;
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*================ UNDO LOG HISTORY LIST =============================*/
|
|
|
|
|
2017-08-11 12:47:54 +03:00
|
|
|
/** Prepend the history list with an undo log.
|
|
|
|
Remove the undo log segment from the rseg slot if it is too big for reuse.
|
|
|
|
@param[in] trx transaction
|
|
|
|
@param[in,out] undo undo log
|
|
|
|
@param[in,out] mtr mini-transaction */
|
2014-02-26 19:11:54 +01:00
|
|
|
void
|
2017-08-11 12:47:54 +03:00
|
|
|
trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
|
2014-02-26 19:11:54 +01:00
|
|
|
{
|
2018-02-20 15:10:03 +02:00
|
|
|
DBUG_PRINT("trx", ("commit(" TRX_ID_FMT "," TRX_ID_FMT ")",
|
2020-05-27 08:50:24 +03:00
|
|
|
trx->id, trx_id_t{trx->rw_trx_hash_element->no}));
|
2021-06-21 12:34:07 +03:00
|
|
|
ut_ad(undo == trx->rsegs.m_redo.undo);
|
2017-08-11 12:47:54 +03:00
|
|
|
trx_rseg_t* rseg = trx->rsegs.m_redo.rseg;
|
|
|
|
ut_ad(undo->rseg == rseg);
|
2019-12-03 10:19:45 +02:00
|
|
|
buf_block_t* rseg_header = trx_rsegf_get(
|
MDEV-12219 Discard temporary undo logs at transaction commit
Starting with MySQL 5.7, temporary tables in InnoDB are handled
differently from persistent tables. Because temporary tables are
private to a connection, concurrency control and multi-versioning
(MVCC) are not applicable. For performance reasons, purge is
disabled as well. Rollback is supported for temporary tables;
that is why we have the temporary undo logs in the first place.
Because MVCC and purge are disabled for temporary tables, we should
discard all temporary undo logs already at transaction commit,
just like we discard the persistent insert_undo logs. Before this
change, update_undo logs were being preserved.
trx_temp_undo_t: A wrapper for temporary undo logs, comprising
a rollback segment and a single temporary undo log.
trx_rsegs_t::m_noredo: Use trx_temp_undo_t.
(Instead of insert_undo, update_undo, there will be a single undo.)
trx_is_noredo_rseg_updated(), trx_is_rseg_assigned(): Remove.
trx_undo_add_page(): Remove the parameter undo_ptr.
Acquire and release the rollback segment mutex inside the function.
trx_undo_free_last_page(): Remove the parameter trx.
trx_undo_truncate_end(): Remove the parameter trx, and add the
parameter is_temp. Clean up the code a bit.
trx_undo_assign_undo(): Split the parameter undo_ptr into rseg, undo.
trx_undo_commit_cleanup(): Renamed from trx_undo_insert_cleanup().
Replace the parameter undo_ptr with undo.
This will discard the temporary undo or insert_undo log at
commit/rollback.
trx_purge_add_update_undo_to_history(), trx_undo_update_cleanup():
Remove 3 parameters. Always operate on the persistent update_undo.
trx_serialise(): Renamed from trx_serialisation_number_get().
trx_write_serialisation_history(): Simplify the code flow.
If there are no persistent changes, do not update MONITOR_TRX_COMMIT_UNDO.
trx_commit_in_memory(): Simplify the logic, and add assertions.
trx_undo_page_report_modify(): Keep a direct reference to the
persistent update_undo log.
trx_undo_report_row_operation(): Simplify some code.
Always assign TRX_UNDO_INSERT for temporary undo logs.
trx_prepare_low(): Keep only one parameter. Prepare all 3 undo logs.
trx_roll_try_truncate(): Remove the parameter undo_ptr.
Try to truncate all 3 undo logs of the transaction.
trx_roll_pop_top_rec_of_trx_low(): Remove.
trx_roll_pop_top_rec_of_trx(): Remove the redundant parameter
trx->roll_limit. Clear roll_limit when exhausting the undo logs.
Consider all 3 undo logs at once, prioritizing the persistent
undo logs.
row_undo(): Minor cleanup. Let trx_roll_pop_top_rec_of_trx()
reset the trx->roll_limit.
2017-03-09 23:20:51 +02:00
|
|
|
rseg->space, rseg->page_no, mtr);
|
2019-12-03 10:19:45 +02:00
|
|
|
buf_block_t* undo_page = trx_undo_set_state_at_finish(
|
2017-08-11 12:47:54 +03:00
|
|
|
undo, mtr);
|
2019-12-03 10:19:45 +02:00
|
|
|
trx_ulogf_t* undo_header = undo_page->frame + undo->hdr_offset;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2017-08-15 17:18:55 +03:00
|
|
|
ut_ad(mach_read_from_2(undo_header + TRX_UNDO_NEEDS_PURGE) <= 1);
|
|
|
|
|
2019-12-03 10:19:45 +02:00
|
|
|
if (UNIV_UNLIKELY(mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT
|
|
|
|
+ rseg_header->frame))) {
|
2018-02-20 15:10:03 +02:00
|
|
|
/* This database must have been upgraded from
|
|
|
|
before MariaDB 10.3.5. */
|
|
|
|
trx_rseg_format_upgrade(rseg_header, mtr);
|
|
|
|
}
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
if (undo->state != TRX_UNDO_CACHED) {
|
|
|
|
/* The undo log segment will not be reused */
|
2018-02-20 15:10:03 +02:00
|
|
|
ut_a(undo->id < TRX_RSEG_N_SLOTS);
|
2019-12-03 10:19:45 +02:00
|
|
|
compile_time_assert(FIL_NULL == 0xffffffff);
|
2019-12-03 10:26:53 +02:00
|
|
|
mtr->memset(rseg_header,
|
|
|
|
TRX_RSEG + TRX_RSEG_UNDO_SLOTS
|
|
|
|
+ undo->id * TRX_RSEG_SLOT_SIZE, 4, 0xff);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED);
|
|
|
|
|
2019-03-19 11:09:10 +02:00
|
|
|
uint32_t hist_size = mach_read_from_4(TRX_RSEG_HISTORY_SIZE
|
2019-12-03 10:19:45 +02:00
|
|
|
+ TRX_RSEG
|
|
|
|
+ rseg_header->frame);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-03-19 11:09:10 +02:00
|
|
|
ut_ad(undo->size == flst_get_len(TRX_UNDO_SEG_HDR
|
|
|
|
+ TRX_UNDO_PAGE_LIST
|
2019-12-03 10:19:45 +02:00
|
|
|
+ undo_page->frame));
|
|
|
|
|
|
|
|
mtr->write<4>(*rseg_header, TRX_RSEG + TRX_RSEG_HISTORY_SIZE
|
|
|
|
+ rseg_header->frame,
|
|
|
|
hist_size + undo->size);
|
|
|
|
mtr->write<8>(*rseg_header, TRX_RSEG + TRX_RSEG_MAX_TRX_ID
|
|
|
|
+ rseg_header->frame,
|
|
|
|
trx_sys.get_max_trx_id());
|
MDEV-15443 Properly read wsrep XID and binlog position from rollback segment headers
The problem is a regression caused by MDEV-15158.
If some transactions were committed with wsrep_on=0, a
rollback segment header having the highest trx_id assigned might
store undefined wsrep XID. When reading the wsrep checkpoint
from InnoDB, the undefined wsrep XID might be returned instead
of the highest valid one.
Similarly, if the binary log is intermittently disabled or enabled
while InnoDB transactions are being committed, the latest updated
rollback segment header page might not contain the latest binlog metadata.
Therefore, the MDEV-15158 logic to rely on TRX_RSEG_MAX_TRX_ID for
determining the most recent WSREP XID or binlog position is invalid.
We must choose the maximum entries among the rollback segment header
pages.
This fix is based on code submitted by Teemu Ollakka from Codership
and by Thirunarayanan Balathandayuthapani from MariaDB Corporation.
trx_purge_add_undo_to_history(): Only write TRX_RSEG_MAX_TRX_ID
when it was used to be written before MDEV-15158.
wsrep_seqno: Renamed from trx_sys_cur_xid_seqno.
wsrep_uuid: Renamed from trx_sys_cur_xid_uuid, and enable in non-debug
builds.
read_wsrep_xid_uuid(): Make non-debug, and remove the memcpy().
trx_rseg_update_wsrep_checkpoint(): Correctly compare and copy
the entire UUID in the debug check. In case of UUID mismatch,
write the WSREP XID to all 128 rollback segment headers in
a single mini-transaction.
trx_rseg_read_wsrep_checkpoint(rseg_header, xid): Make static.
In case the information is absent, do not overwrite xid.
trx_rseg_read_wsrep_checkpoint(xid): Determine the maximum
WSREP XID.
trx_rseg_mem_restore(): Remove the parameter max_rseg_trx_id.
Determine the latest binlog file and position by comparing
file names and offsets. Declare trx_sys.recovered_binlog_offset
as an unsigned type.
2018-03-06 23:29:38 +02:00
|
|
|
}
|
2018-02-20 15:10:03 +02:00
|
|
|
|
2018-08-21 12:01:44 +03:00
|
|
|
/* After the purge thread has been given permission to exit,
|
|
|
|
we may roll back transactions (trx->undo_no==0)
|
|
|
|
in THD::cleanup() invoked from unlink_thd() in fast shutdown,
|
2019-09-05 15:57:39 +03:00
|
|
|
or in trx_rollback_recovered() in slow shutdown.
|
2018-08-21 12:01:44 +03:00
|
|
|
|
|
|
|
Before any transaction-generating background threads or the
|
MDEV-23399: Performance regression with write workloads
The buffer pool refactoring in MDEV-15053 and MDEV-22871 shifted
the performance bottleneck to the page flushing.
The configuration parameters will be changed as follows:
innodb_lru_flush_size=32 (new: how many pages to flush on LRU eviction)
innodb_lru_scan_depth=1536 (old: 1024)
innodb_max_dirty_pages_pct=90 (old: 75)
innodb_max_dirty_pages_pct_lwm=75 (old: 0)
Note: The parameter innodb_lru_scan_depth will only affect LRU
eviction of buffer pool pages when a new page is being allocated. The
page cleaner thread will no longer evict any pages. It used to
guarantee that some pages will remain free in the buffer pool. Now, we
perform that eviction 'on demand' in buf_LRU_get_free_block().
The parameter innodb_lru_scan_depth(srv_LRU_scan_depth) is used as follows:
* When the buffer pool is being shrunk in buf_pool_t::withdraw_blocks()
* As a buf_pool.free limit in buf_LRU_list_batch() for terminating
the flushing that is initiated e.g., by buf_LRU_get_free_block()
The parameter also used to serve as an initial limit for unzip_LRU
eviction (evicting uncompressed page frames while retaining
ROW_FORMAT=COMPRESSED pages), but now we will use a hard-coded limit
of 100 or unlimited for invoking buf_LRU_scan_and_free_block().
The status variables will be changed as follows:
innodb_buffer_pool_pages_flushed: This includes also the count of
innodb_buffer_pool_pages_LRU_flushed and should work reliably,
updated one by one in buf_flush_page() to give more real-time
statistics. The function buf_flush_stats(), which we are removing,
was not called in every code path. For both counters, we will use
regular variables that are incremented in a critical section of
buf_pool.mutex. Note that show_innodb_vars() directly links to the
variables, and reads of the counters will *not* be protected by
buf_pool.mutex, so you cannot get a consistent snapshot of both variables.
The following INFORMATION_SCHEMA.INNODB_METRICS counters will be
removed, because the page cleaner no longer deals with writing or
evicting least recently used pages, and because the single-page writes
have been removed:
* buffer_LRU_batch_flush_avg_time_slot
* buffer_LRU_batch_flush_avg_time_thread
* buffer_LRU_batch_flush_avg_time_est
* buffer_LRU_batch_flush_avg_pass
* buffer_LRU_single_flush_scanned
* buffer_LRU_single_flush_num_scan
* buffer_LRU_single_flush_scanned_per_call
When moving to a single buffer pool instance in MDEV-15058, we missed
some opportunity to simplify the buf_flush_page_cleaner thread. It was
unnecessarily using a mutex and some complex data structures, even
though we always have a single page cleaner thread.
Furthermore, the buf_flush_page_cleaner thread had separate 'recovery'
and 'shutdown' modes where it was waiting to be triggered by some
other thread, adding unnecessary latency and potential for hangs in
relatively rarely executed startup or shutdown code.
The page cleaner was also running two kinds of batches in an
interleaved fashion: "LRU flush" (writing out some least recently used
pages and evicting them on write completion) and the normal batches
that aim to increase the MIN(oldest_modification) in the buffer pool,
to help the log checkpoint advance.
The buf_pool.flush_list flushing was being blocked by
buf_block_t::lock for no good reason. Furthermore, if the FIL_PAGE_LSN
of a page is ahead of log_sys.get_flushed_lsn(), that is, what has
been persistently written to the redo log, we would trigger a log
flush and then resume the page flushing. This would unnecessarily
limit the performance of the page cleaner thread and trigger the
infamous messages "InnoDB: page_cleaner: 1000ms intended loop took 4450ms.
The settings might not be optimal" that were suppressed in
commit d1ab89037a518fcffbc50c24e4bd94e4ec33aed0 unless log_warnings>2.
Our revised algorithm will make log_sys.get_flushed_lsn() advance at
the start of buf_flush_lists(), and then execute a 'best effort' to
write out all pages. The flush batches will skip pages that were modified
since the log was written, or are currently exclusively locked.
The MDEV-13670 message "page_cleaner: 1000ms intended loop took"
will be removed, because by design, the buf_flush_page_cleaner() should
not be blocked during a batch for extended periods of time.
We will remove the single-page flushing altogether. Related to this,
the debug parameter innodb_doublewrite_batch_size will be removed,
because all of the doublewrite buffer will be used for flushing
batches. If a page needs to be evicted from the buffer pool and all
100 least recently used pages in the buffer pool have unflushed
changes, buf_LRU_get_free_block() will execute buf_flush_lists() to
write out and evict innodb_lru_flush_size pages. At most one thread
will execute buf_flush_lists() in buf_LRU_get_free_block(); other
threads will wait for that LRU flushing batch to finish.
To improve concurrency, we will replace the InnoDB ib_mutex_t and
os_event_t native mutexes and condition variables in this area of code.
Most notably, this means that the buffer pool mutex (buf_pool.mutex)
is no longer instrumented via any InnoDB interfaces. It will continue
to be instrumented via PERFORMANCE_SCHEMA.
For now, both buf_pool.flush_list_mutex and buf_pool.mutex will be
declared with MY_MUTEX_INIT_FAST (PTHREAD_MUTEX_ADAPTIVE_NP). The critical
sections of buf_pool.flush_list_mutex should be shorter than those for
buf_pool.mutex, because in the worst case, they cover a linear scan of
buf_pool.flush_list, while the worst case of a critical section of
buf_pool.mutex covers a linear scan of the potentially much longer
buf_pool.LRU list.
mysql_mutex_is_owner(), safe_mutex_is_owner(): New predicate, usable
with SAFE_MUTEX. Some InnoDB debug assertions need this predicate
instead of mysql_mutex_assert_owner() or mysql_mutex_assert_not_owner().
buf_pool_t::n_flush_LRU, buf_pool_t::n_flush_list:
Replaces buf_pool_t::init_flush[] and buf_pool_t::n_flush[].
The number of active flush operations.
buf_pool_t::mutex, buf_pool_t::flush_list_mutex: Use mysql_mutex_t
instead of ib_mutex_t, to have native mutexes with PERFORMANCE_SCHEMA
and SAFE_MUTEX instrumentation.
buf_pool_t::done_flush_LRU: Condition variable for !n_flush_LRU.
buf_pool_t::done_flush_list: Condition variable for !n_flush_list.
buf_pool_t::do_flush_list: Condition variable to wake up the
buf_flush_page_cleaner when a log checkpoint needs to be written
or the server is being shut down. Replaces buf_flush_event.
We will keep using timed waits (the page cleaner thread will wake
_at least_ once per second), because the calculations for
innodb_adaptive_flushing depend on fixed time intervals.
buf_dblwr: Allocate statically, and move all code to member functions.
Use a native mutex and condition variable. Remove code to deal with
single-page flushing.
buf_dblwr_check_block(): Make the check debug-only. We were spending
a significant amount of execution time in page_simple_validate_new().
flush_counters_t::unzip_LRU_evicted: Remove.
IORequest: Make more members const. FIXME: m_fil_node should be removed.
buf_flush_sync_lsn: Protect by std::atomic, not page_cleaner.mutex
(which we are removing).
page_cleaner_slot_t, page_cleaner_t: Remove many redundant members.
pc_request_flush_slot(): Replaces pc_request() and pc_flush_slot().
recv_writer_thread: Remove. Recovery works just fine without it, if we
simply invoke buf_flush_sync() at the end of each batch in
recv_sys_t::apply().
recv_recovery_from_checkpoint_finish(): Remove. We can simply call
recv_sys.debug_free() directly.
srv_started_redo: Replaces srv_start_state.
SRV_SHUTDOWN_FLUSH_PHASE: Remove. logs_empty_and_mark_files_at_shutdown()
can communicate with the normal page cleaner loop via the new function
flush_buffer_pool().
buf_flush_remove(): Assert that the calling thread is holding
buf_pool.flush_list_mutex. This removes unnecessary mutex operations
from buf_flush_remove_pages() and buf_flush_dirty_pages(),
which replace buf_LRU_flush_or_remove_pages().
buf_flush_lists(): Renamed from buf_flush_batch(), with simplified
interface. Return the number of flushed pages. Clarified comments and
renamed min_n to max_n. Identify LRU batch by lsn=0. Merge all the functions
buf_flush_start(), buf_flush_batch(), buf_flush_end() directly to this
function, which was their only caller, and remove 2 unnecessary
buf_pool.mutex release/re-acquisition that we used to perform around
the buf_flush_batch() call. At the start, if not all log has been
durably written, wait for a background task to do it, or start a new
task to do it. This allows the log write to run concurrently with our
page flushing batch. Any pages that were skipped due to too recent
FIL_PAGE_LSN or due to them being latched by a writer should be flushed
during the next batch, unless there are further modifications to those
pages. It is possible that a page that we must flush due to small
oldest_modification also carries a recent FIL_PAGE_LSN or is being
constantly modified. In the worst case, all writers would then end up
waiting in log_free_check() to allow the flushing and the checkpoint
to complete.
buf_do_flush_list_batch(): Clarify comments, and rename min_n to max_n.
Cache the last looked up tablespace. If neighbor flushing is not applicable,
invoke buf_flush_page() directly, avoiding a page lookup in between.
buf_flush_space(): Auxiliary function to look up a tablespace for
page flushing.
buf_flush_page(): Defer the computation of space->full_crc32(). Never
call log_write_up_to(), but instead skip persistent pages whose latest
modification (FIL_PAGE_LSN) is newer than the redo log. Also skip
pages on which we cannot acquire a shared latch without waiting.
buf_flush_try_neighbors(): Do not bother checking buf_fix_count
because buf_flush_page() will no longer wait for the page latch.
Take the tablespace as a parameter, and only execute this function
when innodb_flush_neighbors>0. Avoid repeated calls of page_id_t::fold().
buf_flush_relocate_on_flush_list(): Declare as cold, and push down
a condition from the callers.
buf_flush_check_neighbor(): Take id.fold() as a parameter.
buf_flush_sync(): Ensure that the buf_pool.flush_list is empty,
because the flushing batch will skip pages whose modifications have
not yet been written to the log or were latched for modification.
buf_free_from_unzip_LRU_list_batch(): Remove redundant local variables.
buf_flush_LRU_list_batch(): Let the caller buf_do_LRU_batch() initialize
the counters, and report n->evicted.
Cache the last looked up tablespace. If neighbor flushing is not applicable,
invoke buf_flush_page() directly, avoiding a page lookup in between.
buf_do_LRU_batch(): Return the number of pages flushed.
buf_LRU_free_page(): Only release and re-acquire buf_pool.mutex if
adaptive hash index entries are pointing to the block.
buf_LRU_get_free_block(): Do not wake up the page cleaner, because it
will no longer perform any useful work for us, and we do not want it
to compete for I/O while buf_flush_lists(innodb_lru_flush_size, 0)
writes out and evicts at most innodb_lru_flush_size pages. (The
function buf_do_LRU_batch() may complete after writing fewer pages if
more than innodb_lru_scan_depth pages end up in buf_pool.free list.)
Eliminate some mutex release-acquire cycles, and wait for the LRU
flush batch to complete before rescanning.
buf_LRU_check_size_of_non_data_objects(): Simplify the code.
buf_page_write_complete(): Remove the parameter evict, and always
evict pages that were part of an LRU flush.
buf_page_create(): Take a pre-allocated page as a parameter.
buf_pool_t::free_block(): Free a pre-allocated block.
recv_sys_t::recover_low(), recv_sys_t::apply(): Preallocate the block
while not holding recv_sys.mutex. During page allocation, we may
initiate a page flush, which in turn may initiate a log flush, which
would require acquiring log_sys.mutex, which should always be acquired
before recv_sys.mutex in order to avoid deadlocks. Therefore, we must
not be holding recv_sys.mutex while allocating a buffer pool block.
BtrBulk::logFreeCheck(): Skip a redundant condition.
row_undo_step(): Do not invoke srv_inc_activity_count() for every row
that is being rolled back. It should suffice to invoke the function in
trx_flush_log_if_needed() during trx_t::commit_in_memory() when the
rollback completes.
sync_check_enable(): Remove. We will enable innodb_sync_debug from the
very beginning.
Reviewed by: Vladislav Vaintroub
2020-10-15 12:10:42 +03:00
|
|
|
purge have been started, we can
|
MDEV-13039 innodb_fast_shutdown=0 may fail to purge all undo log
When a slow shutdown is performed soon after spawning some work for
background threads that can create or commit transactions, it is possible
that new transactions are started or committed after the purge has finished.
This is violating the specification of innodb_fast_shutdown=0, namely that
the purge must be completed. (None of the history of the recent transactions
would be purged.)
Also, it is possible that the purge threads would exit in slow shutdown
while there exist active transactions, such as recovered incomplete
transactions that are being rolled back. Thus, the slow shutdown could
fail to purge some undo log that becomes purgeable after the transaction
commit or rollback.
srv_undo_sources: A flag that indicates if undo log can be generated
or the persistent, whether by background threads or by user SQL.
Even when this flag is clear, active transactions that already exist
in the system may be committed or rolled back.
innodb_shutdown(): Renamed from innobase_shutdown_for_mysql().
Do not return an error code; the operation never fails.
Clear the srv_undo_sources flag, and also ensure that the background
DROP TABLE queue is empty.
srv_purge_should_exit(): Do not allow the purge to exit if
srv_undo_sources are active or the background DROP TABLE queue is not
empty, or in slow shutdown, if any active transactions exist
(and are being rolled back).
srv_purge_coordinator_thread(): Remove some previous workarounds
for this bug.
innobase_start_or_create_for_mysql(): Set buf_page_cleaner_is_active
and srv_dict_stats_thread_active directly. Set srv_undo_sources before
starting the purge subsystem, to prevent immediate shutdown of the purge.
Create dict_stats_thread and fts_optimize_thread immediately
after setting srv_undo_sources, so that shutdown can use this flag to
determine if these subsystems were started.
dict_stats_shutdown(): Shut down dict_stats_thread. Backported from 10.2.
srv_shutdown_table_bg_threads(): Remove (unused).
2017-06-08 15:43:06 +03:00
|
|
|
start transactions in row_merge_drop_temp_indexes() and
|
|
|
|
fts_drop_orphaned_tables(), and roll back recovered transactions.
|
2017-06-23 09:46:51 +03:00
|
|
|
|
|
|
|
Arbitrary user transactions may be executed when all the undo log
|
|
|
|
related background processes (including purge) are disabled due to
|
|
|
|
innodb_force_recovery=2 or innodb_force_recovery=3.
|
|
|
|
DROP TABLE may be executed at any innodb_force_recovery level.
|
|
|
|
|
2018-08-21 15:20:34 +03:00
|
|
|
During fast shutdown, we may also continue to execute
|
|
|
|
user transactions. */
|
MDEV-13039 innodb_fast_shutdown=0 may fail to purge all undo log
When a slow shutdown is performed soon after spawning some work for
background threads that can create or commit transactions, it is possible
that new transactions are started or committed after the purge has finished.
This is violating the specification of innodb_fast_shutdown=0, namely that
the purge must be completed. (None of the history of the recent transactions
would be purged.)
Also, it is possible that the purge threads would exit in slow shutdown
while there exist active transactions, such as recovered incomplete
transactions that are being rolled back. Thus, the slow shutdown could
fail to purge some undo log that becomes purgeable after the transaction
commit or rollback.
srv_undo_sources: A flag that indicates if undo log can be generated
or the persistent, whether by background threads or by user SQL.
Even when this flag is clear, active transactions that already exist
in the system may be committed or rolled back.
innodb_shutdown(): Renamed from innobase_shutdown_for_mysql().
Do not return an error code; the operation never fails.
Clear the srv_undo_sources flag, and also ensure that the background
DROP TABLE queue is empty.
srv_purge_should_exit(): Do not allow the purge to exit if
srv_undo_sources are active or the background DROP TABLE queue is not
empty, or in slow shutdown, if any active transactions exist
(and are being rolled back).
srv_purge_coordinator_thread(): Remove some previous workarounds
for this bug.
innobase_start_or_create_for_mysql(): Set buf_page_cleaner_is_active
and srv_dict_stats_thread_active directly. Set srv_undo_sources before
starting the purge subsystem, to prevent immediate shutdown of the purge.
Create dict_stats_thread and fts_optimize_thread immediately
after setting srv_undo_sources, so that shutdown can use this flag to
determine if these subsystems were started.
dict_stats_shutdown(): Shut down dict_stats_thread. Backported from 10.2.
srv_shutdown_table_bg_threads(): Remove (unused).
2017-06-08 15:43:06 +03:00
|
|
|
ut_ad(srv_undo_sources
|
2018-08-21 12:01:44 +03:00
|
|
|
|| trx->undo_no == 0
|
2018-05-15 14:39:50 +03:00
|
|
|
|| (!purge_sys.enabled()
|
2018-11-06 09:40:39 +02:00
|
|
|
&& (srv_is_being_started
|
2018-05-15 14:39:50 +03:00
|
|
|
|| trx_rollback_is_active
|
|
|
|
|| srv_force_recovery >= SRV_FORCE_NO_BACKGROUND))
|
2018-08-28 12:22:56 +03:00
|
|
|
|| ((trx->mysql_thd || trx->internal)
|
2017-08-08 19:54:12 +03:00
|
|
|
&& srv_fast_shutdown));
|
2016-09-06 09:43:16 +03:00
|
|
|
|
2018-02-20 15:10:03 +02:00
|
|
|
#ifdef WITH_WSREP
|
|
|
|
if (wsrep_is_wsrep_xid(trx->xid)) {
|
|
|
|
trx_rseg_update_wsrep_checkpoint(rseg_header, trx->xid, mtr);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (trx->mysql_log_file_name && *trx->mysql_log_file_name) {
|
|
|
|
/* Update the latest MySQL binlog name and offset info
|
|
|
|
in rollback segment header if MySQL binlogging is on
|
|
|
|
or the database server is a MySQL replication slave. */
|
|
|
|
trx_rseg_update_binlog_offset(rseg_header, trx, mtr);
|
|
|
|
}
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/* Add the log as the first in the history list */
|
MDEV-21907: InnoDB: Enable -Wconversion on clang and GCC
The -Wconversion in GCC seems to be stricter than in clang.
GCC at least since version 4.4.7 issues truncation warnings for
assignments to bitfields, while clang 10 appears to only issue
warnings when the sizes in bytes rounded to the nearest integer
powers of 2 are different.
Before GCC 10.0.0, -Wconversion required more casts and would not
allow some operations, such as x<<=1 or x+=1 on a data type that
is narrower than int.
GCC 5 (but not GCC 4, GCC 6, or any later version) is complaining
about x|=y even when x and y are compatible types that are narrower
than int. Hence, we must rewrite some x|=y as
x=static_cast<byte>(x|y) or similar, or we must disable -Wconversion.
In GCC 6 and later, the warning for assigning wider to bitfields
that are narrower than 8, 16, or 32 bits can be suppressed by
applying a bitwise & with the exact bitmask of the bitfield.
For older GCC, we must disable -Wconversion for GCC 4 or 5 in such
cases.
The bitwise negation operator appears to promote short integers
to a wider type, and hence we must add explicit truncation casts
around them. Microsoft Visual C does not allow a static_cast to
truncate a constant, such as static_cast<byte>(1) truncating int.
Hence, we will use the constructor-style cast byte(~1) for such cases.
This has been tested at least with GCC 4.8.5, 5.4.0, 7.4.0, 9.2.1, 10.0.0,
clang 9.0.1, 10.0.0, and MSVC 14.22.27905 (Microsoft Visual Studio 2019)
on 64-bit and 32-bit targets (IA-32, AMD64, POWER 8, POWER 9, ARMv8).
2020-03-12 19:46:41 +02:00
|
|
|
flst_add_first(rseg_header, TRX_RSEG + TRX_RSEG_HISTORY, undo_page,
|
|
|
|
static_cast<uint16_t>(undo->hdr_offset
|
|
|
|
+ TRX_UNDO_HISTORY_NODE), mtr);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2020-04-02 19:34:34 +03:00
|
|
|
mtr->write<8,mtr_t::MAYBE_NOP>(*undo_page,
|
2020-05-25 22:25:03 +04:00
|
|
|
undo_header + TRX_UNDO_TRX_NO,
|
|
|
|
trx->rw_trx_hash_element->no);
|
2021-06-21 14:22:22 +03:00
|
|
|
mtr->write<2,mtr_t::MAYBE_NOP>(*undo_page, undo_header
|
|
|
|
+ TRX_UNDO_NEEDS_PURGE, 1U);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
if (rseg->last_page_no == FIL_NULL) {
|
|
|
|
rseg->last_page_no = undo->hdr_page_no;
|
2021-06-21 14:22:22 +03:00
|
|
|
rseg->set_last_commit(undo->hdr_offset,
|
|
|
|
trx->rw_trx_hash_element->no);
|
2017-08-15 17:18:55 +03:00
|
|
|
rseg->needs_purge = true;
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
2017-08-11 12:47:54 +03:00
|
|
|
|
2018-10-25 17:37:16 +04:00
|
|
|
trx_sys.rseg_history_len++;
|
2018-02-20 10:02:42 +02:00
|
|
|
|
2017-08-11 12:47:54 +03:00
|
|
|
if (undo->state == TRX_UNDO_CACHED) {
|
|
|
|
UT_LIST_ADD_FIRST(rseg->undo_cached, undo);
|
|
|
|
MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
|
|
|
|
} else {
|
|
|
|
ut_ad(undo->state == TRX_UNDO_TO_PURGE);
|
2018-02-20 10:16:52 +02:00
|
|
|
ut_free(undo);
|
2017-08-11 12:47:54 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
undo = NULL;
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
2017-01-04 18:16:37 +02:00
|
|
|
/** Remove undo log header from the history list.
|
2019-12-03 10:19:45 +02:00
|
|
|
@param[in,out] rseg rollback segment header page
|
|
|
|
@param[in] log undo log segment header page
|
|
|
|
@param[in] offset byte offset in the undo log segment header page
|
|
|
|
@param[in,out] mtr mini-transaction */
|
|
|
|
static void trx_purge_remove_log_hdr(buf_block_t *rseg, buf_block_t* log,
|
|
|
|
uint16_t offset, mtr_t *mtr)
|
2017-01-04 18:16:37 +02:00
|
|
|
{
|
2019-12-03 10:19:45 +02:00
|
|
|
flst_remove(rseg, TRX_RSEG + TRX_RSEG_HISTORY,
|
MDEV-21907: InnoDB: Enable -Wconversion on clang and GCC
The -Wconversion in GCC seems to be stricter than in clang.
GCC at least since version 4.4.7 issues truncation warnings for
assignments to bitfields, while clang 10 appears to only issue
warnings when the sizes in bytes rounded to the nearest integer
powers of 2 are different.
Before GCC 10.0.0, -Wconversion required more casts and would not
allow some operations, such as x<<=1 or x+=1 on a data type that
is narrower than int.
GCC 5 (but not GCC 4, GCC 6, or any later version) is complaining
about x|=y even when x and y are compatible types that are narrower
than int. Hence, we must rewrite some x|=y as
x=static_cast<byte>(x|y) or similar, or we must disable -Wconversion.
In GCC 6 and later, the warning for assigning wider to bitfields
that are narrower than 8, 16, or 32 bits can be suppressed by
applying a bitwise & with the exact bitmask of the bitfield.
For older GCC, we must disable -Wconversion for GCC 4 or 5 in such
cases.
The bitwise negation operator appears to promote short integers
to a wider type, and hence we must add explicit truncation casts
around them. Microsoft Visual C does not allow a static_cast to
truncate a constant, such as static_cast<byte>(1) truncating int.
Hence, we will use the constructor-style cast byte(~1) for such cases.
This has been tested at least with GCC 4.8.5, 5.4.0, 7.4.0, 9.2.1, 10.0.0,
clang 9.0.1, 10.0.0, and MSVC 14.22.27905 (Microsoft Visual Studio 2019)
on 64-bit and 32-bit targets (IA-32, AMD64, POWER 8, POWER 9, ARMv8).
2020-03-12 19:46:41 +02:00
|
|
|
log, static_cast<uint16_t>(offset + TRX_UNDO_HISTORY_NODE), mtr);
|
2019-12-03 10:19:45 +02:00
|
|
|
trx_sys.rseg_history_len--;
|
2017-01-04 18:16:37 +02:00
|
|
|
}
|
|
|
|
|
MDEV-12289 Keep 128 persistent rollback segments for compatibility and performance
InnoDB divides the allocation of undo logs into rollback segments.
The DB_ROLL_PTR system column of clustered indexes can address up to
128 rollback segments (TRX_SYS_N_RSEGS). Originally, InnoDB only
created one rollback segment. In MySQL 5.5 or in the InnoDB Plugin
for MySQL 5.1, all 128 rollback segments were created.
MySQL 5.7 hard-codes the rollback segment IDs 1..32 for temporary undo logs.
On upgrade, unless a slow shutdown (innodb_fast_shutdown=0)
was performed on the old server instance, these rollback segments
could be in use by transactions that are in XA PREPARE state or
transactions that were left behind by a server kill followed by a
normal shutdown immediately after restart.
Persistent tables cannot refer to temporary undo logs or vice versa.
Therefore, we should keep two distinct sets of rollback segments:
one for persistent tables and another for temporary tables. In this way,
all 128 rollback segments will be available for both types of tables,
which could improve performance. Also, MariaDB 10.2 will remain more
compatible than MySQL 5.7 with data files from earlier versions of
MySQL or MariaDB.
trx_sys_t::temp_rsegs[TRX_SYS_N_RSEGS]: A new array of temporary
rollback segments. The trx_sys_t::rseg_array[TRX_SYS_N_RSEGS] will
be solely for persistent undo logs.
srv_tmp_undo_logs: Remove. Use the constant TRX_SYS_N_RSEGS.
srv_available_undo_logs: Change the type to ulong.
trx_rseg_get_on_id(): Remove. Instead, let the callers refer to
trx_sys directly.
trx_rseg_create(), trx_sysf_rseg_find_free(): Remove unneeded parameters.
These functions only deal with persistent undo logs.
trx_temp_rseg_create(): New function, to create all temporary rollback
segments at server startup.
trx_rseg_t::is_persistent(): Determine if the rollback segment is for
persistent tables.
trx_sys_is_noredo_rseg_slot(): Remove. The callers must know based on
context (such as table handle) whether the DB_ROLL_PTR is referring to
a persistent undo log.
trx_sys_create_rsegs(): Remove all parameters, which were always passed
as global variables. Instead, modify the global variables directly.
enum trx_rseg_type_t: Remove.
trx_t::get_temp_rseg(): A method to ensure that a temporary
rollback segment has been assigned for the transaction.
trx_t::assign_temp_rseg(): Replaces trx_assign_rseg().
trx_purge_free_segment(), trx_purge_truncate_rseg_history():
Remove the redundant variable noredo=false.
Temporary undo logs are discarded immediately at transaction commit
or rollback, not lazily by purge.
trx_purge_mark_undo_for_truncate(): Remove references to the
temporary rollback segments.
trx_purge_mark_undo_for_truncate(): Remove a check for temporary
rollback segments. Only the dedicated persistent undo log tablespaces
can be truncated.
trx_undo_get_undo_rec_low(), trx_undo_get_undo_rec(): Add the
parameter is_temp.
trx_rseg_mem_restore(): Split from trx_rseg_mem_create().
Initialize the undo log and the rollback segment from the file
data structures.
trx_sysf_get_n_rseg_slots(): Renamed from
trx_sysf_used_slots_for_redo_rseg(). Count the persistent
rollback segment headers that have been initialized.
trx_sys_close(): Also free trx_sys->temp_rsegs[].
get_next_redo_rseg(): Merged to trx_assign_rseg_low().
trx_assign_rseg_low(): Remove the parameters and access the
global variables directly. Revert to simple round-robin, now that
the whole trx_sys->rseg_array[] is for persistent undo log again.
get_next_noredo_rseg(): Moved to trx_t::assign_temp_rseg().
srv_undo_tablespaces_init(): Remove some parameters and use the
global variables directly. Clarify some error messages.
Adjust the test innodb.log_file. Apparently, before these changes,
InnoDB somehow ignored missing dedicated undo tablespace files that
are pointed to by the TRX_SYS header page, possibly losing part of
essential transaction system state.
2017-03-30 13:11:34 +03:00
|
|
|
/** Free an undo log segment, and remove the header from the history list.
|
2017-01-04 18:16:37 +02:00
|
|
|
@param[in,out] rseg rollback segment
|
MDEV-12289 Keep 128 persistent rollback segments for compatibility and performance
InnoDB divides the allocation of undo logs into rollback segments.
The DB_ROLL_PTR system column of clustered indexes can address up to
128 rollback segments (TRX_SYS_N_RSEGS). Originally, InnoDB only
created one rollback segment. In MySQL 5.5 or in the InnoDB Plugin
for MySQL 5.1, all 128 rollback segments were created.
MySQL 5.7 hard-codes the rollback segment IDs 1..32 for temporary undo logs.
On upgrade, unless a slow shutdown (innodb_fast_shutdown=0)
was performed on the old server instance, these rollback segments
could be in use by transactions that are in XA PREPARE state or
transactions that were left behind by a server kill followed by a
normal shutdown immediately after restart.
Persistent tables cannot refer to temporary undo logs or vice versa.
Therefore, we should keep two distinct sets of rollback segments:
one for persistent tables and another for temporary tables. In this way,
all 128 rollback segments will be available for both types of tables,
which could improve performance. Also, MariaDB 10.2 will remain more
compatible than MySQL 5.7 with data files from earlier versions of
MySQL or MariaDB.
trx_sys_t::temp_rsegs[TRX_SYS_N_RSEGS]: A new array of temporary
rollback segments. The trx_sys_t::rseg_array[TRX_SYS_N_RSEGS] will
be solely for persistent undo logs.
srv_tmp_undo_logs: Remove. Use the constant TRX_SYS_N_RSEGS.
srv_available_undo_logs: Change the type to ulong.
trx_rseg_get_on_id(): Remove. Instead, let the callers refer to
trx_sys directly.
trx_rseg_create(), trx_sysf_rseg_find_free(): Remove unneeded parameters.
These functions only deal with persistent undo logs.
trx_temp_rseg_create(): New function, to create all temporary rollback
segments at server startup.
trx_rseg_t::is_persistent(): Determine if the rollback segment is for
persistent tables.
trx_sys_is_noredo_rseg_slot(): Remove. The callers must know based on
context (such as table handle) whether the DB_ROLL_PTR is referring to
a persistent undo log.
trx_sys_create_rsegs(): Remove all parameters, which were always passed
as global variables. Instead, modify the global variables directly.
enum trx_rseg_type_t: Remove.
trx_t::get_temp_rseg(): A method to ensure that a temporary
rollback segment has been assigned for the transaction.
trx_t::assign_temp_rseg(): Replaces trx_assign_rseg().
trx_purge_free_segment(), trx_purge_truncate_rseg_history():
Remove the redundant variable noredo=false.
Temporary undo logs are discarded immediately at transaction commit
or rollback, not lazily by purge.
trx_purge_mark_undo_for_truncate(): Remove references to the
temporary rollback segments.
trx_purge_mark_undo_for_truncate(): Remove a check for temporary
rollback segments. Only the dedicated persistent undo log tablespaces
can be truncated.
trx_undo_get_undo_rec_low(), trx_undo_get_undo_rec(): Add the
parameter is_temp.
trx_rseg_mem_restore(): Split from trx_rseg_mem_create().
Initialize the undo log and the rollback segment from the file
data structures.
trx_sysf_get_n_rseg_slots(): Renamed from
trx_sysf_used_slots_for_redo_rseg(). Count the persistent
rollback segment headers that have been initialized.
trx_sys_close(): Also free trx_sys->temp_rsegs[].
get_next_redo_rseg(): Merged to trx_assign_rseg_low().
trx_assign_rseg_low(): Remove the parameters and access the
global variables directly. Revert to simple round-robin, now that
the whole trx_sys->rseg_array[] is for persistent undo log again.
get_next_noredo_rseg(): Moved to trx_t::assign_temp_rseg().
srv_undo_tablespaces_init(): Remove some parameters and use the
global variables directly. Clarify some error messages.
Adjust the test innodb.log_file. Apparently, before these changes,
InnoDB somehow ignored missing dedicated undo tablespace files that
are pointed to by the TRX_SYS header page, possibly losing part of
essential transaction system state.
2017-03-30 13:11:34 +03:00
|
|
|
@param[in] hdr_addr file address of log_hdr */
|
2014-02-26 19:11:54 +01:00
|
|
|
static
|
|
|
|
void
|
MDEV-12289 Keep 128 persistent rollback segments for compatibility and performance
InnoDB divides the allocation of undo logs into rollback segments.
The DB_ROLL_PTR system column of clustered indexes can address up to
128 rollback segments (TRX_SYS_N_RSEGS). Originally, InnoDB only
created one rollback segment. In MySQL 5.5 or in the InnoDB Plugin
for MySQL 5.1, all 128 rollback segments were created.
MySQL 5.7 hard-codes the rollback segment IDs 1..32 for temporary undo logs.
On upgrade, unless a slow shutdown (innodb_fast_shutdown=0)
was performed on the old server instance, these rollback segments
could be in use by transactions that are in XA PREPARE state or
transactions that were left behind by a server kill followed by a
normal shutdown immediately after restart.
Persistent tables cannot refer to temporary undo logs or vice versa.
Therefore, we should keep two distinct sets of rollback segments:
one for persistent tables and another for temporary tables. In this way,
all 128 rollback segments will be available for both types of tables,
which could improve performance. Also, MariaDB 10.2 will remain more
compatible than MySQL 5.7 with data files from earlier versions of
MySQL or MariaDB.
trx_sys_t::temp_rsegs[TRX_SYS_N_RSEGS]: A new array of temporary
rollback segments. The trx_sys_t::rseg_array[TRX_SYS_N_RSEGS] will
be solely for persistent undo logs.
srv_tmp_undo_logs: Remove. Use the constant TRX_SYS_N_RSEGS.
srv_available_undo_logs: Change the type to ulong.
trx_rseg_get_on_id(): Remove. Instead, let the callers refer to
trx_sys directly.
trx_rseg_create(), trx_sysf_rseg_find_free(): Remove unneeded parameters.
These functions only deal with persistent undo logs.
trx_temp_rseg_create(): New function, to create all temporary rollback
segments at server startup.
trx_rseg_t::is_persistent(): Determine if the rollback segment is for
persistent tables.
trx_sys_is_noredo_rseg_slot(): Remove. The callers must know based on
context (such as table handle) whether the DB_ROLL_PTR is referring to
a persistent undo log.
trx_sys_create_rsegs(): Remove all parameters, which were always passed
as global variables. Instead, modify the global variables directly.
enum trx_rseg_type_t: Remove.
trx_t::get_temp_rseg(): A method to ensure that a temporary
rollback segment has been assigned for the transaction.
trx_t::assign_temp_rseg(): Replaces trx_assign_rseg().
trx_purge_free_segment(), trx_purge_truncate_rseg_history():
Remove the redundant variable noredo=false.
Temporary undo logs are discarded immediately at transaction commit
or rollback, not lazily by purge.
trx_purge_mark_undo_for_truncate(): Remove references to the
temporary rollback segments.
trx_purge_mark_undo_for_truncate(): Remove a check for temporary
rollback segments. Only the dedicated persistent undo log tablespaces
can be truncated.
trx_undo_get_undo_rec_low(), trx_undo_get_undo_rec(): Add the
parameter is_temp.
trx_rseg_mem_restore(): Split from trx_rseg_mem_create().
Initialize the undo log and the rollback segment from the file
data structures.
trx_sysf_get_n_rseg_slots(): Renamed from
trx_sysf_used_slots_for_redo_rseg(). Count the persistent
rollback segment headers that have been initialized.
trx_sys_close(): Also free trx_sys->temp_rsegs[].
get_next_redo_rseg(): Merged to trx_assign_rseg_low().
trx_assign_rseg_low(): Remove the parameters and access the
global variables directly. Revert to simple round-robin, now that
the whole trx_sys->rseg_array[] is for persistent undo log again.
get_next_noredo_rseg(): Moved to trx_t::assign_temp_rseg().
srv_undo_tablespaces_init(): Remove some parameters and use the
global variables directly. Clarify some error messages.
Adjust the test innodb.log_file. Apparently, before these changes,
InnoDB somehow ignored missing dedicated undo tablespace files that
are pointed by the TRX_SYS header page, possibly losing part of
essential transaction system state.
2017-03-30 13:11:34 +03:00
|
|
|
trx_purge_free_segment(trx_rseg_t* rseg, fil_addr_t hdr_addr)
|
2014-02-26 19:11:54 +01:00
|
|
|
{
|
|
|
|
mtr_t mtr;
|
|
|
|
|
2017-08-15 17:18:55 +03:00
|
|
|
mtr.start();
|
|
|
|
mutex_enter(&rseg->mutex);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-12-03 10:19:45 +02:00
|
|
|
buf_block_t* rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
|
|
|
|
buf_block_t* block = trx_undo_page_get(
|
2018-03-26 17:23:47 +03:00
|
|
|
page_id_t(rseg->space->id, hdr_addr.page), &mtr);
|
2017-08-15 17:18:55 +03:00
|
|
|
|
|
|
|
/* Mark the last undo log totally purged, so that if the
|
|
|
|
system crashes, the tail of the undo log will not get accessed
|
|
|
|
again. The list of pages in the undo log tail gets
|
|
|
|
inconsistent during the freeing of the segment, and therefore
|
|
|
|
purge should not try to access them again. */
|
2020-04-02 19:34:34 +03:00
|
|
|
mtr.write<2,mtr_t::MAYBE_NOP>(*block, block->frame + hdr_addr.boffset
|
|
|
|
+ TRX_UNDO_NEEDS_PURGE, 0U);
|
2017-08-15 17:18:55 +03:00
|
|
|
|
|
|
|
while (!fseg_free_step_not_header(
|
|
|
|
TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER
|
2020-05-18 17:30:02 +03:00
|
|
|
+ block->frame, &mtr)) {
|
2017-08-15 17:18:55 +03:00
|
|
|
mutex_exit(&rseg->mutex);
|
|
|
|
|
|
|
|
mtr.commit();
|
|
|
|
mtr.start();
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
mutex_enter(&rseg->mutex);
|
|
|
|
|
2017-03-09 22:06:22 +02:00
|
|
|
rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-12-03 10:19:45 +02:00
|
|
|
block = trx_undo_page_get(
|
2018-03-26 17:23:47 +03:00
|
|
|
page_id_t(rseg->space->id, hdr_addr.page), &mtr);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* The page list may now be inconsistent, but the length field
|
|
|
|
stored in the list base node tells us how big it was before we
|
|
|
|
started the freeing. */
|
|
|
|
|
2019-12-03 10:19:45 +02:00
|
|
|
const uint32_t seg_size = flst_get_len(
|
|
|
|
TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->frame);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/* We may free the undo log segment header page; it must be freed
|
|
|
|
within the same mtr as the undo log header is removed from the
|
|
|
|
history list: otherwise, in case of a database crash, the segment
|
|
|
|
could become inaccessible garbage in the file space. */
|
|
|
|
|
2019-12-03 10:19:45 +02:00
|
|
|
trx_purge_remove_log_hdr(rseg_hdr, block, hdr_addr.boffset, &mtr);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
do {
|
|
|
|
|
|
|
|
/* Here we assume that a file segment with just the header
|
|
|
|
page can be freed in a few steps, so that the buffer pool
|
|
|
|
is not flooded with bufferfixed pages: see the note in
|
|
|
|
fsp0fsp.cc. */
|
|
|
|
|
2017-08-15 17:18:55 +03:00
|
|
|
} while (!fseg_free_step(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER
|
2020-05-18 17:30:02 +03:00
|
|
|
+ block->frame, &mtr));
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-12-03 10:19:45 +02:00
|
|
|
byte* hist = TRX_RSEG + TRX_RSEG_HISTORY_SIZE + rseg_hdr->frame;
|
|
|
|
ut_ad(mach_read_from_4(hist) >= seg_size);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-12-03 10:19:45 +02:00
|
|
|
mtr.write<4>(*rseg_hdr, hist, mach_read_from_4(hist) - seg_size);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
ut_ad(rseg->curr_size >= seg_size);
|
|
|
|
|
|
|
|
rseg->curr_size -= seg_size;
|
|
|
|
|
|
|
|
mutex_exit(&(rseg->mutex));
|
|
|
|
|
|
|
|
mtr_commit(&mtr);
|
|
|
|
}
|
|
|
|
|
MDEV-12289 Keep 128 persistent rollback segments for compatibility and performance
InnoDB divides the allocation of undo logs into rollback segments.
The DB_ROLL_PTR system column of clustered indexes can address up to
128 rollback segments (TRX_SYS_N_RSEGS). Originally, InnoDB only
created one rollback segment. In MySQL 5.5 or in the InnoDB Plugin
for MySQL 5.1, all 128 rollback segments were created.
MySQL 5.7 hard-codes the rollback segment IDs 1..32 for temporary undo logs.
On upgrade, unless a slow shutdown (innodb_fast_shutdown=0)
was performed on the old server instance, these rollback segments
could be in use by transactions that are in XA PREPARE state or
transactions that were left behind by a server kill followed by a
normal shutdown immediately after restart.
Persistent tables cannot refer to temporary undo logs or vice versa.
Therefore, we should keep two distinct sets of rollback segments:
one for persistent tables and another for temporary tables. In this way,
all 128 rollback segments will be available for both types of tables,
which could improve performance. Also, MariaDB 10.2 will remain more
compatible than MySQL 5.7 with data files from earlier versions of
MySQL or MariaDB.
trx_sys_t::temp_rsegs[TRX_SYS_N_RSEGS]: A new array of temporary
rollback segments. The trx_sys_t::rseg_array[TRX_SYS_N_RSEGS] will
be solely for persistent undo logs.
srv_tmp_undo_logs. Remove. Use the constant TRX_SYS_N_RSEGS.
srv_available_undo_logs: Change the type to ulong.
trx_rseg_get_on_id(): Remove. Instead, let the callers refer to
trx_sys directly.
trx_rseg_create(), trx_sysf_rseg_find_free(): Remove unneeded parameters.
These functions only deal with persistent undo logs.
trx_temp_rseg_create(): New function, to create all temporary rollback
segments at server startup.
trx_rseg_t::is_persistent(): Determine if the rollback segment is for
persistent tables.
trx_sys_is_noredo_rseg_slot(): Remove. The callers must know based on
context (such as table handle) whether the DB_ROLL_PTR is referring to
a persistent undo log.
trx_sys_create_rsegs(): Remove all parameters, which were always passed
as global variables. Instead, modify the global variables directly.
enum trx_rseg_type_t: Remove.
trx_t::get_temp_rseg(): A method to ensure that a temporary
rollback segment has been assigned for the transaction.
trx_t::assign_temp_rseg(): Replaces trx_assign_rseg().
trx_purge_free_segment(), trx_purge_truncate_rseg_history():
Remove the redundant variable noredo=false.
Temporary undo logs are discarded immediately at transaction commit
or rollback, not lazily by purge.
trx_purge_mark_undo_for_truncate(): Remove references to the
temporary rollback segments.
trx_purge_mark_undo_for_truncate(): Remove a check for temporary
rollback segments. Only the dedicated persistent undo log tablespaces
can be truncated.
trx_undo_get_undo_rec_low(), trx_undo_get_undo_rec(): Add the
parameter is_temp.
trx_rseg_mem_restore(): Split from trx_rseg_mem_create().
Initialize the undo log and the rollback segment from the file
data structures.
trx_sysf_get_n_rseg_slots(): Renamed from
trx_sysf_used_slots_for_redo_rseg(). Count the persistent
rollback segment headers that have been initialized.
trx_sys_close(): Also free trx_sys->temp_rsegs[].
get_next_redo_rseg(): Merged to trx_assign_rseg_low().
trx_assign_rseg_low(): Remove the parameters and access the
global variables directly. Revert to simple round-robin, now that
the whole trx_sys->rseg_array[] is for persistent undo log again.
get_next_noredo_rseg(): Moved to trx_t::assign_temp_rseg().
srv_undo_tablespaces_init(): Remove some parameters and use the
global variables directly. Clarify some error messages.
Adjust the test innodb.log_file. Apparently, before these changes,
InnoDB somehow ignored missing dedicated undo tablespace files that
are pointed by the TRX_SYS header page, possibly losing part of
essential transaction system state.
2017-03-30 13:11:34 +03:00
|
|
|
/** Remove unnecessary history data from a rollback segment.
|
|
|
|
@param[in,out] rseg rollback segment
|
2018-02-21 12:54:33 +02:00
|
|
|
@param[in] limit truncate anything before this */
|
2014-02-26 19:11:54 +01:00
|
|
|
static
|
|
|
|
void
|
2018-02-21 12:54:33 +02:00
|
|
|
trx_purge_truncate_rseg_history(
|
|
|
|
trx_rseg_t& rseg,
|
|
|
|
const purge_sys_t::iterator& limit)
|
2014-02-26 19:11:54 +01:00
|
|
|
{
|
|
|
|
fil_addr_t hdr_addr;
|
|
|
|
fil_addr_t prev_hdr_addr;
|
|
|
|
mtr_t mtr;
|
|
|
|
trx_id_t undo_trx_no;
|
|
|
|
|
2018-02-21 12:54:33 +02:00
|
|
|
mtr.start();
|
|
|
|
ut_ad(rseg.is_persistent());
|
|
|
|
mutex_enter(&rseg.mutex);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-12-03 10:19:45 +02:00
|
|
|
buf_block_t* rseg_hdr = trx_rsegf_get(rseg.space, rseg.page_no, &mtr);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
MDEV-21907: InnoDB: Enable -Wconversion on clang and GCC
The -Wconversion in GCC seems to be stricter than in clang.
GCC at least since version 4.4.7 issues truncation warnings for
assignments to bitfields, while clang 10 appears to only issue
warnings when the sizes in bytes rounded to the nearest integer
powers of 2 are different.
Before GCC 10.0.0, -Wconversion required more casts and would not
allow some operations, such as x<<=1 or x+=1 on a data type that
is narrower than int.
GCC 5 (but not GCC 4, GCC 6, or any later version) is complaining
about x|=y even when x and y are compatible types that are narrower
than int. Hence, we must rewrite some x|=y as
x=static_cast<byte>(x|y) or similar, or we must disable -Wconversion.
In GCC 6 and later, the warning for assigning wider to bitfields
that are narrower than 8, 16, or 32 bits can be suppressed by
applying a bitwise & with the exact bitmask of the bitfield.
For older GCC, we must disable -Wconversion for GCC 4 or 5 in such
cases.
The bitwise negation operator appears to promote short integers
to a wider type, and hence we must add explicit truncation casts
around them. Microsoft Visual C does not allow a static_cast to
truncate a constant, such as static_cast<byte>(1) truncating int.
Hence, we will use the constructor-style cast byte(~1) for such cases.
This has been tested at least with GCC 4.8.5, 5.4.0, 7.4.0, 9.2.1, 10.0.0,
clang 9.0.1, 10.0.0, and MSVC 14.22.27905 (Microsoft Visual Studio 2019)
on 64-bit and 32-bit targets (IA-32, AMD64, POWER 8, POWER 9, ARMv8).
2020-03-12 19:46:41 +02:00
|
|
|
hdr_addr = flst_get_last(TRX_RSEG + TRX_RSEG_HISTORY
|
|
|
|
+ rseg_hdr->frame);
|
|
|
|
hdr_addr.boffset = static_cast<uint16_t>(hdr_addr.boffset
|
|
|
|
- TRX_UNDO_HISTORY_NODE);
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
loop:
|
|
|
|
if (hdr_addr.page == FIL_NULL) {
|
2018-02-21 12:54:33 +02:00
|
|
|
func_exit:
|
|
|
|
mutex_exit(&rseg.mutex);
|
|
|
|
mtr.commit();
|
2014-02-26 19:11:54 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2019-12-03 10:19:45 +02:00
|
|
|
buf_block_t* block = trx_undo_page_get(page_id_t(rseg.space->id,
|
|
|
|
hdr_addr.page),
|
|
|
|
&mtr);
|
|
|
|
undo_trx_no = mach_read_from_8(block->frame + hdr_addr.boffset
|
|
|
|
+ TRX_UNDO_TRX_NO);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2021-06-21 12:34:07 +03:00
|
|
|
if (undo_trx_no >= limit.trx_no) {
|
|
|
|
if (undo_trx_no == limit.trx_no) {
|
2014-02-26 19:11:54 +01:00
|
|
|
trx_undo_truncate_start(
|
2018-02-21 12:54:33 +02:00
|
|
|
&rseg, hdr_addr.page,
|
|
|
|
hdr_addr.boffset, limit.undo_no);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
2018-02-21 12:54:33 +02:00
|
|
|
goto func_exit;
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
MDEV-21907: InnoDB: Enable -Wconversion on clang and GCC
The -Wconversion in GCC seems to be stricter than in clang.
GCC at least since version 4.4.7 issues truncation warnings for
assignments to bitfields, while clang 10 appears to only issue
warnings when the sizes in bytes rounded to the nearest integer
powers of 2 are different.
Before GCC 10.0.0, -Wconversion required more casts and would not
allow some operations, such as x<<=1 or x+=1 on a data type that
is narrower than int.
GCC 5 (but not GCC 4, GCC 6, or any later version) is complaining
about x|=y even when x and y are compatible types that are narrower
than int. Hence, we must rewrite some x|=y as
x=static_cast<byte>(x|y) or similar, or we must disable -Wconversion.
In GCC 6 and later, the warning for assigning wider to bitfields
that are narrower than 8, 16, or 32 bits can be suppressed by
applying a bitwise & with the exact bitmask of the bitfield.
For older GCC, we must disable -Wconversion for GCC 4 or 5 in such
cases.
The bitwise negation operator appears to promote short integers
to a wider type, and hence we must add explicit truncation casts
around them. Microsoft Visual C does not allow a static_cast to
truncate a constant, such as static_cast<byte>(1) truncating int.
Hence, we will use the constructor-style cast byte(~1) for such cases.
This has been tested at least with GCC 4.8.5, 5.4.0, 7.4.0, 9.2.1, 10.0.0,
clang 9.0.1, 10.0.0, and MSVC 14.22.27905 (Microsoft Visual Studio 2019)
on 64-bit and 32-bit targets (IA-32, AMD64, POWER 8, POWER 9, ARMv8).
2020-03-12 19:46:41 +02:00
|
|
|
prev_hdr_addr = flst_get_prev_addr(block->frame + hdr_addr.boffset
|
|
|
|
+ TRX_UNDO_HISTORY_NODE);
|
|
|
|
prev_hdr_addr.boffset = static_cast<uint16_t>(prev_hdr_addr.boffset
|
|
|
|
- TRX_UNDO_HISTORY_NODE);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-12-03 10:19:45 +02:00
|
|
|
if (mach_read_from_2(TRX_UNDO_SEG_HDR + TRX_UNDO_STATE + block->frame)
|
|
|
|
== TRX_UNDO_TO_PURGE
|
|
|
|
&& !mach_read_from_2(block->frame + hdr_addr.boffset
|
|
|
|
+ TRX_UNDO_NEXT_LOG)) {
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/* We can free the whole log segment */
|
|
|
|
|
2018-02-21 12:54:33 +02:00
|
|
|
mutex_exit(&rseg.mutex);
|
|
|
|
mtr.commit();
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2017-01-04 18:16:37 +02:00
|
|
|
/* calls the trx_purge_remove_log_hdr()
|
|
|
|
inside trx_purge_free_segment(). */
|
2018-02-21 12:54:33 +02:00
|
|
|
trx_purge_free_segment(&rseg, hdr_addr);
|
2014-02-26 19:11:54 +01:00
|
|
|
} else {
|
2017-01-04 18:16:37 +02:00
|
|
|
/* Remove the log hdr from the rseg history. */
|
2019-12-03 10:19:45 +02:00
|
|
|
trx_purge_remove_log_hdr(rseg_hdr, block, hdr_addr.boffset,
|
|
|
|
&mtr);
|
2017-01-04 18:16:37 +02:00
|
|
|
|
2018-02-21 12:54:33 +02:00
|
|
|
mutex_exit(&rseg.mutex);
|
|
|
|
mtr.commit();
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
2018-02-21 12:54:33 +02:00
|
|
|
mtr.start();
|
|
|
|
mutex_enter(&rseg.mutex);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2018-02-21 12:54:33 +02:00
|
|
|
rseg_hdr = trx_rsegf_get(rseg.space, rseg.page_no, &mtr);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
hdr_addr = prev_hdr_addr;
|
|
|
|
|
|
|
|
goto loop;
|
|
|
|
}
|
|
|
|
|
2016-08-12 11:17:45 +03:00
|
|
|
/** Cleanse purge queue to remove the rseg that reside in undo-tablespace
|
|
|
|
marked for truncate.
|
2018-09-10 18:01:54 +03:00
|
|
|
@param[in] space undo tablespace being truncated */
|
|
|
|
static void trx_purge_cleanse_purge_queue(const fil_space_t& space)
|
2016-08-12 11:17:45 +03:00
|
|
|
{
|
|
|
|
typedef std::vector<TrxUndoRsegs> purge_elem_list_t;
|
|
|
|
purge_elem_list_t purge_elem_list;
|
|
|
|
|
2018-09-10 18:01:54 +03:00
|
|
|
mutex_enter(&purge_sys.pq_mutex);
|
|
|
|
|
2016-08-12 11:17:45 +03:00
|
|
|
/* Remove rseg instances that are in the purge queue before we start
|
|
|
|
truncate of corresponding UNDO truncate. */
|
2018-02-22 09:30:41 +02:00
|
|
|
while (!purge_sys.purge_queue.empty()) {
|
|
|
|
purge_elem_list.push_back(purge_sys.purge_queue.top());
|
|
|
|
purge_sys.purge_queue.pop();
|
2016-08-12 11:17:45 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
for (purge_elem_list_t::iterator it = purge_elem_list.begin();
|
|
|
|
it != purge_elem_list.end();
|
|
|
|
++it) {
|
|
|
|
|
|
|
|
for (TrxUndoRsegs::iterator it2 = it->begin();
|
|
|
|
it2 != it->end();
|
|
|
|
++it2) {
|
2018-09-10 18:01:54 +03:00
|
|
|
if ((*it2)->space == &space) {
|
2016-08-12 11:17:45 +03:00
|
|
|
it->erase(it2);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-21 16:15:20 +02:00
|
|
|
if (!it->empty()) {
|
2018-02-22 09:30:41 +02:00
|
|
|
purge_sys.purge_queue.push(*it);
|
2016-08-12 11:17:45 +03:00
|
|
|
}
|
|
|
|
}
|
2018-09-10 18:01:54 +03:00
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
mutex_exit(&purge_sys.pq_mutex);
|
2016-08-12 11:17:45 +03:00
|
|
|
}
|
|
|
|
|
2021-10-05 07:13:14 +03:00
|
|
|
#if defined __GNUC__ && __GNUC__ == 4 && !defined __clang__
|
|
|
|
# if defined __arm__ || defined __aarch64__
|
|
|
|
/* Work around an internal compiler error in GCC 4.8.5 */
|
|
|
|
__attribute__((optimize(0)))
|
|
|
|
# endif
|
|
|
|
#endif
|
2018-09-10 18:01:54 +03:00
|
|
|
/**
|
|
|
|
Removes unnecessary history data from rollback segments. NOTE that when this
|
|
|
|
function is called, the caller must not have any latches on undo log pages!
|
|
|
|
*/
|
|
|
|
static void trx_purge_truncate_history()
|
2016-08-12 11:17:45 +03:00
|
|
|
{
|
2021-09-24 08:24:03 +03:00
|
|
|
ut_ad(purge_sys.head <= purge_sys.tail);
|
|
|
|
purge_sys_t::iterator &head= purge_sys.head.trx_no
|
|
|
|
? purge_sys.head : purge_sys.tail;
|
|
|
|
|
|
|
|
if (head.trx_no >= purge_sys.low_limit_no())
|
|
|
|
{
|
|
|
|
/* This is sometimes necessary. TODO: find out why. */
|
|
|
|
head.trx_no= purge_sys.low_limit_no();
|
|
|
|
head.undo_no= 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (ulint i= 0; i < TRX_SYS_N_RSEGS; ++i)
|
|
|
|
{
|
|
|
|
if (trx_rseg_t *rseg= trx_sys.rseg_array[i])
|
|
|
|
{
|
|
|
|
ut_ad(rseg->id == i);
|
|
|
|
trx_purge_truncate_rseg_history(*rseg, head);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (srv_undo_tablespaces_active < 2)
|
|
|
|
return;
|
2016-08-12 11:17:45 +03:00
|
|
|
|
2021-09-24 08:24:03 +03:00
|
|
|
while (srv_undo_log_truncate)
|
|
|
|
{
|
|
|
|
if (!purge_sys.truncate.current)
|
|
|
|
{
|
|
|
|
const ulint threshold=
|
|
|
|
ulint(srv_max_undo_log_size >> srv_page_size_shift);
|
|
|
|
for (ulint i= purge_sys.truncate.last
|
|
|
|
? purge_sys.truncate.last->id - srv_undo_space_id_start : 0,
|
|
|
|
j= i;; )
|
|
|
|
{
|
|
|
|
const auto space_id= srv_undo_space_id_start + i;
|
|
|
|
ut_ad(srv_is_undo_tablespace(space_id));
|
|
|
|
fil_space_t *space= fil_space_get(space_id);
|
|
|
|
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
|
|
|
|
|
|
|
|
if (space && space->get_size() > threshold)
|
|
|
|
{
|
|
|
|
purge_sys.truncate.current= space;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
++i;
|
|
|
|
i %= srv_undo_tablespaces_active;
|
|
|
|
if (i == j)
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fil_space_t &space= *purge_sys.truncate.current;
|
|
|
|
/* Undo tablespace always are a single file. */
|
|
|
|
fil_node_t *file= UT_LIST_GET_FIRST(space.chain);
|
|
|
|
/* The undo tablespace files are never closed. */
|
|
|
|
ut_ad(file->is_open());
|
|
|
|
|
|
|
|
DBUG_LOG("undo", "marking for truncate: " << file->name);
|
|
|
|
|
|
|
|
for (ulint i= 0; i < TRX_SYS_N_RSEGS; ++i)
|
|
|
|
if (trx_rseg_t *rseg= trx_sys.rseg_array[i])
|
|
|
|
if (rseg->space == &space)
|
|
|
|
/* Once set, this rseg will not be allocated to subsequent
|
|
|
|
transactions, but we will wait for existing active
|
|
|
|
transactions to finish. */
|
|
|
|
rseg->skip_allocation= true;
|
|
|
|
|
|
|
|
for (ulint i= 0; i < TRX_SYS_N_RSEGS; ++i)
|
|
|
|
{
|
|
|
|
trx_rseg_t *rseg= trx_sys.rseg_array[i];
|
|
|
|
if (!rseg || rseg->space != &space)
|
|
|
|
continue;
|
|
|
|
mutex_enter(&rseg->mutex);
|
|
|
|
ut_ad(rseg->skip_allocation);
|
|
|
|
ut_ad(rseg->is_persistent());
|
|
|
|
if (rseg->trx_ref_count)
|
|
|
|
{
|
2018-09-10 18:01:54 +03:00
|
|
|
not_free:
|
2021-09-24 08:24:03 +03:00
|
|
|
mutex_exit(&rseg->mutex);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (rseg->curr_size != 1)
|
|
|
|
{
|
|
|
|
/* Check if all segments are cached and safe to remove. */
|
|
|
|
ulint cached= 0;
|
|
|
|
for (trx_undo_t *undo= UT_LIST_GET_FIRST(rseg->undo_cached); undo;
|
|
|
|
undo= UT_LIST_GET_NEXT(undo_list, undo))
|
|
|
|
{
|
|
|
|
if (head.trx_no < undo->trx_id)
|
|
|
|
goto not_free;
|
|
|
|
else
|
|
|
|
cached+= undo->size;
|
|
|
|
}
|
|
|
|
|
|
|
|
ut_ad(rseg->curr_size > cached);
|
|
|
|
|
|
|
|
if (rseg->curr_size > cached + 1)
|
|
|
|
goto not_free;
|
|
|
|
}
|
|
|
|
|
|
|
|
mutex_exit(&rseg->mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
ib::info() << "Truncating " << file->name;
|
|
|
|
trx_purge_cleanse_purge_queue(space);
|
|
|
|
|
|
|
|
log_free_check();
|
|
|
|
|
|
|
|
mtr_t mtr;
|
|
|
|
mtr.start();
|
|
|
|
mtr_x_lock_space(&space, &mtr);
|
|
|
|
|
|
|
|
/* Lock all modified pages of the tablespace.
|
|
|
|
|
|
|
|
During truncation, we do not want any writes to the file.
|
|
|
|
|
|
|
|
If a log checkpoint was completed at LSN earlier than our
|
|
|
|
mini-transaction commit and the server was killed, then
|
|
|
|
discarding the to-be-trimmed pages without flushing would
|
|
|
|
break crash recovery. */
|
|
|
|
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
|
|
|
|
|
|
|
for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.flush_list); bpage; )
|
|
|
|
{
|
|
|
|
ut_ad(bpage->oldest_modification());
|
|
|
|
ut_ad(bpage->in_file());
|
|
|
|
|
|
|
|
buf_page_t *prev= UT_LIST_GET_PREV(list, bpage);
|
|
|
|
|
|
|
|
if (bpage->id().space() == space.id &&
|
|
|
|
bpage->oldest_modification() != 1)
|
|
|
|
{
|
|
|
|
ut_ad(bpage->state() == BUF_BLOCK_FILE_PAGE);
|
|
|
|
auto block= reinterpret_cast<buf_block_t*>(bpage);
|
|
|
|
block->fix();
|
|
|
|
ut_ad(rw_lock_s_lock_nowait(block->debug_latch, __FILE__, __LINE__));
|
|
|
|
buf_pool.flush_hp.set(prev);
|
|
|
|
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
|
|
|
|
|
|
|
#ifdef BTR_CUR_HASH_ADAPT
|
|
|
|
ut_ad(!block->index); /* There is no AHI on undo tablespaces. */
|
|
|
|
#endif
|
|
|
|
rw_lock_x_lock(&block->lock);
|
|
|
|
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
|
|
|
ut_ad(bpage->io_fix() == BUF_IO_NONE);
|
|
|
|
|
|
|
|
if (bpage->oldest_modification() > 1)
|
|
|
|
{
|
|
|
|
bpage->clear_oldest_modification(false);
|
|
|
|
mtr.memo_push(block, MTR_MEMO_PAGE_X_FIX);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
rw_lock_x_unlock(&block->lock);
|
|
|
|
block->unfix();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (prev != buf_pool.flush_hp.get())
|
|
|
|
{
|
|
|
|
/* Rescan, because we may have lost the position. */
|
|
|
|
bpage= UT_LIST_GET_LAST(buf_pool.flush_list);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bpage= prev;
|
|
|
|
}
|
|
|
|
|
|
|
|
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
|
|
|
|
|
|
|
|
/* Adjust the tablespace metadata. */
|
|
|
|
if (!fil_truncate_prepare(space.id))
|
|
|
|
{
|
|
|
|
ib::error() << "Failed to find UNDO tablespace " << file->name;
|
|
|
|
mtr.commit();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Re-initialize tablespace, in a single mini-transaction. */
|
|
|
|
const ulint size= SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
|
|
|
|
/* Associate the undo tablespace with mtr.
|
|
|
|
During mtr::commit_shrink(), InnoDB can use the undo
|
|
|
|
tablespace object to clear all freed ranges */
|
|
|
|
mtr.set_named_space(&space);
|
|
|
|
mtr.trim_pages(page_id_t(space.id, size));
|
|
|
|
fsp_header_init(&space, size, &mtr);
|
|
|
|
mutex_enter(&fil_system.mutex);
|
|
|
|
space.size= file->size= size;
|
|
|
|
mutex_exit(&fil_system.mutex);
|
|
|
|
|
|
|
|
buf_block_t *sys_header= trx_sysf_get(&mtr);
|
|
|
|
|
|
|
|
for (ulint i= 0; i < TRX_SYS_N_RSEGS; ++i)
|
|
|
|
{
|
|
|
|
trx_rseg_t *rseg= trx_sys.rseg_array[i];
|
|
|
|
if (!rseg || rseg->space != &space)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
ut_ad(rseg->id == i);
|
|
|
|
ut_ad(rseg->is_persistent());
|
|
|
|
ut_d(const auto old_page= rseg->page_no);
|
|
|
|
|
2021-09-24 12:14:35 +03:00
|
|
|
buf_block_t *rblock= trx_rseg_header_create(&space, i,
|
|
|
|
trx_sys.get_max_trx_id(),
|
|
|
|
sys_header, &mtr);
|
2021-09-24 08:24:03 +03:00
|
|
|
ut_ad(rblock);
|
|
|
|
rseg->page_no= rblock ? rblock->page.id().page_no() : FIL_NULL;
|
|
|
|
ut_ad(old_page == rseg->page_no);
|
|
|
|
|
|
|
|
/* Before re-initialization ensure that we free the existing
|
|
|
|
structure. There can't be any active transactions. */
|
|
|
|
ut_a(UT_LIST_GET_LEN(rseg->undo_list) == 0);
|
|
|
|
|
|
|
|
for (trx_undo_t *undo= UT_LIST_GET_FIRST(rseg->undo_cached), *next_undo;
|
|
|
|
undo; undo= next_undo)
|
|
|
|
{
|
|
|
|
next_undo= UT_LIST_GET_NEXT(undo_list, undo);
|
|
|
|
UT_LIST_REMOVE(rseg->undo_cached, undo);
|
|
|
|
MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
|
|
|
|
ut_free(undo);
|
|
|
|
}
|
|
|
|
|
|
|
|
UT_LIST_INIT(rseg->undo_list, &trx_undo_t::undo_list);
|
|
|
|
UT_LIST_INIT(rseg->undo_cached, &trx_undo_t::undo_list);
|
|
|
|
|
|
|
|
/* These were written by trx_rseg_header_create(). */
|
|
|
|
ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_FORMAT + rblock->frame));
|
|
|
|
ut_ad(!mach_read_from_4(TRX_RSEG + TRX_RSEG_HISTORY_SIZE +
|
|
|
|
rblock->frame));
|
|
|
|
/* Initialize the undo log lists according to
|
|
|
|
the rseg header */
|
|
|
|
rseg->curr_size= 1;
|
|
|
|
rseg->trx_ref_count= 0;
|
|
|
|
rseg->last_page_no= FIL_NULL;
|
|
|
|
rseg->last_commit_and_offset= 0;
|
|
|
|
rseg->needs_purge= false;
|
|
|
|
}
|
|
|
|
|
|
|
|
mtr.commit_shrink(space);
|
|
|
|
|
|
|
|
/* No mutex; this is only updated by the purge coordinator. */
|
|
|
|
export_vars.innodb_undo_truncations++;
|
|
|
|
|
|
|
|
if (purge_sys.rseg && purge_sys.rseg->last_page_no == FIL_NULL)
|
|
|
|
{
|
|
|
|
/* If purge_sys.rseg is pointing to rseg that was recently
|
|
|
|
truncated then move to next rseg element.
|
|
|
|
|
|
|
|
Note: Ideally purge_sys.rseg should be NULL because purge should
|
|
|
|
complete processing of all the records but srv_purge_batch_size
|
|
|
|
can force the purge loop to exit before all the records are purged. */
|
|
|
|
purge_sys.rseg= nullptr;
|
|
|
|
purge_sys.next_stored= false;
|
|
|
|
}
|
|
|
|
|
|
|
|
DBUG_EXECUTE_IF("ib_undo_trunc", ib::info() << "ib_undo_trunc";
|
|
|
|
log_buffer_flush_to_disk();
|
|
|
|
DBUG_SUICIDE(););
|
|
|
|
|
|
|
|
for (ulint i= 0; i < TRX_SYS_N_RSEGS; ++i)
|
|
|
|
{
|
|
|
|
if (trx_rseg_t *rseg= trx_sys.rseg_array[i])
|
|
|
|
{
|
|
|
|
ut_ad(rseg->id == i);
|
|
|
|
ut_ad(rseg->is_persistent());
|
|
|
|
if (rseg->space == &space)
|
|
|
|
rseg->skip_allocation= false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ib::info() << "Truncated " << file->name;
|
|
|
|
purge_sys.truncate.last= purge_sys.truncate.current;
|
|
|
|
ut_ad(&space == purge_sys.truncate.current);
|
|
|
|
purge_sys.truncate.current= nullptr;
|
|
|
|
}
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/***********************************************************************//**
|
|
|
|
Updates the last not yet purged history log info in rseg when we have purged
|
2018-02-22 09:30:41 +02:00
|
|
|
a whole undo log. Advances also purge_sys.purge_trx_no past the purged log. */
|
2019-03-19 15:49:53 +02:00
|
|
|
static void trx_purge_rseg_get_next_history_log(
|
2014-02-26 19:11:54 +01:00
|
|
|
ulint* n_pages_handled)/*!< in/out: number of UNDO pages
|
|
|
|
handled */
|
|
|
|
{
|
|
|
|
fil_addr_t prev_log_addr;
|
|
|
|
trx_id_t trx_no;
|
|
|
|
mtr_t mtr;
|
|
|
|
|
2019-03-19 15:49:53 +02:00
|
|
|
mutex_enter(&purge_sys.rseg->mutex);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-03-19 15:49:53 +02:00
|
|
|
ut_a(purge_sys.rseg->last_page_no != FIL_NULL);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2021-06-21 12:34:07 +03:00
|
|
|
purge_sys.tail.trx_no = purge_sys.rseg->last_trx_no() + 1;
|
2018-02-22 09:30:41 +02:00
|
|
|
purge_sys.tail.undo_no = 0;
|
|
|
|
purge_sys.next_stored = false;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-03-19 15:49:53 +02:00
|
|
|
mtr.start();
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-12-03 10:19:45 +02:00
|
|
|
const buf_block_t* undo_page = trx_undo_page_get_s_latched(
|
2019-03-19 15:49:53 +02:00
|
|
|
page_id_t(purge_sys.rseg->space->id,
|
|
|
|
purge_sys.rseg->last_page_no), &mtr);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-12-03 10:19:45 +02:00
|
|
|
const trx_ulogf_t* log_hdr = undo_page->frame
|
2021-06-21 14:22:22 +03:00
|
|
|
+ purge_sys.rseg->last_offset();
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/* Increase the purge page count by one for every handled log */
|
|
|
|
|
|
|
|
(*n_pages_handled)++;
|
|
|
|
|
MDEV-21907: InnoDB: Enable -Wconversion on clang and GCC
The -Wconversion in GCC seems to be stricter than in clang.
GCC at least since version 4.4.7 issues truncation warnings for
assignments to bitfields, while clang 10 appears to only issue
warnings when the sizes in bytes rounded to the nearest integer
powers of 2 are different.
Before GCC 10.0.0, -Wconversion required more casts and would not
allow some operations, such as x<<=1 or x+=1 on a data type that
is narrower than int.
GCC 5 (but not GCC 4, GCC 6, or any later version) is complaining
about x|=y even when x and y are compatible types that are narrower
than int. Hence, we must rewrite some x|=y as
x=static_cast<byte>(x|y) or similar, or we must disable -Wconversion.
In GCC 6 and later, the warning for assigning wider to bitfields
that are narrower than 8, 16, or 32 bits can be suppressed by
applying a bitwise & with the exact bitmask of the bitfield.
For older GCC, we must disable -Wconversion for GCC 4 or 5 in such
cases.
The bitwise negation operator appears to promote short integers
to a wider type, and hence we must add explicit truncation casts
around them. Microsoft Visual C does not allow a static_cast to
truncate a constant, such as static_cast<byte>(1) truncating int.
Hence, we will use the constructor-style cast byte(~1) for such cases.
This has been tested at least with GCC 4.8.5, 5.4.0, 7.4.0, 9.2.1, 10.0.0,
clang 9.0.1, 10.0.0, and MSVC 14.22.27905 (Microsoft Visual Studio 2019)
on 64-bit and 32-bit targets (IA-32, AMD64, POWER 8, POWER 9, ARMv8).
2020-03-12 19:46:41 +02:00
|
|
|
prev_log_addr = flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE);
|
|
|
|
prev_log_addr.boffset = static_cast<uint16_t>(prev_log_addr.boffset
|
|
|
|
- TRX_UNDO_HISTORY_NODE);
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-03-19 15:49:53 +02:00
|
|
|
const bool empty = prev_log_addr.page == FIL_NULL;
|
|
|
|
|
|
|
|
if (empty) {
|
2014-02-26 19:11:54 +01:00
|
|
|
/* No logs left in the history list */
|
2019-03-19 15:49:53 +02:00
|
|
|
purge_sys.rseg->last_page_no = FIL_NULL;
|
|
|
|
}
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-03-19 15:49:53 +02:00
|
|
|
mutex_exit(&purge_sys.rseg->mutex);
|
|
|
|
mtr.commit();
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-03-19 15:49:53 +02:00
|
|
|
if (empty) {
|
2014-02-26 19:11:54 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2017-08-15 17:18:55 +03:00
|
|
|
/* Read the previous log header. */
|
2019-03-19 15:49:53 +02:00
|
|
|
mtr.start();
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-03-19 15:49:53 +02:00
|
|
|
log_hdr = trx_undo_page_get_s_latched(
|
|
|
|
page_id_t(purge_sys.rseg->space->id, prev_log_addr.page),
|
2019-12-03 10:19:45 +02:00
|
|
|
&mtr)->frame
|
2014-02-26 19:11:54 +01:00
|
|
|
+ prev_log_addr.boffset;
|
|
|
|
|
|
|
|
trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);
|
2021-06-21 12:34:07 +03:00
|
|
|
ut_ad(mach_read_from_2(log_hdr + TRX_UNDO_NEEDS_PURGE) <= 1);
|
2021-08-23 17:00:01 +03:00
|
|
|
const byte needs_purge = log_hdr[TRX_UNDO_NEEDS_PURGE + 1];
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2021-08-23 17:00:01 +03:00
|
|
|
mtr.commit();
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-03-19 15:49:53 +02:00
|
|
|
mutex_enter(&purge_sys.rseg->mutex);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-03-19 15:49:53 +02:00
|
|
|
purge_sys.rseg->last_page_no = prev_log_addr.page;
|
2021-06-21 12:34:07 +03:00
|
|
|
purge_sys.rseg->set_last_commit(prev_log_addr.boffset, trx_no);
|
2021-08-23 17:00:01 +03:00
|
|
|
purge_sys.rseg->needs_purge = needs_purge != 0;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/* Purge can also produce events, however these are already ordered
|
|
|
|
in the rollback segment and any user generated event will be greater
|
|
|
|
than the events that Purge produces. ie. Purge can never produce
|
|
|
|
events from an empty rollback segment. */
|
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
mutex_enter(&purge_sys.pq_mutex);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-03-19 15:49:53 +02:00
|
|
|
purge_sys.purge_queue.push(*purge_sys.rseg);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
mutex_exit(&purge_sys.pq_mutex);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-03-19 15:49:53 +02:00
|
|
|
mutex_exit(&purge_sys.rseg->mutex);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
2017-03-09 22:06:22 +02:00
|
|
|
/** Position the purge sys "iterator" on the undo record to use for purging. */
|
2021-06-21 12:34:07 +03:00
|
|
|
static void trx_purge_read_undo_rec()
|
2014-02-26 19:11:54 +01:00
|
|
|
{
|
2019-12-03 10:19:45 +02:00
|
|
|
uint16_t offset;
|
|
|
|
uint32_t page_no;
|
2014-02-26 19:11:54 +01:00
|
|
|
ib_uint64_t undo_no;
|
|
|
|
|
2021-06-21 12:34:07 +03:00
|
|
|
purge_sys.hdr_offset = purge_sys.rseg->last_offset();
|
2018-02-22 09:30:41 +02:00
|
|
|
page_no = purge_sys.hdr_page_no = purge_sys.rseg->last_page_no;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
if (purge_sys.rseg->needs_purge) {
|
2014-02-26 19:11:54 +01:00
|
|
|
mtr_t mtr;
|
2017-08-15 17:18:55 +03:00
|
|
|
mtr.start();
|
2019-12-03 10:19:45 +02:00
|
|
|
buf_block_t* undo_page;
|
2017-08-15 17:18:55 +03:00
|
|
|
if (trx_undo_rec_t* undo_rec = trx_undo_get_first_rec(
|
2019-12-03 10:19:45 +02:00
|
|
|
*purge_sys.rseg->space, purge_sys.hdr_page_no,
|
|
|
|
purge_sys.hdr_offset, RW_S_LATCH,
|
|
|
|
undo_page, &mtr)) {
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
offset = page_offset(undo_rec);
|
|
|
|
undo_no = trx_undo_rec_get_undo_no(undo_rec);
|
MDEV-15053 Reduce buf_pool_t::mutex contention
User-visible changes: The INFORMATION_SCHEMA views INNODB_BUFFER_PAGE
and INNODB_BUFFER_PAGE_LRU will report a dummy value FLUSH_TYPE=0
and will no longer report the PAGE_STATE value READY_FOR_USE.
We will remove some fields from buf_page_t and move much code to
member functions of buf_pool_t and buf_page_t, so that the access
rules of data members can be enforced consistently.
Evicting or adding pages in buf_pool.LRU will remain covered by
buf_pool.mutex.
Evicting or adding pages in buf_pool.page_hash will remain
covered by both buf_pool.mutex and the buf_pool.page_hash X-latch.
After this fix, buf_pool.page_hash lookups can entirely
avoid acquiring buf_pool.mutex, only relying on
buf_pool.hash_lock_get() S-latch.
Similarly, buf_flush_check_neighbors() can will rely solely on
buf_pool.mutex, no buf_pool.page_hash latch at all.
The buf_pool.mutex is rather contended in I/O heavy benchmarks,
especially when the workload does not fit in the buffer pool.
The first attempt to alleviate the contention was the
buf_pool_t::mutex split in
commit 4ed7082eefe56b3e97e0edefb3df76dd7ef5e858
which introduced buf_block_t::mutex, which we are now removing.
Later, multiple instances of buf_pool_t were introduced
in commit c18084f71b02ea707c6461353e6cfc15d7553bc6
and recently removed by us in
commit 1a6f708ec594ac0ae2dd30db926ab07b100fa24b (MDEV-15058).
UNIV_BUF_DEBUG: Remove. This option to enable some buffer pool
related debugging in otherwise non-debug builds has not been used
for years. Instead, we have been using UNIV_DEBUG, which is enabled
in CMAKE_BUILD_TYPE=Debug.
buf_block_t::mutex, buf_pool_t::zip_mutex: Remove. We can mainly rely on
std::atomic and the buf_pool.page_hash latches, and in some cases
depend on buf_pool.mutex or buf_pool.flush_list_mutex just like before.
We must always release buf_block_t::lock before invoking
unfix() or io_unfix(), to prevent a glitch where a block that was
added to the buf_pool.free list would apper X-latched. See
commit c5883debd6ef440a037011c11873b396923e93c5 how this glitch
was finally caught in a debug environment.
We move some buf_pool_t::page_hash specific code from the
ha and hash modules to buf_pool, for improved readability.
buf_pool_t::close(): Assert that all blocks are clean, except
on aborted startup or crash-like shutdown.
buf_pool_t::validate(): No longer attempt to validate
n_flush[] against the number of BUF_IO_WRITE fixed blocks,
because buf_page_t::flush_type no longer exists.
buf_pool_t::watch_set(): Replaces buf_pool_watch_set().
Reduce mutex contention by separating the buf_pool.watch[]
allocation and the insert into buf_pool.page_hash.
buf_pool_t::page_hash_lock<bool exclusive>(): Acquire a
buf_pool.page_hash latch.
Replaces and extends buf_page_hash_lock_s_confirm()
and buf_page_hash_lock_x_confirm().
buf_pool_t::READ_AHEAD_PAGES: Renamed from BUF_READ_AHEAD_PAGES.
buf_pool_t::curr_size, old_size, read_ahead_area, n_pend_reads:
Use Atomic_counter.
buf_pool_t::running_out(): Replaces buf_LRU_buf_pool_running_out().
buf_pool_t::LRU_remove(): Remove a block from the LRU list
and return its predecessor. Incorporates buf_LRU_adjust_hp(),
which was removed.
buf_page_get_gen(): Remove a redundant call of fsp_is_system_temporary(),
for mode == BUF_GET_IF_IN_POOL_OR_WATCH, which is only used by
BTR_DELETE_OP (purge), which is never invoked on temporary tables.
buf_free_from_unzip_LRU_list_batch(): Avoid redundant assignments.
buf_LRU_free_from_unzip_LRU_list(): Simplify the loop condition.
buf_LRU_free_page(): Clarify the function comment.
buf_flush_check_neighbor(), buf_flush_check_neighbors():
Rewrite the construction of the page hash range. We will hold
the buf_pool.mutex for up to buf_pool.read_ahead_area (at most 64)
consecutive lookups of buf_pool.page_hash.
buf_flush_page_and_try_neighbors(): Remove.
Merge to its only callers, and remove redundant operations in
buf_flush_LRU_list_batch().
buf_read_ahead_random(), buf_read_ahead_linear(): Rewrite.
Do not acquire buf_pool.mutex, and iterate directly with page_id_t.
ut_2_power_up(): Remove. my_round_up_to_next_power() is inlined
and avoids any loops.
fil_page_get_prev(), fil_page_get_next(), fil_addr_is_null(): Remove.
buf_flush_page(): Add a fil_space_t* parameter. Minimize the
buf_pool.mutex hold time. buf_pool.n_flush[] is no longer updated
atomically with the io_fix, and we will protect most buf_block_t
fields with buf_block_t::lock. The function
buf_flush_write_block_low() is removed and merged here.
buf_page_init_for_read(): Use static linkage. Initialize the newly
allocated block and acquire the exclusive buf_block_t::lock while not
holding any mutex.
IORequest::IORequest(): Remove the body. We only need to invoke
set_punch_hole() in buf_flush_page() and nowhere else.
buf_page_t::flush_type: Remove. Replaced by IORequest::flush_type.
This field is only used during a fil_io() call.
That function already takes IORequest as a parameter, so we had
better introduce for the rarely changing field.
buf_block_t::init(): Replaces buf_page_init().
buf_page_t::init(): Replaces buf_page_init_low().
buf_block_t::initialise(): Initialise many fields, but
keep the buf_page_t::state(). Both buf_pool_t::validate() and
buf_page_optimistic_get() requires that buf_page_t::in_file()
be protected atomically with buf_page_t::in_page_hash
and buf_page_t::in_LRU_list.
buf_page_optimistic_get(): Now that buf_block_t::mutex
no longer exists, we must check buf_page_t::io_fix()
after acquiring the buf_pool.page_hash lock, to detect
whether buf_page_init_for_read() has been initiated.
We will also check the io_fix() before acquiring hash_lock
in order to avoid unnecessary computation.
The field buf_block_t::modify_clock (protected by buf_block_t::lock)
allows buf_page_optimistic_get() to validate the block.
buf_page_t::real_size: Remove. It was only used while flushing
pages of page_compressed tables.
buf_page_encrypt(): Add an output parameter that allows us ot eliminate
buf_page_t::real_size. Replace a condition with debug assertion.
buf_page_should_punch_hole(): Remove.
buf_dblwr_t::add_to_batch(): Replaces buf_dblwr_add_to_batch().
Add the parameter size (to replace buf_page_t::real_size).
buf_dblwr_t::write_single_page(): Replaces buf_dblwr_write_single_page().
Add the parameter size (to replace buf_page_t::real_size).
fil_system_t::detach(): Replaces fil_space_detach().
Ensure that fil_validate() will not be violated even if
fil_system.mutex is released and reacquired.
fil_node_t::complete_io(): Renamed from fil_node_complete_io().
fil_node_t::close_to_free(): Replaces fil_node_close_to_free().
Avoid invoking fil_node_t::close() because fil_system.n_open
has already been decremented in fil_space_t::detach().
BUF_BLOCK_READY_FOR_USE: Remove. Directly use BUF_BLOCK_MEMORY.
BUF_BLOCK_ZIP_DIRTY: Remove. Directly use BUF_BLOCK_ZIP_PAGE,
and distinguish dirty pages by buf_page_t::oldest_modification().
BUF_BLOCK_POOL_WATCH: Remove. Use BUF_BLOCK_NOT_USED instead.
This state was only being used for buf_page_t that are in
buf_pool.watch.
buf_pool_t::watch[]: Remove pointer indirection.
buf_page_t::in_flush_list: Remove. It was set if and only if
buf_page_t::oldest_modification() is nonzero.
buf_page_decrypt_after_read(), buf_corrupt_page_release(),
buf_page_check_corrupt(): Change the const fil_space_t* parameter
to const fil_node_t& so that we can report the correct file name.
buf_page_monitor(): Declare as an ATTRIBUTE_COLD global function.
buf_page_io_complete(): Split to buf_page_read_complete() and
buf_page_write_complete().
buf_dblwr_t::in_use: Remove.
buf_dblwr_t::buf_block_array: Add IORequest::flush_t.
buf_dblwr_sync_datafiles(): Remove. It was a useless wrapper of
os_aio_wait_until_no_pending_writes().
buf_flush_write_complete(): Declare static, not global.
Add the parameter IORequest::flush_t.
buf_flush_freed_page(): Simplify the code.
recv_sys_t::flush_lru: Renamed from flush_type and changed to bool.
fil_read(), fil_write(): Replaced with direct use of fil_io().
fil_buffering_disabled(): Remove. Check srv_file_flush_method directly.
fil_mutex_enter_and_prepare_for_io(): Return the resolved
fil_space_t* to avoid a duplicated lookup in the caller.
fil_report_invalid_page_access(): Clean up the parameters.
fil_io(): Return fil_io_t, which comprises fil_node_t and error code.
Always invoke fil_space_t::acquire_for_io() and let either the
sync=true caller or fil_aio_callback() invoke
fil_space_t::release_for_io().
fil_aio_callback(): Rewrite to replace buf_page_io_complete().
fil_check_pending_operations(): Remove a parameter, and remove some
redundant lookups.
fil_node_close_to_free(): Wait for n_pending==0. Because we no longer
do an extra lookup of the tablespace between fil_io() and the
completion of the operation, we must give fil_node_t::complete_io() a
chance to decrement the counter.
fil_close_tablespace(): Remove unused parameter trx, and document
that this is only invoked during the error handling of IMPORT TABLESPACE.
row_import_discard_changes(): Merged with the only caller,
row_import_cleanup(). Do not lock up the data dictionary while
invoking fil_close_tablespace().
logs_empty_and_mark_files_at_shutdown(): Do not invoke
fil_close_all_files(), to avoid a !needs_flush assertion failure
on fil_node_t::close().
innodb_shutdown(): Invoke os_aio_free() before fil_close_all_files().
fil_close_all_files(): Invoke fil_flush_file_spaces()
to ensure proper durability.
thread_pool::unbind(): Fix a crash that would occur on Windows
after srv_thread_pool->disable_aio() and os_file_close().
This fix was submitted by Vladislav Vaintroub.
Thanks to Matthias Leich and Axel Schwenke for extensive testing,
Vladislav Vaintroub for helpful comments, and Eugene Kosov for a review.
2020-06-05 12:35:46 +03:00
|
|
|
page_no = undo_page->page.id().page_no();
|
2014-02-26 19:11:54 +01:00
|
|
|
} else {
|
|
|
|
offset = 0;
|
|
|
|
undo_no = 0;
|
|
|
|
}
|
|
|
|
|
2017-08-15 17:18:55 +03:00
|
|
|
mtr.commit();
|
2014-02-26 19:11:54 +01:00
|
|
|
} else {
|
|
|
|
offset = 0;
|
|
|
|
undo_no = 0;
|
|
|
|
}
|
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
purge_sys.offset = offset;
|
|
|
|
purge_sys.page_no = page_no;
|
|
|
|
purge_sys.tail.undo_no = undo_no;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
purge_sys.next_stored = true;
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/***********************************************************************//**
|
|
|
|
Chooses the next undo log to purge and updates the info in purge_sys. This
|
|
|
|
function is used to initialize purge_sys when the next record to purge is
|
|
|
|
not known, and also to update the purge system info on the next record when
|
|
|
|
purge has handled the whole undo log for a transaction. */
|
|
|
|
static
|
|
|
|
void
|
|
|
|
trx_purge_choose_next_log(void)
|
|
|
|
/*===========================*/
|
|
|
|
{
|
2018-02-22 09:30:41 +02:00
|
|
|
ut_ad(!purge_sys.next_stored);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
if (purge_sys.rseg_iter.set_next()) {
|
2017-03-09 22:06:22 +02:00
|
|
|
trx_purge_read_undo_rec();
|
2014-02-26 19:11:54 +01:00
|
|
|
} else {
|
|
|
|
/* There is nothing to do yet. */
|
|
|
|
os_thread_yield();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/***********************************************************************//**
|
|
|
|
Gets the next record to purge and updates the info in the purge system.
|
2016-08-12 11:17:45 +03:00
|
|
|
@return copy of an undo log record or pointer to the dummy undo log record */
|
2014-02-26 19:11:54 +01:00
|
|
|
static
|
|
|
|
trx_undo_rec_t*
|
|
|
|
trx_purge_get_next_rec(
|
|
|
|
/*===================*/
|
|
|
|
ulint* n_pages_handled,/*!< in/out: number of UNDO pages
|
|
|
|
handled */
|
|
|
|
mem_heap_t* heap) /*!< in: memory heap where copied */
|
|
|
|
{
|
|
|
|
mtr_t mtr;
|
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
ut_ad(purge_sys.next_stored);
|
2021-06-21 14:22:22 +03:00
|
|
|
ut_ad(purge_sys.tail.trx_no < purge_sys.low_limit_no());
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-12-03 10:19:45 +02:00
|
|
|
const ulint space = purge_sys.rseg->space->id;
|
|
|
|
const uint32_t page_no = purge_sys.page_no;
|
|
|
|
const uint16_t offset = purge_sys.offset;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
if (offset == 0) {
|
|
|
|
/* It is the dummy undo log record, which means that there is
|
|
|
|
no need to purge this undo log */
|
|
|
|
|
2019-03-19 15:49:53 +02:00
|
|
|
trx_purge_rseg_get_next_history_log(n_pages_handled);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/* Look for the next undo log and record to purge */
|
|
|
|
|
|
|
|
trx_purge_choose_next_log();
|
|
|
|
|
|
|
|
return(&trx_purge_dummy_rec);
|
|
|
|
}
|
|
|
|
|
|
|
|
mtr_start(&mtr);
|
|
|
|
|
2019-12-03 10:19:45 +02:00
|
|
|
buf_block_t* undo_page = trx_undo_page_get_s_latched(
|
|
|
|
page_id_t(space, page_no), &mtr);
|
|
|
|
buf_block_t* rec2_page = undo_page;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-12-03 10:19:45 +02:00
|
|
|
const trx_undo_rec_t* rec2 = trx_undo_page_get_next_rec(
|
|
|
|
undo_page, offset, purge_sys.hdr_page_no, purge_sys.hdr_offset);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2017-08-15 17:18:55 +03:00
|
|
|
if (rec2 == NULL) {
|
2019-12-03 10:19:45 +02:00
|
|
|
rec2 = trx_undo_get_next_rec(rec2_page, offset,
|
|
|
|
purge_sys.hdr_page_no,
|
2018-02-22 09:30:41 +02:00
|
|
|
purge_sys.hdr_offset, &mtr);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (rec2 == NULL) {
|
|
|
|
mtr_commit(&mtr);
|
|
|
|
|
2019-03-19 15:49:53 +02:00
|
|
|
trx_purge_rseg_get_next_history_log(n_pages_handled);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/* Look for the next undo log and record to purge */
|
|
|
|
|
|
|
|
trx_purge_choose_next_log();
|
|
|
|
|
|
|
|
mtr_start(&mtr);
|
|
|
|
|
|
|
|
undo_page = trx_undo_page_get_s_latched(
|
2017-03-09 22:06:22 +02:00
|
|
|
page_id_t(space, page_no), &mtr);
|
2014-02-26 19:11:54 +01:00
|
|
|
} else {
|
2019-12-03 10:19:45 +02:00
|
|
|
purge_sys.offset = page_offset(rec2);
|
MDEV-15053 Reduce buf_pool_t::mutex contention
User-visible changes: The INFORMATION_SCHEMA views INNODB_BUFFER_PAGE
and INNODB_BUFFER_PAGE_LRU will report a dummy value FLUSH_TYPE=0
and will no longer report the PAGE_STATE value READY_FOR_USE.
We will remove some fields from buf_page_t and move much code to
member functions of buf_pool_t and buf_page_t, so that the access
rules of data members can be enforced consistently.
Evicting or adding pages in buf_pool.LRU will remain covered by
buf_pool.mutex.
Evicting or adding pages in buf_pool.page_hash will remain
covered by both buf_pool.mutex and the buf_pool.page_hash X-latch.
After this fix, buf_pool.page_hash lookups can entirely
avoid acquiring buf_pool.mutex, only relying on
buf_pool.hash_lock_get() S-latch.
Similarly, buf_flush_check_neighbors() can will rely solely on
buf_pool.mutex, no buf_pool.page_hash latch at all.
The buf_pool.mutex is rather contended in I/O heavy benchmarks,
especially when the workload does not fit in the buffer pool.
The first attempt to alleviate the contention was the
buf_pool_t::mutex split in
commit 4ed7082eefe56b3e97e0edefb3df76dd7ef5e858
which introduced buf_block_t::mutex, which we are now removing.
Later, multiple instances of buf_pool_t were introduced
in commit c18084f71b02ea707c6461353e6cfc15d7553bc6
and recently removed by us in
commit 1a6f708ec594ac0ae2dd30db926ab07b100fa24b (MDEV-15058).
UNIV_BUF_DEBUG: Remove. This option to enable some buffer pool
related debugging in otherwise non-debug builds has not been used
for years. Instead, we have been using UNIV_DEBUG, which is enabled
in CMAKE_BUILD_TYPE=Debug.
buf_block_t::mutex, buf_pool_t::zip_mutex: Remove. We can mainly rely on
std::atomic and the buf_pool.page_hash latches, and in some cases
depend on buf_pool.mutex or buf_pool.flush_list_mutex just like before.
We must always release buf_block_t::lock before invoking
unfix() or io_unfix(), to prevent a glitch where a block that was
added to the buf_pool.free list would apper X-latched. See
commit c5883debd6ef440a037011c11873b396923e93c5 how this glitch
was finally caught in a debug environment.
We move some buf_pool_t::page_hash specific code from the
ha and hash modules to buf_pool, for improved readability.
buf_pool_t::close(): Assert that all blocks are clean, except
on aborted startup or crash-like shutdown.
buf_pool_t::validate(): No longer attempt to validate
n_flush[] against the number of BUF_IO_WRITE fixed blocks,
because buf_page_t::flush_type no longer exists.
buf_pool_t::watch_set(): Replaces buf_pool_watch_set().
Reduce mutex contention by separating the buf_pool.watch[]
allocation and the insert into buf_pool.page_hash.
buf_pool_t::page_hash_lock<bool exclusive>(): Acquire a
buf_pool.page_hash latch.
Replaces and extends buf_page_hash_lock_s_confirm()
and buf_page_hash_lock_x_confirm().
buf_pool_t::READ_AHEAD_PAGES: Renamed from BUF_READ_AHEAD_PAGES.
buf_pool_t::curr_size, old_size, read_ahead_area, n_pend_reads:
Use Atomic_counter.
buf_pool_t::running_out(): Replaces buf_LRU_buf_pool_running_out().
buf_pool_t::LRU_remove(): Remove a block from the LRU list
and return its predecessor. Incorporates buf_LRU_adjust_hp(),
which was removed.
buf_page_get_gen(): Remove a redundant call of fsp_is_system_temporary(),
for mode == BUF_GET_IF_IN_POOL_OR_WATCH, which is only used by
BTR_DELETE_OP (purge), which is never invoked on temporary tables.
buf_free_from_unzip_LRU_list_batch(): Avoid redundant assignments.
buf_LRU_free_from_unzip_LRU_list(): Simplify the loop condition.
buf_LRU_free_page(): Clarify the function comment.
buf_flush_check_neighbor(), buf_flush_check_neighbors():
Rewrite the construction of the page hash range. We will hold
the buf_pool.mutex for up to buf_pool.read_ahead_area (at most 64)
consecutive lookups of buf_pool.page_hash.
buf_flush_page_and_try_neighbors(): Remove.
Merge to its only callers, and remove redundant operations in
buf_flush_LRU_list_batch().
buf_read_ahead_random(), buf_read_ahead_linear(): Rewrite.
Do not acquire buf_pool.mutex, and iterate directly with page_id_t.
ut_2_power_up(): Remove. my_round_up_to_next_power() is inlined
and avoids any loops.
fil_page_get_prev(), fil_page_get_next(), fil_addr_is_null(): Remove.
buf_flush_page(): Add a fil_space_t* parameter. Minimize the
buf_pool.mutex hold time. buf_pool.n_flush[] is no longer updated
atomically with the io_fix, and we will protect most buf_block_t
fields with buf_block_t::lock. The function
buf_flush_write_block_low() is removed and merged here.
buf_page_init_for_read(): Use static linkage. Initialize the newly
allocated block and acquire the exclusive buf_block_t::lock while not
holding any mutex.
IORequest::IORequest(): Remove the body. We only need to invoke
set_punch_hole() in buf_flush_page() and nowhere else.
buf_page_t::flush_type: Remove. Replaced by IORequest::flush_type.
This field is only used during a fil_io() call.
That function already takes IORequest as a parameter, so we had
better introduce for the rarely changing field.
buf_block_t::init(): Replaces buf_page_init().
buf_page_t::init(): Replaces buf_page_init_low().
buf_block_t::initialise(): Initialise many fields, but
keep the buf_page_t::state(). Both buf_pool_t::validate() and
buf_page_optimistic_get() requires that buf_page_t::in_file()
be protected atomically with buf_page_t::in_page_hash
and buf_page_t::in_LRU_list.
buf_page_optimistic_get(): Now that buf_block_t::mutex
no longer exists, we must check buf_page_t::io_fix()
after acquiring the buf_pool.page_hash lock, to detect
whether buf_page_init_for_read() has been initiated.
We will also check the io_fix() before acquiring hash_lock
in order to avoid unnecessary computation.
The field buf_block_t::modify_clock (protected by buf_block_t::lock)
allows buf_page_optimistic_get() to validate the block.
buf_page_t::real_size: Remove. It was only used while flushing
pages of page_compressed tables.
buf_page_encrypt(): Add an output parameter that allows us ot eliminate
buf_page_t::real_size. Replace a condition with debug assertion.
buf_page_should_punch_hole(): Remove.
buf_dblwr_t::add_to_batch(): Replaces buf_dblwr_add_to_batch().
Add the parameter size (to replace buf_page_t::real_size).
buf_dblwr_t::write_single_page(): Replaces buf_dblwr_write_single_page().
Add the parameter size (to replace buf_page_t::real_size).
fil_system_t::detach(): Replaces fil_space_detach().
Ensure that fil_validate() will not be violated even if
fil_system.mutex is released and reacquired.
fil_node_t::complete_io(): Renamed from fil_node_complete_io().
fil_node_t::close_to_free(): Replaces fil_node_close_to_free().
Avoid invoking fil_node_t::close() because fil_system.n_open
has already been decremented in fil_space_t::detach().
BUF_BLOCK_READY_FOR_USE: Remove. Directly use BUF_BLOCK_MEMORY.
BUF_BLOCK_ZIP_DIRTY: Remove. Directly use BUF_BLOCK_ZIP_PAGE,
and distinguish dirty pages by buf_page_t::oldest_modification().
BUF_BLOCK_POOL_WATCH: Remove. Use BUF_BLOCK_NOT_USED instead.
This state was only being used for buf_page_t that are in
buf_pool.watch.
buf_pool_t::watch[]: Remove pointer indirection.
buf_page_t::in_flush_list: Remove. It was set if and only if
buf_page_t::oldest_modification() is nonzero.
buf_page_decrypt_after_read(), buf_corrupt_page_release(),
buf_page_check_corrupt(): Change the const fil_space_t* parameter
to const fil_node_t& so that we can report the correct file name.
buf_page_monitor(): Declare as an ATTRIBUTE_COLD global function.
buf_page_io_complete(): Split to buf_page_read_complete() and
buf_page_write_complete().
buf_dblwr_t::in_use: Remove.
buf_dblwr_t::buf_block_array: Add IORequest::flush_t.
buf_dblwr_sync_datafiles(): Remove. It was a useless wrapper of
os_aio_wait_until_no_pending_writes().
buf_flush_write_complete(): Declare static, not global.
Add the parameter IORequest::flush_t.
buf_flush_freed_page(): Simplify the code.
recv_sys_t::flush_lru: Renamed from flush_type and changed to bool.
fil_read(), fil_write(): Replaced with direct use of fil_io().
fil_buffering_disabled(): Remove. Check srv_file_flush_method directly.
fil_mutex_enter_and_prepare_for_io(): Return the resolved
fil_space_t* to avoid a duplicated lookup in the caller.
fil_report_invalid_page_access(): Clean up the parameters.
fil_io(): Return fil_io_t, which comprises fil_node_t and error code.
Always invoke fil_space_t::acquire_for_io() and let either the
sync=true caller or fil_aio_callback() invoke
fil_space_t::release_for_io().
fil_aio_callback(): Rewrite to replace buf_page_io_complete().
fil_check_pending_operations(): Remove a parameter, and remove some
redundant lookups.
fil_node_close_to_free(): Wait for n_pending==0. Because we no longer
do an extra lookup of the tablespace between fil_io() and the
completion of the operation, we must give fil_node_t::complete_io() a
chance to decrement the counter.
fil_close_tablespace(): Remove unused parameter trx, and document
that this is only invoked during the error handling of IMPORT TABLESPACE.
row_import_discard_changes(): Merged with the only caller,
row_import_cleanup(). Do not lock up the data dictionary while
invoking fil_close_tablespace().
logs_empty_and_mark_files_at_shutdown(): Do not invoke
fil_close_all_files(), to avoid a !needs_flush assertion failure
on fil_node_t::close().
innodb_shutdown(): Invoke os_aio_free() before fil_close_all_files().
fil_close_all_files(): Invoke fil_flush_file_spaces()
to ensure proper durability.
thread_pool::unbind(): Fix a crash that would occur on Windows
after srv_thread_pool->disable_aio() and os_file_close().
This fix was submitted by Vladislav Vaintroub.
Thanks to Matthias Leich and Axel Schwenke for extensive testing,
Vladislav Vaintroub for helpful comments, and Eugene Kosov for a review.
2020-06-05 12:35:46 +03:00
|
|
|
purge_sys.page_no = rec2_page->page.id().page_no();
|
2018-02-22 09:30:41 +02:00
|
|
|
purge_sys.tail.undo_no = trx_undo_rec_get_undo_no(rec2);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-12-03 10:19:45 +02:00
|
|
|
if (undo_page != rec2_page) {
|
2014-02-26 19:11:54 +01:00
|
|
|
/* We advance to a new page of the undo log: */
|
|
|
|
(*n_pages_handled)++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-12-03 10:19:45 +02:00
|
|
|
trx_undo_rec_t* rec_copy = trx_undo_rec_copy(undo_page->frame + offset,
|
|
|
|
heap);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
mtr_commit(&mtr);
|
|
|
|
|
|
|
|
return(rec_copy);
|
|
|
|
}
|
|
|
|
|
|
|
|
/********************************************************************//**
|
|
|
|
Fetches the next undo log record from the history list to purge. It must be
|
|
|
|
released with the corresponding release function.
|
|
|
|
@return copy of an undo log record or pointer to trx_purge_dummy_rec,
|
|
|
|
if the whole undo log can skipped in purge; NULL if none left */
|
2016-09-06 09:43:16 +03:00
|
|
|
static MY_ATTRIBUTE((warn_unused_result))
|
2014-02-26 19:11:54 +01:00
|
|
|
trx_undo_rec_t*
|
|
|
|
trx_purge_fetch_next_rec(
|
|
|
|
/*=====================*/
|
|
|
|
roll_ptr_t* roll_ptr, /*!< out: roll pointer to undo record */
|
|
|
|
ulint* n_pages_handled,/*!< in/out: number of UNDO log pages
|
|
|
|
handled */
|
|
|
|
mem_heap_t* heap) /*!< in: memory heap where copied */
|
|
|
|
{
|
2018-02-22 09:30:41 +02:00
|
|
|
if (!purge_sys.next_stored) {
|
2014-02-26 19:11:54 +01:00
|
|
|
trx_purge_choose_next_log();
|
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
if (!purge_sys.next_stored) {
|
2016-08-12 11:17:45 +03:00
|
|
|
DBUG_PRINT("ib_purge",
|
|
|
|
("no logs left in the history list"));
|
2014-02-26 19:11:54 +01:00
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-06-21 14:22:22 +03:00
|
|
|
if (purge_sys.tail.trx_no >= purge_sys.low_limit_no()) {
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
return(NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* fprintf(stderr, "Thread %lu purging trx %llu undo record %llu\n",
|
|
|
|
os_thread_get_curr_id(), iter->trx_no, iter->undo_no); */
|
|
|
|
|
|
|
|
*roll_ptr = trx_undo_build_roll_ptr(
|
2017-08-15 17:18:55 +03:00
|
|
|
/* row_purge_record_func() will later set
|
|
|
|
ROLL_PTR_INSERT_FLAG for TRX_UNDO_INSERT_REC */
|
|
|
|
false,
|
2018-02-22 09:30:41 +02:00
|
|
|
purge_sys.rseg->id,
|
|
|
|
purge_sys.page_no, purge_sys.offset);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/* The following call will advance the stored values of the
|
|
|
|
purge iterator. */
|
|
|
|
|
|
|
|
return(trx_purge_get_next_rec(n_pages_handled, heap));
|
|
|
|
}
|
|
|
|
|
2018-02-22 09:18:53 +02:00
|
|
|
/** Run a purge batch.
|
|
|
|
@param n_purge_threads number of purge threads
|
2016-08-12 11:17:45 +03:00
|
|
|
@return number of undo log pages handled in the batch */
|
2014-02-26 19:11:54 +01:00
|
|
|
static
|
|
|
|
ulint
|
2018-02-22 09:18:53 +02:00
|
|
|
trx_purge_attach_undo_recs(ulint n_purge_threads)
|
2014-02-26 19:11:54 +01:00
|
|
|
{
|
|
|
|
que_thr_t* thr;
|
2019-03-11 17:18:37 +02:00
|
|
|
ulint i;
|
2014-02-26 19:11:54 +01:00
|
|
|
ulint n_pages_handled = 0;
|
2018-02-22 09:30:41 +02:00
|
|
|
ulint n_thrs = UT_LIST_GET_LEN(purge_sys.query->thrs);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
ut_a(n_purge_threads > 0);
|
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
purge_sys.head = purge_sys.tail;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-03-11 17:18:37 +02:00
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
i = 0;
|
2014-02-26 19:11:54 +01:00
|
|
|
/* Debug code to validate some pre-requisites and reset done flag. */
|
2018-02-22 09:30:41 +02:00
|
|
|
for (thr = UT_LIST_GET_FIRST(purge_sys.query->thrs);
|
2014-02-26 19:11:54 +01:00
|
|
|
thr != NULL && i < n_purge_threads;
|
|
|
|
thr = UT_LIST_GET_NEXT(thrs, thr), ++i) {
|
|
|
|
|
|
|
|
purge_node_t* node;
|
|
|
|
|
|
|
|
/* Get the purge node. */
|
|
|
|
node = (purge_node_t*) thr->child;
|
|
|
|
|
2019-03-11 17:18:37 +02:00
|
|
|
ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 15:42:50 +02:00
|
|
|
ut_ad(node->undo_recs.empty());
|
2019-03-11 17:18:37 +02:00
|
|
|
ut_ad(!node->in_progress);
|
|
|
|
ut_d(node->in_progress = true);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* There should never be fewer nodes than threads, the inverse
|
|
|
|
however is allowed because we only use purge threads as needed. */
|
2019-03-11 17:18:37 +02:00
|
|
|
ut_ad(i == n_purge_threads);
|
|
|
|
#endif
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/* Fetch and parse the UNDO records. The UNDO records are added
|
|
|
|
to a per purge node vector. */
|
2018-02-22 09:30:41 +02:00
|
|
|
thr = UT_LIST_GET_FIRST(purge_sys.query->thrs);
|
2014-02-26 19:11:54 +01:00
|
|
|
ut_a(n_thrs > 0 && thr != NULL);
|
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
ut_ad(purge_sys.head <= purge_sys.tail);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
i = 0;
|
|
|
|
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 15:42:50 +02:00
|
|
|
const ulint batch_size = srv_purge_batch_size;
|
2020-05-05 13:24:58 +03:00
|
|
|
std::unordered_map<table_id_t, purge_node_t*> table_id_map;
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 15:42:50 +02:00
|
|
|
mem_heap_empty(purge_sys.heap);
|
2018-02-22 09:18:53 +02:00
|
|
|
|
2019-03-15 12:09:46 +02:00
|
|
|
while (UNIV_LIKELY(srv_undo_sources) || !srv_fast_shutdown) {
|
2014-02-26 19:11:54 +01:00
|
|
|
purge_node_t* node;
|
2019-12-11 23:32:50 +07:00
|
|
|
trx_purge_rec_t purge_rec;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
ut_a(!thr->is_active);
|
|
|
|
|
|
|
|
/* Get the purge node. */
|
|
|
|
node = (purge_node_t*) thr->child;
|
|
|
|
ut_a(que_node_get_type(node) == QUE_NODE_PURGE);
|
|
|
|
|
|
|
|
/* Track the max {trx_id, undo_no} for truncating the
|
|
|
|
UNDO logs once we have purged the records. */
|
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
if (purge_sys.head <= purge_sys.tail) {
|
|
|
|
purge_sys.head = purge_sys.tail;
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
/* Fetch the next record, and advance the purge_sys.tail. */
|
2019-12-11 23:32:50 +07:00
|
|
|
purge_rec.undo_rec = trx_purge_fetch_next_rec(
|
|
|
|
&purge_rec.roll_ptr, &n_pages_handled,
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 15:42:50 +02:00
|
|
|
purge_sys.heap);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-12-11 23:32:50 +07:00
|
|
|
if (purge_rec.undo_rec == NULL) {
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 15:42:50 +02:00
|
|
|
break;
|
2019-12-11 23:32:50 +07:00
|
|
|
} else if (purge_rec.undo_rec == &trx_purge_dummy_rec) {
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 15:42:50 +02:00
|
|
|
continue;
|
|
|
|
}
|
2014-02-26 19:11:54 +01:00
|
|
|
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 15:42:50 +02:00
|
|
|
table_id_t table_id = trx_undo_rec_get_table_id(
|
2019-12-11 23:32:50 +07:00
|
|
|
purge_rec.undo_rec);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2020-05-05 13:24:58 +03:00
|
|
|
purge_node_t *& table_node = table_id_map[table_id];
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2020-05-05 13:24:58 +03:00
|
|
|
if (table_node) {
|
|
|
|
node = table_node;
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 15:42:50 +02:00
|
|
|
} else {
|
|
|
|
thr = UT_LIST_GET_NEXT(thrs, thr);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 15:42:50 +02:00
|
|
|
if (!(++i % n_purge_threads)) {
|
|
|
|
thr = UT_LIST_GET_FIRST(
|
|
|
|
purge_sys.query->thrs);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 15:42:50 +02:00
|
|
|
|
|
|
|
ut_a(thr != NULL);
|
2020-05-05 13:24:58 +03:00
|
|
|
table_node = node;
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
2019-12-11 23:32:50 +07:00
|
|
|
node->undo_recs.push(purge_rec);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 15:42:50 +02:00
|
|
|
if (n_pages_handled >= batch_size) {
|
|
|
|
break;
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-22 09:30:41 +02:00
|
|
|
ut_ad(purge_sys.head <= purge_sys.tail);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
return(n_pages_handled);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*******************************************************************//**
|
|
|
|
Calculate the DML delay required.
|
|
|
|
@return delay in microseconds or ULINT_MAX */
|
|
|
|
static
|
|
|
|
ulint
|
|
|
|
trx_purge_dml_delay(void)
|
|
|
|
/*=====================*/
|
|
|
|
{
|
|
|
|
/* Determine how much data manipulation language (DML) statements
|
|
|
|
need to be delayed in order to reduce the lagging of the purge
|
|
|
|
thread. */
|
|
|
|
ulint delay = 0; /* in microseconds; default: no delay */
|
|
|
|
|
2020-05-16 01:13:02 +04:00
|
|
|
/* If purge lag is set then calculate the new DML delay. */
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
if (srv_max_purge_lag > 0) {
|
2020-03-10 20:05:17 +02:00
|
|
|
double ratio = static_cast<double>(trx_sys.rseg_history_len) /
|
|
|
|
static_cast<double>(srv_max_purge_lag);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
if (ratio > 1.0) {
|
|
|
|
/* If the history list length exceeds the
|
|
|
|
srv_max_purge_lag, the data manipulation
|
|
|
|
statements are delayed by at least 5000
|
|
|
|
microseconds. */
|
|
|
|
delay = (ulint) ((ratio - .5) * 10000);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (delay > srv_max_purge_lag_delay) {
|
|
|
|
delay = srv_max_purge_lag_delay;
|
|
|
|
}
|
|
|
|
|
|
|
|
MONITOR_SET(MONITOR_DML_PURGE_DELAY, delay);
|
|
|
|
}
|
|
|
|
|
|
|
|
return(delay);
|
|
|
|
}
|
|
|
|
|
2019-10-29 22:37:12 +01:00
|
|
|
/** Waitable thread pool task for the purge workers; only declared here.
trx_purge() submits it and trx_purge_wait_for_workers_to_complete()
waits for it. */
extern tpool::waitable_task purge_worker_task;
|
|
|
|
|
2018-02-22 09:18:53 +02:00
|
|
|
/** Wait for pending purge jobs to complete. */
|
2019-11-13 18:14:44 +01:00
|
|
|
static void trx_purge_wait_for_workers_to_complete()
|
2014-02-26 19:11:54 +01:00
|
|
|
{
|
2019-12-16 18:22:59 +01:00
|
|
|
bool notify_wait = purge_worker_task.is_running();
|
|
|
|
|
|
|
|
if (notify_wait)
|
|
|
|
tpool::tpool_wait_begin();
|
|
|
|
|
|
|
|
purge_worker_task.wait();
|
|
|
|
|
|
|
|
if(notify_wait)
|
2019-11-29 22:26:04 +00:00
|
|
|
tpool::tpool_wait_end();
|
2019-12-16 18:22:59 +01:00
|
|
|
|
2019-11-13 18:14:44 +01:00
|
|
|
/* There should be no outstanding tasks as long
|
|
|
|
as the worker threads are active. */
|
|
|
|
ut_ad(srv_get_task_queue_length() == 0);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
2019-11-13 18:14:44 +01:00
|
|
|
/**
|
|
|
|
Run a purge batch.
|
|
|
|
@param n_tasks number of purge tasks to submit to the queue
|
|
|
|
@param truncate whether to truncate the history at the end of the batch
|
2016-08-12 11:17:45 +03:00
|
|
|
@return number of undo log pages handled in the batch */
|
2019-11-13 18:14:44 +01:00
|
|
|
ulint trx_purge(ulint n_tasks, bool truncate)
|
2014-02-26 19:11:54 +01:00
|
|
|
{
|
|
|
|
que_thr_t* thr = NULL;
|
|
|
|
ulint n_pages_handled;
|
|
|
|
|
2019-11-13 18:14:44 +01:00
|
|
|
ut_ad(n_tasks > 0);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
srv_dml_needed_delay = trx_purge_dml_delay();
|
|
|
|
|
2020-05-16 01:13:02 +04:00
|
|
|
purge_sys.clone_oldest_view();
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
if (srv_purge_view_update_only_debug) {
|
|
|
|
return(0);
|
|
|
|
}
|
2016-08-12 11:17:45 +03:00
|
|
|
#endif /* UNIV_DEBUG */
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/* Fetch the UNDO recs that need to be purged. */
|
2019-11-13 18:14:44 +01:00
|
|
|
n_pages_handled = trx_purge_attach_undo_recs(n_tasks);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2018-04-08 18:11:49 +03:00
|
|
|
/* Submit tasks to workers queue if using multi-threaded purge. */
|
2019-11-13 18:14:44 +01:00
|
|
|
for (ulint i = n_tasks; --i; ) {
|
2018-02-22 09:30:41 +02:00
|
|
|
thr = que_fork_scheduler_round_robin(purge_sys.query, thr);
|
2018-04-08 18:11:49 +03:00
|
|
|
ut_a(thr);
|
|
|
|
srv_que_task_enqueue_low(thr);
|
2019-10-29 22:37:12 +01:00
|
|
|
srv_thread_pool->submit_task(&purge_worker_task);
|
2018-04-08 18:11:49 +03:00
|
|
|
}
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2018-04-08 18:11:49 +03:00
|
|
|
thr = que_fork_scheduler_round_robin(purge_sys.query, thr);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2018-04-08 18:11:49 +03:00
|
|
|
que_run_threads(thr);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2018-12-27 22:56:10 +04:00
|
|
|
trx_purge_wait_for_workers_to_complete();
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
if (truncate) {
|
2018-02-21 12:54:33 +02:00
|
|
|
trx_purge_truncate_history();
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
MONITOR_INC_VALUE(MONITOR_PURGE_INVOKED, 1);
|
|
|
|
MONITOR_INC_VALUE(MONITOR_PURGE_N_PAGE_HANDLED, n_pages_handled);
|
|
|
|
|
|
|
|
return(n_pages_handled);
|
|
|
|
}
|