Merge 10.6 into 10.8

This commit is contained in:
Marko Mäkelä 2023-03-16 18:11:37 +02:00
commit acf46b7b36
51 changed files with 1503 additions and 1361 deletions

View file

@ -14,6 +14,7 @@ usr/include/mariadb/mariadb_version.h
usr/include/mariadb/my_config.h
usr/include/mariadb/my_global.h
usr/include/mariadb/my_sys.h
usr/include/mariadb/my_alloca.h
usr/include/mariadb/mysql.h
usr/include/mariadb/mysql/
usr/include/mariadb/mysql/client_plugin.h

View file

@ -2280,8 +2280,14 @@ static bool innodb_init()
/* Check if the data files exist or not. */
dberr_t err= srv_sys_space.check_file_spec(&create_new_db, 5U << 20);
if (create_new_db)
{
msg("mariadb-backup: InnoDB files do not exist");
return true;
}
if (err == DB_SUCCESS)
err= srv_start(create_new_db);
err= srv_start(false);
if (err != DB_SUCCESS)
{
@ -2292,6 +2298,7 @@ static bool innodb_init()
ut_ad(srv_force_recovery <= SRV_FORCE_IGNORE_CORRUPT);
ut_ad(recv_no_log_write);
buf_flush_sync();
recv_sys.debug_free();
DBUG_ASSERT(!buf_pool.any_io_pending());
log_sys.close_file();
@ -3473,7 +3480,9 @@ static void xb_load_single_table_tablespace(const char *dirname,
if (err == DB_SUCCESS && file->space_id() != SRV_TMP_SPACE_ID) {
space = fil_space_t::create(
file->space_id(), file->flags(),
FIL_TYPE_TABLESPACE, NULL/* TODO: crypt_data */);
FIL_TYPE_TABLESPACE, nullptr/* TODO: crypt_data */,
FIL_ENCRYPTION_DEFAULT,
file->handle() != OS_FILE_CLOSED);
ut_a(space != NULL);
fil_node_t* node= space->add(
@ -5279,7 +5288,8 @@ exit:
ut_ad(fil_space_t::physical_size(flags) == info.page_size);
if (fil_space_t::create(info.space_id, flags,
FIL_TYPE_TABLESPACE, 0)) {
FIL_TYPE_TABLESPACE, 0, FIL_ENCRYPTION_DEFAULT,
true)) {
*success = xb_space_create_file(real_name, info.space_id,
flags, &file);
} else {

View file

@ -37,6 +37,7 @@ SET(HEADERS
ma_dyncol.h
my_list.h
my_alloc.h
my_alloca.h
typelib.h
my_dbug.h
m_string.h
@ -111,7 +112,9 @@ ${footer}
ENDMACRO()
# Compatibility headers: list each header exactly once.
# (my_alloca.h used to be listed twice here; the second call was redundant.)
INSTALL_COMPAT_HEADER(my_global.h "")
INSTALL_COMPAT_HEADER(my_alloca.h "")
INSTALL_COMPAT_HEADER(my_config.h "")
INSTALL_COMPAT_HEADER(my_sys.h "")
INSTALL_COMPAT_HEADER(mysql_version.h "
#include <mariadb_version.h>

45
include/my_alloca.h Normal file
View file

@ -0,0 +1,45 @@
/* Copyright (c) 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
/*
  Brings a declaration of alloca() into scope:
  - on _WIN32 via <malloc.h>, mapping alloca to _alloca if needed;
  - elsewhere via <alloca.h> when HAVE_ALLOCA_H is defined;
  - as a last resort via __builtin_alloca for GNU-compatible compilers.

  NOTE(review): HAVE_ALLOCA and HAVE_ALLOCA_H are not defined in this
  header; presumably the includer has already pulled in the build
  configuration (e.g. my_config.h) - confirm.
*/
#ifndef MY_ALLOCA_INCLUDED
#define MY_ALLOCA_INCLUDED
#ifdef _WIN32
#include <malloc.h> /*for alloca*/
/*
MSVC may define "alloca" when compiling in /Ze mode
(with extensions from Microsoft), but otherwise only
the _alloca function is defined:
*/
#ifndef alloca
#define alloca _alloca
#endif
#else
/* Not _WIN32: use <alloca.h> if HAVE_ALLOCA_H says that it exists. */
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#endif
#if defined(HAVE_ALLOCA)
/*
If the GCC/LLVM compiler from MinGW is used,
alloca may not be defined when using the MSVC CRT:
*/
#if defined(__GNUC__) && !defined(HAVE_ALLOCA_H) && !defined(alloca)
#define alloca __builtin_alloca
#endif /* GNUC */
#endif
#endif /* MY_ALLOCA_INCLUDED */

View file

@ -330,13 +330,6 @@ C_MODE_END
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#if defined(__cplusplus) && defined(NO_CPLUSPLUS_ALLOCA)
#undef HAVE_ALLOCA
#undef HAVE_ALLOCA_H
#endif
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#include <errno.h> /* Recommended by debian */
/* We need the following to go around a problem with openssl on solaris */
@ -493,6 +486,7 @@ typedef unsigned short ushort;
#endif
#include <my_compiler.h>
#include <my_alloca.h>
/*
When using the embedded library, users might run into link problems,

View file

@ -28,9 +28,7 @@ C_MODE_START
#include <m_ctype.h> /* for CHARSET_INFO */
#include <stdarg.h>
#include <typelib.h>
#ifdef _WIN32
#include <malloc.h> /*for alloca*/
#endif
#include <my_alloca.h>
#include <mysql/plugin.h>
#include <mysql/service_my_print_error.h>
@ -195,16 +193,6 @@ my_bool my_test_if_thinly_provisioned(File handle);
extern my_bool my_may_have_atomic_write;
#if defined(HAVE_ALLOCA) && !defined(HAVE_valgrind)
#if defined(_AIX) && !defined(__GNUC__) && !defined(_AIX43)
#pragma alloca
#endif /* _AIX */
#if defined(__MWERKS__)
#undef alloca
#define alloca _alloca
#endif /* __MWERKS__ */
#if defined(__GNUC__) && !defined(HAVE_ALLOCA_H) && ! defined(alloca)
#define alloca __builtin_alloca
#endif /* GNUC */
#define my_alloca(SZ) alloca((size_t) (SZ))
#define my_afree(PTR) ((void)0)
#define MAX_ALLOCA_SZ 4096

View file

@ -24,22 +24,19 @@
*provider* (encryption plugin).
*/
#ifdef __cplusplus
extern "C" {
#endif
#ifndef MYSQL_ABI_CHECK
#include <my_alloca.h>
#ifdef _WIN32
#include <malloc.h>
#ifndef __cplusplus
#define inline __inline
#endif
#else
#include <stdlib.h>
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* returned from encryption_key_get_latest_version() */

View file

@ -31,8 +31,10 @@ insert into t8 values (1, 'publicmessage');
insert into t9 values (1, 'pugliccompressedaaaaaaaaabbbbbbbbbbbbbbccccccccccccccc');
--echo # should list tables t1-t6
--sorted_result
SELECT NAME,ENCRYPTION_SCHEME,CURRENT_KEY_ID FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION <> 0 AND NAME LIKE 'enctests%';
--echo # should list tables t7-t9
--sorted_result
SELECT NAME,ENCRYPTION_SCHEME,CURRENT_KEY_ID FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION WHERE MIN_KEY_VERSION = 0 and NAME LIKE 'enctests%';
--let $MYSQLD_DATADIR=`select @@datadir`

View file

@ -9,6 +9,7 @@ INSERT INTO t2 VALUES ('foobar');
#
# MDEV-9640: Add used key_id to INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION
#
--sorted_result
SELECT NAME, ENCRYPTION_SCHEME, MIN_KEY_VERSION, CURRENT_KEY_VERSION,
CURRENT_KEY_ID
FROM INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION

View file

@ -192,7 +192,7 @@ compress_pages_page_decompressed compression 0 NULL NULL NULL 0 NULL NULL NULL N
compress_pages_page_compression_error compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of page compression errors
compress_pages_encrypted compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of pages encrypted
compress_pages_decrypted compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of pages decrypted
index_page_splits index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of index page splits
index_page_splits index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of index page splits
index_page_merge_attempts index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of index page merge attempts
index_page_merge_successful index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of successful index page merges
index_page_reorg_attempts index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of index page reorganization attempts

View file

@ -23,6 +23,7 @@ INNODB_BUFFER_POOL_PAGES_OLD
INNODB_BUFFER_POOL_PAGES_TOTAL
INNODB_BUFFER_POOL_PAGES_LRU_FLUSHED
INNODB_BUFFER_POOL_PAGES_LRU_FREED
INNODB_BUFFER_POOL_PAGES_SPLIT
INNODB_BUFFER_POOL_READ_AHEAD_RND
INNODB_BUFFER_POOL_READ_AHEAD
INNODB_BUFFER_POOL_READ_AHEAD_EVICTED

View file

@ -181,16 +181,19 @@ CREATE TABLE `t12` (
(
pt1 PAGE_COMPRESSED=0
);
--sorted_result
SELECT name, flag FROM information_schema.innodb_sys_tablespaces WHERE name like 'test/t12%';
ALTER TABLE `t12` ADD PARTITION (
PARTITION pt2 PAGE_COMPRESSED=1
);
--sorted_result
SELECT name, flag FROM information_schema.innodb_sys_tablespaces WHERE name like 'test/t12%';
ALTER TABLE `t12` ADD PARTITION (
PARTITION pt3 PAGE_COMPRESSED=1 PAGE_COMPRESSION_LEVEL=3
);
--sorted_result
SELECT name, flag FROM information_schema.innodb_sys_tablespaces WHERE name like 'test/t12%';
DROP TABLE `t12`;
@ -203,6 +206,7 @@ CREATE TABLE `t13` (
PARTITION pt3 VALUES LESS THAN MAXVALUE
);
SHOW CREATE TABLE `t13`;
--sorted_result
SELECT name, flag FROM information_schema.innodb_sys_tablespaces WHERE name like 'test/t13%';
ALTER TABLE `t13` PARTITION BY RANGE(id) (
@ -211,6 +215,7 @@ ALTER TABLE `t13` PARTITION BY RANGE(id) (
PARTITION pt3 VALUES LESS THAN MAXVALUE PAGE_COMPRESSED=0
);
SHOW CREATE TABLE `t13`;
--sorted_result
SELECT name, flag FROM information_schema.innodb_sys_tablespaces WHERE name like 'test/t13%';
DROP TABLE `t13`;

View file

@ -13,7 +13,7 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
#include <my_global.h>
#include <my_alloca.h>
#include <mysql/plugin_password_validation.h>
#include <crack.h>
#include <string.h>

View file

@ -6,7 +6,7 @@
* See COPYRIGHT.txt for details.
*/
#include <my_global.h>
#include <my_alloca.h>
#include <netinet/in.h>
#include <errno.h>
#include <poll.h>
@ -17,9 +17,6 @@
#if __linux__
#include <sys/epoll.h>
#endif
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#include "hstcpsvr_worker.hpp"
#include "string_buffer.hpp"

View file

@ -11,6 +11,7 @@
#include <stdlib.h>
#include <string.h>
#include <my_alloca.h>
#if 0
extern "C" {

View file

@ -5289,7 +5289,7 @@ private:
bool use_temporary_table(TABLE *table, TABLE **out_table);
void close_temporary_table(TABLE *table);
bool log_events_and_free_tmp_shares();
void free_tmp_table_share(TMP_TABLE_SHARE *share, bool delete_table);
bool free_tmp_table_share(TMP_TABLE_SHARE *share, bool delete_table);
void free_temporary_table(TABLE *table);
bool lock_temporary_tables();
void unlock_temporary_tables();

View file

@ -671,7 +671,7 @@ bool THD::drop_temporary_table(TABLE *table, bool *is_trans, bool delete_table)
temporary_tables->remove(share);
/* Free the TABLE_SHARE and/or delete the files. */
free_tmp_table_share(share, delete_table);
result= free_tmp_table_share(share, delete_table);
end:
if (locked)
@ -1464,20 +1464,21 @@ bool THD::log_events_and_free_tmp_shares()
@param share [IN] TABLE_SHARE to free
@param delete_table [IN] Whether to delete the table files?
@return void
@return false Success
true Error
*/
void THD::free_tmp_table_share(TMP_TABLE_SHARE *share, bool delete_table)
bool THD::free_tmp_table_share(TMP_TABLE_SHARE *share, bool delete_table)
{
bool error= false;
DBUG_ENTER("THD::free_tmp_table_share");
if (delete_table)
{
rm_temporary_table(share->db_type(), share->path.str);
error= rm_temporary_table(share->db_type(), share->path.str);
}
free_table_share(share);
my_free(share);
DBUG_VOID_RETURN;
DBUG_RETURN(error);
}

View file

@ -254,7 +254,7 @@ Gets the root node of a tree and x- or s-latches it.
buf_block_t*
btr_root_block_get(
/*===============*/
const dict_index_t* index, /*!< in: index tree */
dict_index_t* index, /*!< in: index tree */
rw_lock_type_t mode, /*!< in: either RW_S_LATCH
or RW_X_LATCH */
mtr_t* mtr, /*!< in: mtr */
@ -266,11 +266,31 @@ btr_root_block_get(
return nullptr;
}
buf_block_t *block = btr_block_get(*index, index->page, mode, false, mtr,
err);
if (block)
buf_block_t *block;
#ifndef BTR_CUR_ADAPT
static constexpr buf_block_t *guess= nullptr;
#else
buf_block_t *&guess= btr_search_get_info(index)->root_guess;
guess=
#endif
block=
buf_page_get_gen(page_id_t{index->table->space->id, index->page},
index->table->space->zip_size(), mode, guess, BUF_GET,
mtr, err, false);
ut_ad(!block == (*err != DB_SUCCESS));
if (UNIV_LIKELY(block != nullptr))
{
if (index->is_ibuf());
if (!!page_is_comp(block->page.frame) != index->table->not_redundant() ||
btr_page_get_index_id(block->page.frame) != index->id ||
!fil_page_index_page_check(block->page.frame) ||
index->is_spatial() !=
(fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE))
{
*err= DB_PAGE_CORRUPTED;
block= nullptr;
}
else if (index->is_ibuf());
else if (!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF,
*block, *index->table->space) ||
!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP,
@ -280,6 +300,9 @@ btr_root_block_get(
block= nullptr;
}
}
else if (*err == DB_DECRYPTION_FAILED)
btr_decryption_failed(*index);
return block;
}
@ -290,7 +313,7 @@ static
page_t*
btr_root_get(
/*=========*/
const dict_index_t* index, /*!< in: index tree */
dict_index_t* index, /*!< in: index tree */
mtr_t* mtr, /*!< in: mtr */
dberr_t* err) /*!< out: error code */
{
@ -502,9 +525,7 @@ btr_block_reget(mtr_t *mtr, const dict_index_t &index,
return block;
}
#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */
ut_ad(mtr->memo_contains_flagged(&index.lock, MTR_MEMO_X_LOCK));
#endif
return btr_block_get(index, id.page_no(), rw_latch, true, mtr, err);
}
@ -773,9 +794,7 @@ btr_page_get_father_node_ptr_for_validate(
const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no();
dict_index_t* index = btr_cur_get_index(cursor);
ut_ad(!dict_index_is_spatial(index));
ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK
| MTR_MEMO_SX_LOCK));
ut_ad(mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK));
ut_ad(dict_index_get_page(index) != page_no);
const auto level = btr_page_get_level(btr_cur_get_page(cursor));
@ -793,10 +812,6 @@ btr_page_get_father_node_ptr_for_validate(
}
const rec_t* node_ptr = btr_cur_get_rec(cursor);
#if 0 /* MDEV-29835 FIXME */
ut_ad(!btr_cur_get_block(cursor)->page.lock.not_recursive()
|| mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK));
#endif
offsets = rec_get_offsets(node_ptr, index, offsets, 0,
ULINT_UNDEFINED, &heap);
@ -2457,11 +2472,10 @@ btr_insert_on_non_leaf_level(
}
ut_ad(cursor.flag == BTR_CUR_BINARY);
#if 0 /* MDEV-29835 FIXME */
ut_ad(!btr_cur_get_block(&cursor)->page.lock.not_recursive()
ut_ad(btr_cur_get_block(&cursor)
!= mtr->at_savepoint(mtr->get_savepoint() - 1)
|| index->is_spatial()
|| mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK));
#endif
if (UNIV_LIKELY(err == DB_SUCCESS)) {
err = btr_cur_optimistic_insert(flags,
@ -2569,10 +2583,8 @@ btr_attach_half_pages(
prev_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX);
#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
if (!prev_block) {
# if 0 /* MDEV-29835 FIXME */
ut_ad(mtr->memo_contains(index->lock,
MTR_MEMO_X_LOCK));
# endif
prev_block = btr_block_get(*index, prev_page_no,
RW_X_LATCH, !level, mtr);
}
@ -2583,10 +2595,8 @@ btr_attach_half_pages(
next_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX);
#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
if (!next_block) {
# if 0 /* MDEV-29835 FIXME */
ut_ad(mtr->memo_contains(index->lock,
MTR_MEMO_X_LOCK));
# endif
next_block = btr_block_get(*index, next_page_no,
RW_X_LATCH, !level, mtr);
}
@ -2966,6 +2976,8 @@ btr_page_split_and_insert(
ut_ad(*err == DB_SUCCESS);
ut_ad(dtuple_check_typed(tuple));
buf_pool.pages_split++;
if (cursor->index()->is_spatial()) {
/* Split rtree page and update parent */
return rtr_page_split_and_insert(flags, cursor, offsets, heap,
@ -3363,8 +3375,6 @@ func_exit:
left_block, right_block, mtr);
}
MONITOR_INC(MONITOR_INDEX_SPLIT);
ut_ad(page_validate(buf_block_get_frame(left_block),
page_cursor->index));
ut_ad(page_validate(buf_block_get_frame(right_block),
@ -3399,9 +3409,7 @@ dberr_t btr_level_list_remove(const buf_block_t& block,
#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
if (!prev)
{
# if 0 /* MDEV-29835 FIXME */
ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK));
# endif
prev= btr_block_get(index, id.page_no(), RW_X_LATCH,
page_is_leaf(block.page.frame), mtr, &err);
if (UNIV_UNLIKELY(!prev))
@ -3417,9 +3425,7 @@ dberr_t btr_level_list_remove(const buf_block_t& block,
#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
if (!next)
{
# if 0 /* MDEV-29835 FIXME */
ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK));
# endif
next= btr_block_get(index, id.page_no(), RW_X_LATCH,
page_is_leaf(block.page.frame), mtr, &err);
if (UNIV_UNLIKELY(!next))
@ -4293,7 +4299,7 @@ btr_discard_page(
if (UNIV_UNLIKELY(!merge_block)) {
return err;
}
#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */
#if 1 /* MDEV-29835 FIXME: Acquire the page latch upfront. */
ut_ad(!memcmp_aligned<4>(merge_block->page.frame
+ FIL_PAGE_NEXT,
block->page.frame + FIL_PAGE_OFFSET,
@ -4319,7 +4325,7 @@ btr_discard_page(
if (UNIV_UNLIKELY(!merge_block)) {
return err;
}
#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */
#if 1 /* MDEV-29835 FIXME: Acquire the page latch upfront. */
ut_ad(!memcmp_aligned<4>(merge_block->page.frame
+ FIL_PAGE_PREV,
block->page.frame + FIL_PAGE_OFFSET,
@ -4901,8 +4907,7 @@ btr_validate_level(
/*===============*/
dict_index_t* index, /*!< in: index tree */
const trx_t* trx, /*!< in: transaction or NULL */
ulint level, /*!< in: level number */
bool lockout)/*!< in: true if X-latch index is intended */
ulint level) /*!< in: level number */
{
buf_block_t* block;
page_t* page;
@ -4921,18 +4926,10 @@ btr_validate_level(
#ifdef UNIV_ZIP_DEBUG
page_zip_des_t* page_zip;
#endif /* UNIV_ZIP_DEBUG */
ulint savepoint = 0;
uint32_t parent_page_no = FIL_NULL;
uint32_t parent_right_page_no = FIL_NULL;
bool rightmost_child = false;
mtr.start();
if (lockout) {
mtr_x_lock_index(index, &mtr);
} else {
mtr_sx_lock_index(index, &mtr);
}
mtr_x_lock_index(index, &mtr);
dberr_t err;
block = btr_root_block_get(index, RW_SX_LATCH, &mtr, &err);
@ -5028,11 +5025,7 @@ func_exit:
mem_heap_empty(heap);
offsets = offsets2 = NULL;
if (lockout) {
mtr_x_lock_index(index, &mtr);
} else {
mtr_sx_lock_index(index, &mtr);
}
mtr_x_lock_index(index, &mtr);
page = block->page.frame;
@ -5076,7 +5069,6 @@ func_exit:
if (right_page_no != FIL_NULL) {
const rec_t* right_rec;
savepoint = mtr.get_savepoint();
right_block = btr_block_get(*index, right_page_no, RW_SX_LATCH,
!level, &mtr, &err);
@ -5179,11 +5171,6 @@ broken_links:
father_page = btr_cur_get_page(&node_cur);
node_ptr = btr_cur_get_rec(&node_cur);
parent_page_no = page_get_page_no(father_page);
parent_right_page_no = btr_page_get_next(father_page);
rightmost_child = page_rec_is_supremum(
page_rec_get_next(node_ptr));
rec = page_rec_get_prev(page_get_supremum_rec(page));
if (rec) {
btr_cur_position(index, rec, block, &node_cur);
@ -5265,37 +5252,6 @@ broken_links:
}
} else if (const rec_t* right_node_ptr
= page_rec_get_next(node_ptr)) {
if (!lockout && rightmost_child) {
/* To obey latch order of tree blocks,
we should release the right_block once to
obtain lock of the uncle block. */
ut_ad(right_block
== mtr.at_savepoint(savepoint));
mtr.rollback_to_savepoint(savepoint,
savepoint + 1);
if (parent_right_page_no != FIL_NULL) {
btr_block_get(*index,
parent_right_page_no,
RW_SX_LATCH, false,
&mtr);
}
right_block = btr_block_get(*index,
right_page_no,
RW_SX_LATCH,
!level, &mtr,
&err);
if (!right_block) {
btr_validate_report1(index, level,
block);
fputs("InnoDB: broken FIL_PAGE_NEXT"
" link\n", stderr);
goto invalid_page;
}
}
btr_cur_position(
index,
page_get_infimum_rec(right_block->page.frame),
@ -5367,20 +5323,6 @@ node_ptr_fails:
mtr.start();
if (!lockout) {
if (rightmost_child) {
if (parent_right_page_no != FIL_NULL) {
btr_block_get(*index,
parent_right_page_no,
RW_SX_LATCH, false,
&mtr);
}
} else if (parent_page_no != FIL_NULL) {
btr_block_get(*index, parent_page_no,
RW_SX_LATCH, false, &mtr);
}
}
block = btr_block_get(*index, right_page_no, RW_SX_LATCH,
!level, &mtr, &err);
goto loop;
@ -5398,21 +5340,16 @@ btr_validate_index(
dict_index_t* index, /*!< in: index */
const trx_t* trx) /*!< in: transaction or NULL */
{
const bool lockout= index->is_spatial();
mtr_t mtr;
mtr.start();
if (lockout)
mtr_x_lock_index(index, &mtr);
else
mtr_sx_lock_index(index, &mtr);
mtr_x_lock_index(index, &mtr);
dberr_t err;
if (page_t *root= btr_root_get(index, &mtr, &err))
for (auto level= btr_page_get_level(root);; level--)
{
if (dberr_t err_level= btr_validate_level(index, trx, level, lockout))
if (dberr_t err_level= btr_validate_level(index, trx, level))
err= err_level;
if (!level)
break;

View file

@ -748,29 +748,34 @@ btr_cur_will_modify_tree(
/** Detects whether the modifying record might need an opposite modification
to the intention.
@param[in] page page
@param[in] lock_intention lock intention for the tree operation
@param[in] rec record (current node_ptr)
@param page page
@param lock_intention lock intention for the tree operation
@param node_ptr_max_size the maximum size of a node pointer
@param compress_limit BTR_CUR_PAGE_COMPRESS_LIMIT(index)
@param rec record (current node_ptr)
@return true if tree modification is needed */
static
bool
btr_cur_need_opposite_intention(
const page_t* page,
btr_intention_t lock_intention,
const rec_t* rec)
static bool btr_cur_need_opposite_intention(const page_t *page,
btr_intention_t lock_intention,
ulint node_ptr_max_size,
ulint compress_limit,
const rec_t *rec)
{
switch (lock_intention) {
case BTR_INTENTION_DELETE:
return (page_has_prev(page) && page_rec_is_first(rec, page)) ||
(page_has_next(page) && page_rec_is_last(rec, page));
case BTR_INTENTION_INSERT:
return page_has_next(page) && page_rec_is_last(rec, page);
case BTR_INTENTION_BOTH:
return(false);
}
MY_ASSERT_UNREACHABLE();
return(false);
if (lock_intention != BTR_INTENTION_INSERT)
{
/* We compensate also for btr_cur_compress_recommendation() */
if (!page_has_siblings(page) ||
page_rec_is_first(rec, page) || page_rec_is_last(rec, page) ||
page_get_data_size(page) < node_ptr_max_size + compress_limit)
return true;
if (lock_intention == BTR_INTENTION_DELETE)
return false;
}
else if (page_has_next(page) && page_rec_is_last(rec, page))
return true;
LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page), return true);
const ulint max_size= page_get_max_insert_size_after_reorganize(page, 2);
return max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT + node_ptr_max_size ||
max_size < node_ptr_max_size * 2;
}
/**
@ -1039,7 +1044,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
const ulint savepoint= mtr->get_savepoint();
ulint node_ptr_max_size= 0;
ulint node_ptr_max_size= 0, compress_limit= 0;
rw_lock_type_t rw_latch= RW_S_LATCH;
switch (latch_mode) {
@ -1051,13 +1056,19 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
ut_ad(mtr->memo_contains_flagged(&index()->lock, MTR_MEMO_X_LOCK));
break;
}
if (lock_intention == BTR_INTENTION_DELETE && buf_pool.n_pend_reads &&
trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH)
/* Most delete-intended operations are due to the purge of history.
Prioritize them when the history list is growing huge. */
mtr_x_lock_index(index(), mtr);
else
mtr_sx_lock_index(index(), mtr);
if (lock_intention == BTR_INTENTION_DELETE)
{
compress_limit= BTR_CUR_PAGE_COMPRESS_LIMIT(index());
if (buf_pool.n_pend_reads &&
trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH)
{
/* Most delete-intended operations are due to the purge of history.
Prioritize them when the history list is growing huge. */
mtr_x_lock_index(index(), mtr);
break;
}
}
mtr_sx_lock_index(index(), mtr);
break;
#ifdef UNIV_DEBUG
case BTR_CONT_MODIFY_TREE:
@ -1332,6 +1343,10 @@ release_tree:
!btr_block_get(*index(), btr_page_get_next(block->page.frame),
RW_X_LATCH, false, mtr, &err))
goto func_exit;
if (btr_cur_need_opposite_intention(block->page.frame, lock_intention,
node_ptr_max_size, compress_limit,
page_cur.rec))
goto need_opposite_intention;
}
reached_latched_leaf:
@ -1385,6 +1400,7 @@ release_tree:
break;
case BTR_MODIFY_TREE:
if (btr_cur_need_opposite_intention(block->page.frame, lock_intention,
node_ptr_max_size, compress_limit,
page_cur.rec))
/* If the rec is the first or last in the page for pessimistic
delete intention, it might cause node_ptr insert for the upper
@ -1537,6 +1553,17 @@ release_tree:
goto search_loop;
}
/** Make sure that the most recently registered memo entry holds an
exclusive latch. If the entry is already MTR_MEMO_X_LOCK, nothing is
done; otherwise the entry must be MTR_MEMO_SX_LOCK, and the lock it
refers to is upgraded and the entry is re-tagged as MTR_MEMO_X_LOCK.

NOTE(review): presumably callers guarantee that the entry just below the
current savepoint is the index lock - confirm against the call sites. */
ATTRIBUTE_COLD void mtr_t::index_lock_upgrade()
{
  auto &latest= m_memo[get_savepoint() - 1];
  if (latest.type != MTR_MEMO_X_LOCK)
  {
    ut_ad(latest.type == MTR_MEMO_SX_LOCK);
    static_cast<index_lock*>(latest.object)->u_x_upgrade(SRW_LOCK_CALL);
    /* Record the stronger latch in the memo entry. */
    latest.type= MTR_MEMO_X_LOCK;
  }
}
ATTRIBUTE_COLD
dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple,
page_cur_mode_t mode, mtr_t *mtr)
@ -1555,8 +1582,7 @@ dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple,
ut_ad(block->page.id().page_no() == index()->page);
block->page.fix();
mtr->rollback_to_savepoint(1);
ut_ad(mtr->memo_contains_flagged(&index()->lock,
MTR_MEMO_SX_LOCK | MTR_MEMO_X_LOCK));
mtr->index_lock_upgrade();
const page_cur_mode_t page_mode{btr_cur_nonleaf_mode(mode)};
@ -1786,7 +1812,6 @@ search_loop:
dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
btr_latch_mode latch_mode, mtr_t *mtr)
{
btr_intention_t lock_intention;
ulint n_blocks= 0;
mem_heap_t *heap= nullptr;
rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
@ -1798,7 +1823,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED;
latch_mode= btr_latch_mode(latch_mode & ~BTR_ALREADY_S_LATCHED);
lock_intention= btr_cur_get_and_clear_intention(&latch_mode);
btr_intention_t lock_intention= btr_cur_get_and_clear_intention(&latch_mode);
/* Store the position of the tree latch we push to mtr so that we
know how to release it when we have latched the leaf node */
@ -1806,7 +1831,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
auto savepoint= mtr->get_savepoint();
rw_lock_type_t upper_rw_latch= RW_X_LATCH;
ulint node_ptr_max_size= 0;
ulint node_ptr_max_size= 0, compress_limit= 0;
if (latch_mode == BTR_MODIFY_TREE)
{
@ -1815,12 +1840,18 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
and read IO bandwidth should be prioritized for them, when the
history list is growing huge. */
savepoint++;
if (lock_intention == BTR_INTENTION_DELETE
&& buf_pool.n_pend_reads
&& trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH)
mtr_x_lock_index(index, mtr);
else
mtr_sx_lock_index(index, mtr);
if (lock_intention == BTR_INTENTION_DELETE)
{
compress_limit= BTR_CUR_PAGE_COMPRESS_LIMIT(index);
if (buf_pool.n_pend_reads &&
trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH)
{
mtr_x_lock_index(index, mtr);
goto index_locked;
}
}
mtr_sx_lock_index(index, mtr);
}
else
{
@ -1841,9 +1872,11 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
}
}
index_locked:
ut_ad(savepoint == mtr->get_savepoint());
const rw_lock_type_t root_leaf_rw_latch= rw_lock_type_t(latch_mode & ~12);
const rw_lock_type_t root_leaf_rw_latch=
rw_lock_type_t(latch_mode & (RW_S_LATCH | RW_X_LATCH));
page_cur.index = index;
@ -1914,15 +1947,28 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
!btr_block_get(*index, btr_page_get_next(block->page.frame),
RW_X_LATCH, false, mtr, &err))
break;
if (!index->lock.have_x() &&
btr_cur_need_opposite_intention(block->page.frame,
lock_intention,
node_ptr_max_size,
compress_limit, page_cur.rec))
goto need_opposite_intention;
}
else
{
if (rw_latch == RW_NO_LATCH)
mtr->upgrade_buffer_fix(leaf_savepoint - 1,
rw_lock_type_t(latch_mode));
/* Release index->lock if needed, and the non-leaf pages. */
mtr->rollback_to_savepoint(savepoint - !latch_by_caller,
leaf_savepoint - 1);
rw_lock_type_t(latch_mode &
(RW_X_LATCH | RW_S_LATCH)));
if (latch_mode != BTR_CONT_MODIFY_TREE)
{
ut_ad(latch_mode == BTR_MODIFY_LEAF ||
latch_mode == BTR_SEARCH_LEAF);
/* Release index->lock if needed, and the non-leaf pages. */
mtr->rollback_to_savepoint(savepoint - !latch_by_caller,
leaf_savepoint - 1);
}
}
break;
}
@ -1944,22 +1990,25 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
: !page_cur_move_to_prev(&page_cur))
goto corrupted;
const rec_t *node_ptr= page_cur.rec;
offsets= rec_get_offsets(node_ptr, index, offsets, 0, ULINT_UNDEFINED,
offsets= rec_get_offsets(page_cur.rec, index, offsets, 0, ULINT_UNDEFINED,
&heap);
ut_ad(latch_mode != BTR_MODIFY_TREE || upper_rw_latch == RW_X_LATCH);
if (latch_mode != BTR_MODIFY_TREE);
else if (btr_cur_need_opposite_intention(block->page.frame,
lock_intention, node_ptr))
else if (btr_cur_need_opposite_intention(block->page.frame, lock_intention,
node_ptr_max_size, compress_limit,
page_cur.rec))
{
need_opposite_intention:
/* If the rec is the first or last in the page for pessimistic
delete intention, it might cause node_ptr insert for the upper
level. We should change the intention and retry. */
mtr->rollback_to_savepoint(savepoint);
lock_intention= BTR_INTENTION_BOTH;
mtr->index_lock_upgrade();
/* X-latch all pages from now on */
latch_mode= BTR_CONT_MODIFY_TREE;
page= index->page;
height= ULINT_UNDEFINED;
n_blocks= 0;
@ -1968,7 +2017,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
else
{
if (!btr_cur_will_modify_tree(index, block->page.frame,
lock_intention, node_ptr,
lock_intention, page_cur.rec,
node_ptr_max_size, zip_size, mtr))
{
ut_ad(n_blocks);
@ -1998,7 +2047,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
}
/* Go to the child node */
page= btr_node_ptr_get_child_page_no(node_ptr, offsets);
page= btr_node_ptr_get_child_page_no(page_cur.rec, offsets);
n_blocks++;
}
@ -2308,8 +2357,7 @@ convert_big_rec:
return(DB_TOO_BIG_RECORD);
}
LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page),
goto fail);
LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page), goto fail);
if (block->page.zip.data && leaf
&& (page_get_data_size(page) + rec_size
@ -2323,7 +2371,7 @@ fail:
/* prefetch siblings of the leaf for the pessimistic
operation, if the page is leaf. */
if (page_is_leaf(page)) {
if (leaf) {
btr_cur_prefetch_siblings(block, index);
}
fail_err:
@ -2392,7 +2440,7 @@ fail_err:
#ifdef UNIV_DEBUG
if (!(flags & BTR_CREATE_FLAG)
&& index->is_primary() && page_is_leaf(page)) {
&& leaf && index->is_primary()) {
const dfield_t* trx_id = dtuple_get_nth_field(
entry, dict_col_get_clust_pos(
dict_table_get_sys_col(index->table,

View file

@ -409,7 +409,6 @@ static bool buf_page_decrypt_after_read(buf_page_t *bpage,
if (id.space() == SRV_TMP_SPACE_ID
&& innodb_encrypt_temporary_tables) {
slot = buf_pool.io_buf_reserve();
ut_a(slot);
slot->allocate();
bool ok = buf_tmp_page_decrypt(slot->crypt_buf, dst_frame);
slot->release();
@ -432,7 +431,6 @@ decompress:
}
slot = buf_pool.io_buf_reserve();
ut_a(slot);
slot->allocate();
decompress_with_slot:
@ -456,7 +454,6 @@ decrypt_failed:
}
slot = buf_pool.io_buf_reserve();
ut_a(slot);
slot->allocate();
/* decrypt using crypt_buf to dst_frame */
@ -1287,6 +1284,41 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
return(true); /* free_list was enough */
}
/** Allocate the array of temporary I/O buffer slots and zero-fill it.
@param n_slots   number of slots to allocate */
void buf_pool_t::io_buf_t::create(ulint n_slots)
{
  const ulint bytes= n_slots * sizeof *slots;
  void *const array= ut_malloc_nokey(bytes);
  /* Every slot starts out as all-zero bytes. */
  memset(array, 0, bytes);
  slots= static_cast<buf_tmp_buffer_t*>(array);
  this->n_slots= n_slots;
}
void buf_pool_t::io_buf_t::close()
{
for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
{
aligned_free(s->crypt_buf);
aligned_free(s->comp_buf);
}
ut_free(slots);
slots= nullptr;
n_slots= 0;
}
/** Reserve a temporary I/O buffer slot, waiting as long as necessary.
Scans all slots and returns the first one whose acquire() succeeds.
After each unsuccessful scan it waits for pending I/O before rescanning:
first for writes, then for reads, and so on alternately.
@return the acquired slot (this function does not return without one) */
buf_tmp_buffer_t *buf_pool_t::io_buf_t::reserve()
{
  for (bool waited_for_writes= false;; waited_for_writes= !waited_for_writes)
  {
    buf_tmp_buffer_t *const end= slots + n_slots;
    for (buf_tmp_buffer_t *slot= slots; slot != end; ++slot)
      if (slot->acquire())
        return slot;

    /* Nothing was free: wait for pending I/O (presumably its completion
    releases slots) before scanning again. */
    if (waited_for_writes)
      os_aio_wait_until_no_pending_reads();
    else
      os_aio_wait_until_no_pending_writes();
  }
}
/** Sets the global variable that feeds MySQL's innodb_buffer_pool_resize_status
to the specified string. The format and the following parameters are the
same as the ones used for printf(3).
@ -1353,21 +1385,25 @@ inline bool buf_pool_t::withdraw_blocks()
block = next_block;
}
mysql_mutex_unlock(&mutex);
/* reserve free_list length */
if (UT_LIST_GET_LEN(withdraw) < withdraw_target) {
buf_flush_LRU(
std::max<ulint>(withdraw_target
- UT_LIST_GET_LEN(withdraw),
srv_LRU_scan_depth));
buf_flush_wait_batch_end_acquiring_mutex(true);
srv_LRU_scan_depth),
true);
mysql_mutex_unlock(&buf_pool.mutex);
buf_dblwr.flush_buffered_writes();
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_flush_wait_LRU_batch_end();
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
mysql_mutex_lock(&buf_pool.mutex);
}
/* relocate blocks/buddies in withdrawn area */
ulint count2 = 0;
mysql_mutex_lock(&mutex);
buf_pool_mutex_exit_forbid();
for (buf_page_t* bpage = UT_LIST_GET_FIRST(LRU), *next_bpage;
bpage; bpage = next_bpage) {
@ -2220,13 +2256,15 @@ lookup:
return bpage;
must_read_page:
if (dberr_t err= buf_read_page(page_id, zip_size))
{
switch (dberr_t err= buf_read_page(page_id, zip_size)) {
case DB_SUCCESS:
case DB_SUCCESS_LOCKED_REC:
goto lookup;
default:
ib::error() << "Reading compressed page " << page_id
<< " failed with error: " << err;
return nullptr;
}
goto lookup;
}
/********************************************************************//**
@ -2369,11 +2407,6 @@ buf_page_get_low(
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_SX_LATCH)
|| (rw_latch == RW_NO_LATCH));
ut_ad(!allow_ibuf_merge
|| mode == BUF_GET
|| mode == BUF_GET_POSSIBLY_FREED
|| mode == BUF_GET_IF_IN_POOL
|| mode == BUF_GET_IF_IN_POOL_OR_WATCH);
if (err) {
*err = DB_SUCCESS;
@ -2381,15 +2414,15 @@ buf_page_get_low(
#ifdef UNIV_DEBUG
switch (mode) {
case BUF_PEEK_IF_IN_POOL:
default:
ut_ad(!allow_ibuf_merge);
ut_ad(mode == BUF_PEEK_IF_IN_POOL);
break;
case BUF_GET_POSSIBLY_FREED:
case BUF_GET_IF_IN_POOL:
/* The caller may pass a dummy page size,
because it does not really matter. */
break;
default:
MY_ASSERT_UNREACHABLE();
case BUF_GET_POSSIBLY_FREED:
break;
case BUF_GET:
case BUF_GET_IF_IN_POOL_OR_WATCH:
ut_ad(!mtr->is_freeing_tree());
@ -2471,20 +2504,23 @@ loop:
corrupted, or if an encrypted page with a valid
checksum cannot be decrypted. */
if (dberr_t local_err = buf_read_page(page_id, zip_size)) {
if (local_err != DB_CORRUPTION
&& mode != BUF_GET_POSSIBLY_FREED
switch (dberr_t local_err = buf_read_page(page_id, zip_size)) {
case DB_SUCCESS:
case DB_SUCCESS_LOCKED_REC:
buf_read_ahead_random(page_id, zip_size, ibuf_inside(mtr));
break;
default:
if (mode != BUF_GET_POSSIBLY_FREED
&& retries++ < BUF_PAGE_READ_MAX_RETRIES) {
DBUG_EXECUTE_IF("intermittent_read_failure",
retries = BUF_PAGE_READ_MAX_RETRIES;);
} else {
if (err) {
*err = local_err;
}
return nullptr;
}
} else {
buf_read_ahead_random(page_id, zip_size, ibuf_inside(mtr));
/* fall through */
case DB_PAGE_CORRUPTED:
if (err) {
*err = local_err;
}
return nullptr;
}
ut_d(if (!(++buf_dbg_counter % 5771)) buf_pool.validate());
@ -2536,11 +2572,12 @@ ignore_block:
return nullptr;
}
} else if (mode != BUF_PEEK_IF_IN_POOL) {
} else if (!mtr) {
} else if (!mtr) {
ut_ad(!block->page.oldest_modification());
mysql_mutex_lock(&buf_pool.mutex);
block->unfix();
free_unfixed_block:
if (!buf_LRU_free_page(&block->page, true)) {
ut_ad(0);
}
@ -2656,20 +2693,19 @@ wait_for_unfix:
/* Decompress the page while not holding
buf_pool.mutex. */
auto ok = buf_zip_decompress(block, false);
block->page.read_unfix(state);
state = block->page.state();
block->page.lock.x_unlock();
const auto ok = buf_zip_decompress(block, false);
--buf_pool.n_pend_unzip;
if (!ok) {
/* FIXME: Evict the corrupted
ROW_FORMAT=COMPRESSED page! */
if (err) {
*err = DB_PAGE_CORRUPTED;
}
return nullptr;
mysql_mutex_lock(&buf_pool.mutex);
}
state = block->page.read_unfix(state);
block->page.lock.x_unlock();
if (!ok) {
goto free_unfixed_block;
}
}
@ -2875,72 +2911,73 @@ buf_page_get_gen(
dberr_t* err,
bool allow_ibuf_merge)
{
if (buf_block_t *block= recv_sys.recover(page_id))
buf_block_t *block= recv_sys.recover(page_id);
if (UNIV_LIKELY(!block))
return buf_page_get_low(page_id, zip_size, rw_latch,
guess, mode, mtr, err, allow_ibuf_merge);
else if (UNIV_UNLIKELY(block == reinterpret_cast<buf_block_t*>(-1)))
{
if (UNIV_UNLIKELY(block == reinterpret_cast<buf_block_t*>(-1)))
{
corrupted:
if (err)
*err= DB_CORRUPTION;
return nullptr;
}
/* Recovery is a special case; we fix() before acquiring lock. */
auto s= block->page.fix();
ut_ad(s >= buf_page_t::FREED);
/* The block may be write-fixed at this point because we are not
holding a lock, but it must not be read-fixed. */
ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX);
corrupted:
if (err)
*err= DB_SUCCESS;
const bool must_merge= allow_ibuf_merge &&
ibuf_page_exists(page_id, block->zip_size());
*err= DB_CORRUPTION;
return nullptr;
}
/* Recovery is a special case; we fix() before acquiring lock. */
auto s= block->page.fix();
ut_ad(s >= buf_page_t::FREED);
/* The block may be write-fixed at this point because we are not
holding a lock, but it must not be read-fixed. */
ut_ad(s < buf_page_t::READ_FIX || s >= buf_page_t::WRITE_FIX);
if (err)
*err= DB_SUCCESS;
const bool must_merge= allow_ibuf_merge &&
ibuf_page_exists(page_id, block->zip_size());
if (s < buf_page_t::UNFIXED)
{
got_freed_page:
ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL);
mysql_mutex_lock(&buf_pool.mutex);
block->page.unfix();
buf_LRU_free_page(&block->page, true);
mysql_mutex_unlock(&buf_pool.mutex);
goto corrupted;
}
else if (must_merge &&
fil_page_get_type(block->page.frame) == FIL_PAGE_INDEX &&
page_is_leaf(block->page.frame))
{
block->page.lock.x_lock();
s= block->page.state();
ut_ad(s > buf_page_t::FREED);
ut_ad(s < buf_page_t::READ_FIX);
if (s < buf_page_t::UNFIXED)
{
got_freed_page:
ut_ad(mode == BUF_GET_POSSIBLY_FREED || mode == BUF_PEEK_IF_IN_POOL);
block->page.unfix();
goto corrupted;
}
else if (must_merge &&
fil_page_get_type(block->page.frame) == FIL_PAGE_INDEX &&
page_is_leaf(block->page.frame))
{
block->page.lock.x_lock();
s= block->page.state();
ut_ad(s > buf_page_t::FREED);
ut_ad(s < buf_page_t::READ_FIX);
if (s < buf_page_t::UNFIXED)
{
block->page.lock.x_unlock();
goto got_freed_page;
}
else
{
if (block->page.is_ibuf_exist())
block->page.clear_ibuf_exist();
if (dberr_t e=
ibuf_merge_or_delete_for_page(block, page_id, block->zip_size()))
{
if (err)
*err= e;
buf_pool.corrupted_evict(&block->page, s);
return nullptr;
}
}
if (rw_latch == RW_X_LATCH)
{
mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
return block;
}
block->page.lock.x_unlock();
goto got_freed_page;
}
else
{
if (block->page.is_ibuf_exist())
block->page.clear_ibuf_exist();
if (dberr_t e=
ibuf_merge_or_delete_for_page(block, page_id, block->zip_size()))
{
if (err)
*err= e;
buf_pool.corrupted_evict(&block->page, s);
return nullptr;
}
}
mtr->page_lock(block, rw_latch);
return block;
}
return buf_page_get_low(page_id, zip_size, rw_latch,
guess, mode, mtr, err, allow_ibuf_merge);
if (rw_latch == RW_X_LATCH)
{
mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
return block;
}
block->page.lock.x_unlock();
}
mtr->page_lock(block, rw_latch);
return block;
}
/********************************************************************//**
@ -3238,12 +3275,12 @@ retry:
buf_unzip_LRU_add_block(reinterpret_cast<buf_block_t*>(bpage), FALSE);
}
buf_pool.stat.n_pages_created++;
mysql_mutex_unlock(&buf_pool.mutex);
mtr->memo_push(reinterpret_cast<buf_block_t*>(bpage), MTR_MEMO_PAGE_X_FIX);
bpage->set_accessed();
buf_pool.stat.n_pages_created++;
/* Delete possible entries for the page from the insert buffer:
such can exist if the page belonged to an index which was dropped */
@ -3493,7 +3530,6 @@ dberr_t buf_page_t::read_complete(const fil_node_t &node)
ut_d(auto n=) buf_pool.n_pend_reads--;
ut_ad(n > 0);
buf_pool.stat.n_pages_read++;
const byte *read_frame= zip.data ? zip.data : frame;
ut_ad(read_frame);
@ -3645,9 +3681,6 @@ void buf_pool_invalidate()
{
mysql_mutex_lock(&buf_pool.mutex);
buf_flush_wait_batch_end(true);
buf_flush_wait_batch_end(false);
/* It is possible that a write batch that has been posted
earlier is still not complete. For buffer pool invalidation to
proceed we must ensure there is NO write activity happening. */
@ -3798,8 +3831,8 @@ void buf_pool_t::print()
<< UT_LIST_GET_LEN(flush_list)
<< ", n pending decompressions=" << n_pend_unzip
<< ", n pending reads=" << n_pend_reads
<< ", n pending flush LRU=" << n_flush_LRU_
<< " list=" << n_flush_list_
<< ", n pending flush LRU=" << n_flush()
<< " list=" << buf_dblwr.pending_writes()
<< ", pages made young=" << stat.n_pages_made_young
<< ", not young=" << stat.n_pages_not_made_young
<< ", pages read=" << stat.n_pages_read
@ -3911,13 +3944,13 @@ void buf_stats_get_pool_info(buf_pool_info_t *pool_info)
pool_info->flush_list_len = UT_LIST_GET_LEN(buf_pool.flush_list);
pool_info->n_pend_unzip = UT_LIST_GET_LEN(buf_pool.unzip_LRU);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
pool_info->n_pend_reads = buf_pool.n_pend_reads;
pool_info->n_pending_flush_lru = buf_pool.n_flush_LRU_;
pool_info->n_pending_flush_lru = buf_pool.n_flush();
pool_info->n_pending_flush_list = buf_pool.n_flush_list_;
pool_info->n_pending_flush_list = buf_dblwr.pending_writes();
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
current_time = time(NULL);
time_elapsed = 0.001 + difftime(current_time,

View file

@ -46,7 +46,17 @@ inline buf_block_t *buf_dblwr_trx_sys_get(mtr_t *mtr)
0, RW_X_LATCH, mtr);
}
/** Initialize the doublewrite buffer data structure.
void buf_dblwr_t::init()
{
if (!active_slot)
{
active_slot= &slots[0];
mysql_mutex_init(buf_dblwr_mutex_key, &mutex, nullptr);
pthread_cond_init(&cond, nullptr);
}
}
/** Initialise the persistent storage of the doublewrite buffer.
@param header doublewrite page header in the TRX_SYS page */
inline void buf_dblwr_t::init(const byte *header)
{
@ -54,8 +64,6 @@ inline void buf_dblwr_t::init(const byte *header)
ut_ad(!active_slot->reserved);
ut_ad(!batch_running);
mysql_mutex_init(buf_dblwr_mutex_key, &mutex, nullptr);
pthread_cond_init(&cond, nullptr);
block1= page_id_t(0, mach_read_from_4(header + TRX_SYS_DOUBLEWRITE_BLOCK1));
block2= page_id_t(0, mach_read_from_4(header + TRX_SYS_DOUBLEWRITE_BLOCK2));
@ -74,7 +82,7 @@ inline void buf_dblwr_t::init(const byte *header)
@return whether the operation succeeded */
bool buf_dblwr_t::create()
{
if (is_initialised())
if (is_created())
return true;
mtr_t mtr;
@ -341,7 +349,7 @@ func_exit:
void buf_dblwr_t::recover()
{
ut_ad(log_sys.last_checkpoint_lsn);
if (!is_initialised())
if (!is_created())
return;
uint32_t page_no_dblwr= 0;
@ -450,10 +458,9 @@ next_page:
/** Free the doublewrite buffer. */
void buf_dblwr_t::close()
{
if (!is_initialised())
if (!active_slot)
return;
/* Free the double write data structures. */
ut_ad(!active_slot->reserved);
ut_ad(!active_slot->first_free);
ut_ad(!batch_running);
@ -467,35 +474,41 @@ void buf_dblwr_t::close()
mysql_mutex_destroy(&mutex);
memset((void*) this, 0, sizeof *this);
active_slot= &slots[0];
}
/** Update the doublewrite buffer on write completion. */
void buf_dblwr_t::write_completed()
void buf_dblwr_t::write_completed(bool with_doublewrite)
{
ut_ad(this == &buf_dblwr);
ut_ad(srv_use_doublewrite_buf);
ut_ad(is_initialised());
ut_ad(!srv_read_only_mode);
mysql_mutex_lock(&mutex);
ut_ad(batch_running);
slot *flush_slot= active_slot == &slots[0] ? &slots[1] : &slots[0];
ut_ad(flush_slot->reserved);
ut_ad(flush_slot->reserved <= flush_slot->first_free);
ut_ad(writes_pending);
if (!--writes_pending)
pthread_cond_broadcast(&write_cond);
if (!--flush_slot->reserved)
if (with_doublewrite)
{
mysql_mutex_unlock(&mutex);
/* This will finish the batch. Sync data files to the disk. */
fil_flush_file_spaces();
mysql_mutex_lock(&mutex);
ut_ad(is_created());
ut_ad(srv_use_doublewrite_buf);
ut_ad(batch_running);
slot *flush_slot= active_slot == &slots[0] ? &slots[1] : &slots[0];
ut_ad(flush_slot->reserved);
ut_ad(flush_slot->reserved <= flush_slot->first_free);
/* We can now reuse the doublewrite memory buffer: */
flush_slot->first_free= 0;
batch_running= false;
pthread_cond_broadcast(&cond);
if (!--flush_slot->reserved)
{
mysql_mutex_unlock(&mutex);
/* This will finish the batch. Sync data files to the disk. */
fil_flush_file_spaces();
mysql_mutex_lock(&mutex);
/* We can now reuse the doublewrite memory buffer: */
flush_slot->first_free= 0;
batch_running= false;
pthread_cond_broadcast(&cond);
}
}
mysql_mutex_unlock(&mutex);
@ -640,7 +653,7 @@ void buf_dblwr_t::flush_buffered_writes_completed(const IORequest &request)
{
ut_ad(this == &buf_dblwr);
ut_ad(srv_use_doublewrite_buf);
ut_ad(is_initialised());
ut_ad(is_created());
ut_ad(!srv_read_only_mode);
ut_ad(!request.bpage);
ut_ad(request.node == fil_system.sys_space->chain.start);
@ -706,7 +719,7 @@ posted, and also when we may have to wait for a page latch!
Otherwise a deadlock of threads can occur. */
void buf_dblwr_t::flush_buffered_writes()
{
if (!is_initialised() || !srv_use_doublewrite_buf)
if (!is_created() || !srv_use_doublewrite_buf)
{
fil_flush_file_spaces();
return;
@ -739,6 +752,7 @@ void buf_dblwr_t::add_to_batch(const IORequest &request, size_t size)
const ulint buf_size= 2 * block_size();
mysql_mutex_lock(&mutex);
writes_pending++;
for (;;)
{

File diff suppressed because it is too large Load diff

View file

@ -136,7 +136,6 @@ static void buf_LRU_block_free_hashed_page(buf_block_t *block)
@param[in] bpage control block */
static inline void incr_LRU_size_in_bytes(const buf_page_t* bpage)
{
/* FIXME: use atomics, not mutex */
mysql_mutex_assert_owner(&buf_pool.mutex);
buf_pool.stat.LRU_bytes += bpage->physical_size();
@ -400,8 +399,10 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex)
DBUG_EXECUTE_IF("recv_ran_out_of_buffer",
if (recv_recovery_is_on()
&& recv_sys.apply_log_recs) {
mysql_mutex_lock(&buf_pool.mutex);
goto flush_lru;
});
get_mutex:
mysql_mutex_lock(&buf_pool.mutex);
got_mutex:
buf_LRU_check_size_of_non_data_objects();
@ -444,20 +445,32 @@ got_block:
if ((block = buf_LRU_get_free_only()) != nullptr) {
goto got_block;
}
if (!buf_pool.n_flush_LRU_) {
break;
mysql_mutex_unlock(&buf_pool.mutex);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
const auto n_flush = buf_pool.n_flush();
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
mysql_mutex_lock(&buf_pool.mutex);
if (!n_flush) {
goto not_found;
}
if (!buf_pool.try_LRU_scan) {
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_pool.page_cleaner_wakeup(true);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
my_cond_wait(&buf_pool.done_free,
&buf_pool.mutex.m_mutex);
}
my_cond_wait(&buf_pool.done_free, &buf_pool.mutex.m_mutex);
}
#ifndef DBUG_OFF
not_found:
#endif
mysql_mutex_unlock(&buf_pool.mutex);
if (n_iterations > 1) {
MONITOR_INC( MONITOR_LRU_GET_FREE_WAITS );
}
if (n_iterations > 20 && !buf_lru_free_blocks_error_printed
if (n_iterations == 21 && !buf_lru_free_blocks_error_printed
&& srv_buf_pool_old_size == srv_buf_pool_size) {
buf_lru_free_blocks_error_printed = true;
mysql_mutex_unlock(&buf_pool.mutex);
ib::warn() << "Difficult to find free blocks in the buffer pool"
" (" << n_iterations << " search iterations)! "
<< flush_failures << " failed attempts to"
@ -469,12 +482,7 @@ not_found:
<< os_n_file_writes << " OS file writes, "
<< os_n_fsyncs
<< " OS fsyncs.";
buf_lru_free_blocks_error_printed = true;
}
if (n_iterations > 1) {
MONITOR_INC( MONITOR_LRU_GET_FREE_WAITS );
mysql_mutex_lock(&buf_pool.mutex);
}
/* No free block was found: try to flush the LRU list.
@ -488,15 +496,16 @@ not_found:
#ifndef DBUG_OFF
flush_lru:
#endif
if (!buf_flush_LRU(innodb_lru_flush_size)) {
if (!buf_flush_LRU(innodb_lru_flush_size, true)) {
MONITOR_INC(MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT);
++flush_failures;
}
n_iterations++;
mysql_mutex_lock(&buf_pool.mutex);
buf_pool.stat.LRU_waits++;
goto got_mutex;
mysql_mutex_unlock(&buf_pool.mutex);
buf_dblwr.flush_buffered_writes();
goto get_mutex;
}
/** Move the LRU_old pointer so that the length of the old blocks list
@ -805,50 +814,57 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
/* We cannot use transactional_lock_guard here,
because buf_buddy_relocate() in buf_buddy_free() could get stuck. */
hash_lock.lock();
lsn_t oldest_modification = bpage->oldest_modification_acquire();
const lsn_t oldest_modification = bpage->oldest_modification_acquire();
if (UNIV_UNLIKELY(!bpage->can_relocate())) {
/* Do not free buffer fixed and I/O-fixed blocks. */
goto func_exit;
}
if (oldest_modification == 1) {
switch (oldest_modification) {
case 2:
ut_ad(id.space() == SRV_TMP_SPACE_ID);
ut_ad(!bpage->zip.data);
if (!bpage->is_freed()) {
goto func_exit;
}
bpage->clear_oldest_modification();
break;
case 1:
mysql_mutex_lock(&buf_pool.flush_list_mutex);
oldest_modification = bpage->oldest_modification();
if (oldest_modification) {
ut_ad(oldest_modification == 1);
if (const lsn_t om = bpage->oldest_modification()) {
ut_ad(om == 1);
buf_pool.delete_from_flush_list(bpage);
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
ut_ad(!bpage->oldest_modification());
oldest_modification = 0;
}
if (zip || !bpage->zip.data) {
/* This would completely free the block. */
/* Do not completely free dirty blocks. */
if (oldest_modification) {
goto func_exit;
/* fall through */
case 0:
if (zip || !bpage->zip.data || !bpage->frame) {
break;
}
} else if (oldest_modification && !bpage->frame) {
func_exit:
hash_lock.unlock();
return(false);
} else if (bpage->frame) {
relocate_compressed:
b = static_cast<buf_page_t*>(ut_zalloc_nokey(sizeof *b));
ut_a(b);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
new (b) buf_page_t(*bpage);
b->frame = nullptr;
b->set_state(buf_page_t::UNFIXED + 1);
break;
default:
if (zip || !bpage->zip.data || !bpage->frame) {
/* This would completely free the block. */
/* Do not completely free dirty blocks. */
func_exit:
hash_lock.unlock();
return(false);
}
goto relocate_compressed;
}
mysql_mutex_assert_owner(&buf_pool.mutex);
DBUG_PRINT("ib_buf", ("free page %u:%u",
id.space(), id.page_no()));
DBUG_PRINT("ib_buf", ("free page %u:%u", id.space(), id.page_no()));
ut_ad(bpage->can_relocate());
@ -1026,7 +1042,8 @@ buf_LRU_block_free_non_file_page(
} else {
UT_LIST_ADD_FIRST(buf_pool.free, &block->page);
ut_d(block->page.in_free_list = true);
pthread_cond_signal(&buf_pool.done_free);
buf_pool.try_LRU_scan= true;
pthread_cond_broadcast(&buf_pool.done_free);
}
MEM_NOACCESS(block->page.frame, srv_page_size);

View file

@ -226,6 +226,7 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
buf_LRU_add_block(bpage, true/* to old blocks */);
}
buf_pool.stat.n_pages_read++;
mysql_mutex_unlock(&buf_pool.mutex);
buf_pool.n_pend_reads++;
goto func_exit_no_mutex;
@ -245,20 +246,18 @@ buffer buf_pool if it is not already there, in which case does nothing.
Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
flag is cleared and the x-lock released by an i/o-handler thread.
@param[out] err DB_SUCCESS or DB_TABLESPACE_DELETED
if we are trying
to read from a non-existent tablespace
@param[in,out] space tablespace
@param[in] sync true if synchronous aio is desired
@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ...,
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in] unzip true=request uncompressed page
@return whether a read request was queued */
@return error code
@retval DB_SUCCESS if the page was read
@retval DB_SUCCESS_LOCKED_REC if the page exists in the buffer pool already */
static
bool
dberr_t
buf_read_page_low(
dberr_t* err,
fil_space_t* space,
bool sync,
ulint mode,
@ -268,15 +267,12 @@ buf_read_page_low(
{
buf_page_t* bpage;
*err = DB_SUCCESS;
if (buf_dblwr.is_inside(page_id)) {
ib::error() << "Trying to read doublewrite buffer page "
<< page_id;
ut_ad(0);
nothing_read:
space->release();
return false;
return DB_PAGE_CORRUPTED;
}
if (sync) {
@ -299,8 +295,9 @@ nothing_read:
completed */
bpage = buf_page_init_for_read(mode, page_id, zip_size, unzip);
if (bpage == NULL) {
goto nothing_read;
if (!bpage) {
space->release();
return DB_SUCCESS_LOCKED_REC;
}
ut_ad(bpage->in_file());
@ -320,7 +317,6 @@ nothing_read:
? IORequest::READ_SYNC
: IORequest::READ_ASYNC),
page_id.page_no() * len, len, dst, bpage);
*err = fio.err;
if (UNIV_UNLIKELY(fio.err != DB_SUCCESS)) {
ut_d(auto n=) buf_pool.n_pend_reads--;
@ -329,14 +325,14 @@ nothing_read:
} else if (sync) {
thd_wait_end(NULL);
/* The i/o was already completed in space->io() */
*err = bpage->read_complete(*fio.node);
fio.err = bpage->read_complete(*fio.node);
space->release();
if (*err == DB_FAIL) {
*err = DB_PAGE_CORRUPTED;
if (fio.err == DB_FAIL) {
fio.err = DB_PAGE_CORRUPTED;
}
}
return true;
return fio.err;
}
/** Applies a random read-ahead in buf_pool if there are at least a threshold
@ -414,24 +410,26 @@ read_ahead:
continue;
if (space->is_stopping())
break;
dberr_t err;
space->reacquire();
if (buf_read_page_low(&err, space, false, ibuf_mode, i, zip_size, false))
if (buf_read_page_low(space, false, ibuf_mode, i, zip_size, false) ==
DB_SUCCESS)
count++;
}
if (count)
{
DBUG_PRINT("ib_buf", ("random read-ahead %zu pages from %s: %u",
count, space->chain.start->name,
low.page_no()));
mysql_mutex_lock(&buf_pool.mutex);
/* Read ahead is considered one I/O operation for the purpose of
LRU policy decision. */
buf_LRU_stat_inc_io();
buf_pool.stat.n_ra_pages_read_rnd+= count;
mysql_mutex_unlock(&buf_pool.mutex);
}
space->release();
/* Read ahead is considered one I/O operation for the purpose of
LRU policy decision. */
buf_LRU_stat_inc_io();
buf_pool.stat.n_ra_pages_read_rnd+= count;
srv_stats.buf_pool_reads.add(count);
return count;
}
@ -441,8 +439,9 @@ on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread.
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@retval DB_SUCCESS if the page was read and is not corrupted,
@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
@retval DB_SUCCESS if the page was read and is not corrupted
@retval DB_SUCCESS_LOCKED_REC if the page was not read
@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted
@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
after decryption normal page checksum does not match.
@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
@ -456,13 +455,9 @@ dberr_t buf_read_page(const page_id_t page_id, ulint zip_size)
return DB_TABLESPACE_DELETED;
}
dberr_t err;
if (buf_read_page_low(&err, space, true, BUF_READ_ANY_PAGE,
page_id, zip_size, false))
srv_stats.buf_pool_reads.add(1);
buf_LRU_stat_inc_io();
return err;
buf_LRU_stat_inc_io(); /* NOT protected by buf_pool.mutex */
return buf_read_page_low(space, true, BUF_READ_ANY_PAGE,
page_id, zip_size, false);
}
/** High-level function which reads a page asynchronously from a file to the
@ -475,12 +470,8 @@ released by the i/o-handler thread.
void buf_read_page_background(fil_space_t *space, const page_id_t page_id,
ulint zip_size)
{
dberr_t err;
if (buf_read_page_low(&err, space, false, BUF_READ_ANY_PAGE,
page_id, zip_size, false)) {
srv_stats.buf_pool_reads.add(1);
}
buf_read_page_low(space, false, BUF_READ_ANY_PAGE,
page_id, zip_size, false);
/* We do not increment number of I/O operations used for LRU policy
here (buf_LRU_stat_inc_io()). We use this in heuristics to decide
@ -638,23 +629,26 @@ failed:
continue;
if (space->is_stopping())
break;
dberr_t err;
space->reacquire();
count+= buf_read_page_low(&err, space, false, ibuf_mode, new_low, zip_size,
false);
if (buf_read_page_low(space, false, ibuf_mode, new_low, zip_size, false) ==
DB_SUCCESS)
count++;
}
if (count)
{
DBUG_PRINT("ib_buf", ("random read-ahead %zu pages from %s: %u",
count, space->chain.start->name,
new_low.page_no()));
mysql_mutex_lock(&buf_pool.mutex);
/* Read ahead is considered one I/O operation for the purpose of
LRU policy decision. */
buf_LRU_stat_inc_io();
buf_pool.stat.n_ra_pages_read+= count;
mysql_mutex_unlock(&buf_pool.mutex);
}
space->release();
/* Read ahead is considered one I/O operation for the purpose of
LRU policy decision. */
buf_LRU_stat_inc_io();
buf_pool.stat.n_ra_pages_read+= count;
return count;
}
@ -707,13 +701,12 @@ void buf_read_recv_pages(uint32_t space_id, st_::span<uint32_t> page_nos)
}
}
dberr_t err;
space->reacquire();
buf_read_page_low(&err, space, false,
BUF_READ_ANY_PAGE, cur_page_id, zip_size,
true);
if (err != DB_SUCCESS) {
switch (buf_read_page_low(space, false, BUF_READ_ANY_PAGE,
cur_page_id, zip_size, true)) {
case DB_SUCCESS: case DB_SUCCESS_LOCKED_REC:
break;
default:
sql_print_error("InnoDB: Recovery failed to read page "
UINT32PF " from %s",
cur_page_id.page_no(),

View file

@ -119,6 +119,9 @@ bool fil_space_t::try_to_close(bool print_info)
}
node->close();
fil_system.move_closed_last_to_space_list(node->space);
return true;
}
@ -393,13 +396,7 @@ static bool fil_node_open_file_low(fil_node_t *node)
ut_ad(node->is_open());
if (UNIV_LIKELY(!fil_system.freeze_space_list))
{
/* Move the file last in fil_system.space_list, so that
fil_space_t::try_to_close() should close it as a last resort. */
fil_system.space_list.erase(space_list_t::iterator(node->space));
fil_system.space_list.push_back(*node->space);
}
fil_system.move_opened_last_to_space_list(node->space);
fil_system.n_open++;
return true;
@ -796,7 +793,17 @@ pfs_os_file_t fil_system_t::detach(fil_space_t *space, bool detach_handle)
space->is_in_default_encrypt= false;
default_encrypt_tables.remove(*space);
}
space_list.erase(space_list_t::iterator(space));
{
space_list_t::iterator s= space_list_t::iterator(space);
if (space_list_last_opened == space)
{
space_list_t::iterator prev= s;
space_list_last_opened= &*--prev;
}
space_list.erase(s);
}
if (space == sys_space)
sys_space= nullptr;
else if (space == temp_space)
@ -915,12 +922,14 @@ bool fil_space_free(uint32_t id, bool x_latched)
@param purpose tablespace purpose
@param crypt_data encryption information
@param mode encryption mode
@param opened true if space files are opened
@return pointer to created tablespace, to be filled in with add()
@retval nullptr on failure (such as when the same tablespace exists) */
fil_space_t *fil_space_t::create(uint32_t id, uint32_t flags,
fil_type_t purpose,
fil_space_crypt_t *crypt_data,
fil_encryption_t mode)
fil_encryption_t mode,
bool opened)
{
fil_space_t* space;
@ -973,7 +982,10 @@ fil_space_t *fil_space_t::create(uint32_t id, uint32_t flags,
HASH_INSERT(fil_space_t, hash, &fil_system.spaces, id, space);
fil_system.space_list.push_back(*space);
if (opened)
fil_system.add_opened_last_to_space_list(space);
else
fil_system.space_list.push_back(*space);
switch (id) {
case 0:
@ -1294,6 +1306,15 @@ void fil_system_t::close()
#endif /* __linux__ */
}
/** Add a tablespace with opened files to space_list and remember it
in space_list_last_opened.
@param space   tablespace to add */
void fil_system_t::add_opened_last_to_space_list(fil_space_t *space)
{
  if (UNIV_UNLIKELY(!space_list_last_opened))
    /* No opened tablespace is being tracked yet: append to the list. */
    space_list.push_back(*space);
  else
    /* NOTE(review): this inserts at the position of the element that
    space_list_last_opened points to. Presumably (std::list-like
    semantics) that places the new element in front of it -- confirm
    that this is the intended ordering, given that
    space_list_last_opened is repointed to the new element below. */
    space_list.insert(space_list_t::iterator(space_list_last_opened), *space);

  space_list_last_opened= space;
}
/** Extend all open data files to the recovered size */
ATTRIBUTE_COLD void fil_system_t::extend_to_recv_size()
{
@ -1917,7 +1938,7 @@ err_exit:
if (fil_space_t* space = fil_space_t::create(space_id, flags,
FIL_TYPE_TABLESPACE,
crypt_data, mode)) {
crypt_data, mode, true)) {
fil_node_t* node = space->add(path, file, size, false, true);
IF_WIN(node->find_metadata(), node->find_metadata(file, true));
mtr.start();

View file

@ -1209,8 +1209,6 @@ after_insert:
ut_ad(!rec || rec_offs_validate(rec, cursor->index(), *offsets));
#endif
MONITOR_INC(MONITOR_INDEX_SPLIT);
return(rec);
}

View file

@ -914,43 +914,37 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_buffer_pool_resize_status, SHOW_CHAR},
{"buffer_pool_load_incomplete",
&export_vars.innodb_buffer_pool_load_incomplete, SHOW_BOOL},
{"buffer_pool_pages_data",
&export_vars.innodb_buffer_pool_pages_data, SHOW_SIZE_T},
{"buffer_pool_pages_data", &UT_LIST_GET_LEN(buf_pool.LRU), SHOW_SIZE_T},
{"buffer_pool_bytes_data",
&export_vars.innodb_buffer_pool_bytes_data, SHOW_SIZE_T},
{"buffer_pool_pages_dirty",
&export_vars.innodb_buffer_pool_pages_dirty, SHOW_SIZE_T},
{"buffer_pool_bytes_dirty",
&export_vars.innodb_buffer_pool_bytes_dirty, SHOW_SIZE_T},
{"buffer_pool_pages_flushed", &buf_flush_page_count, SHOW_SIZE_T},
{"buffer_pool_pages_free",
&export_vars.innodb_buffer_pool_pages_free, SHOW_SIZE_T},
&UT_LIST_GET_LEN(buf_pool.flush_list), SHOW_SIZE_T},
{"buffer_pool_bytes_dirty", &buf_pool.flush_list_bytes, SHOW_SIZE_T},
{"buffer_pool_pages_flushed", &buf_pool.stat.n_pages_written, SHOW_SIZE_T},
{"buffer_pool_pages_free", &UT_LIST_GET_LEN(buf_pool.free), SHOW_SIZE_T},
#ifdef UNIV_DEBUG
{"buffer_pool_pages_latched",
&export_vars.innodb_buffer_pool_pages_latched, SHOW_SIZE_T},
#endif /* UNIV_DEBUG */
{"buffer_pool_pages_made_not_young",
&export_vars.innodb_buffer_pool_pages_made_not_young, SHOW_SIZE_T},
&buf_pool.stat.n_pages_not_made_young, SHOW_SIZE_T},
{"buffer_pool_pages_made_young",
&export_vars.innodb_buffer_pool_pages_made_young, SHOW_SIZE_T},
&buf_pool.stat.n_pages_made_young, SHOW_SIZE_T},
{"buffer_pool_pages_misc",
&export_vars.innodb_buffer_pool_pages_misc, SHOW_SIZE_T},
{"buffer_pool_pages_old",
&export_vars.innodb_buffer_pool_pages_old, SHOW_SIZE_T},
{"buffer_pool_pages_old", &buf_pool.LRU_old_len, SHOW_SIZE_T},
{"buffer_pool_pages_total",
&export_vars.innodb_buffer_pool_pages_total, SHOW_SIZE_T},
{"buffer_pool_pages_LRU_flushed", &buf_lru_flush_page_count, SHOW_SIZE_T},
{"buffer_pool_pages_LRU_freed", &buf_lru_freed_page_count, SHOW_SIZE_T},
{"buffer_pool_pages_split", &buf_pool.pages_split, SHOW_SIZE_T},
{"buffer_pool_read_ahead_rnd",
&export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_SIZE_T},
{"buffer_pool_read_ahead",
&export_vars.innodb_buffer_pool_read_ahead, SHOW_SIZE_T},
&buf_pool.stat.n_ra_pages_read_rnd, SHOW_SIZE_T},
{"buffer_pool_read_ahead", &buf_pool.stat.n_ra_pages_read, SHOW_SIZE_T},
{"buffer_pool_read_ahead_evicted",
&export_vars.innodb_buffer_pool_read_ahead_evicted, SHOW_SIZE_T},
{"buffer_pool_read_requests",
&export_vars.innodb_buffer_pool_read_requests, SHOW_SIZE_T},
{"buffer_pool_reads",
&export_vars.innodb_buffer_pool_reads, SHOW_SIZE_T},
&buf_pool.stat.n_ra_pages_evicted, SHOW_SIZE_T},
{"buffer_pool_read_requests", &buf_pool.stat.n_page_gets, SHOW_SIZE_T},
{"buffer_pool_reads", &buf_pool.stat.n_pages_read, SHOW_SIZE_T},
{"buffer_pool_wait_free", &buf_pool.stat.LRU_waits, SHOW_SIZE_T},
{"buffer_pool_write_requests", &buf_pool.flush_list_requests, SHOW_SIZE_T},
{"checkpoint_age", &export_vars.innodb_checkpoint_age, SHOW_SIZE_T},
@ -19419,10 +19413,22 @@ static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave,
NULL, NULL, FALSE);
#endif /* HAVE_LIBNUMA */
/** Update callback for innodb_change_buffering.
Stores the requested value unless it would enable change buffering
(anything other than IBUF_USE_NONE) while ibuf.index is null; in that
case the value is left unchanged and a warning is pushed instead.
@param thd    connection that receives the warning
@param save   pointer to the requested value (ulong) */
static void innodb_change_buffering_update(THD *thd, struct st_mysql_sys_var*,
                                           void*, const void *save)
{
  const ulong requested= *static_cast<const ulong*>(save);

  if (requested == IBUF_USE_NONE || ibuf.index)
  {
    innodb_change_buffering= requested;
    return;
  }

  push_warning(thd, Sql_condition::WARN_LEVEL_WARN, ER_NOT_KEYFILE,
               "InnoDB: The change buffer is corrupted.");
}
static MYSQL_SYSVAR_ENUM(change_buffering, innodb_change_buffering,
PLUGIN_VAR_RQCMDARG,
"Buffer changes to secondary indexes.",
NULL, NULL, IBUF_USE_NONE, &innodb_change_buffering_typelib);
nullptr, innodb_change_buffering_update,
IBUF_USE_NONE, &innodb_change_buffering_typelib);
static MYSQL_SYSVAR_UINT(change_buffer_max_size,
srv_change_buffer_max_size,

View file

@ -6120,6 +6120,7 @@ func_exit:
id, MTR_MEMO_PAGE_SX_FIX);
if (UNIV_UNLIKELY(!root)) {
err = DB_CORRUPTION;
goto func_exit;
}

View file

@ -403,8 +403,13 @@ ibuf_init_at_db_start(void)
if (!header_page) {
err_exit:
sql_print_error("InnoDB: The change buffer is corrupted");
sql_print_error("InnoDB: The change buffer is corrupted"
" or has been removed on upgrade"
" to MariaDB 11.0 or later");
mtr.commit();
if (innodb_change_buffering == IBUF_USE_NONE) {
err = DB_SUCCESS;
}
return err;
}
@ -1979,6 +1984,7 @@ void
ibuf_free_excess_pages(void)
/*========================*/
{
if (UNIV_UNLIKELY(!ibuf.index)) return;
/* Free at most a few pages at a time, so that we do not delay the
requested service too much */
@ -2420,6 +2426,7 @@ will be merged from ibuf trees to the pages read
@retval 0 if ibuf.empty */
ulint ibuf_contract()
{
if (UNIV_UNLIKELY(!ibuf.index)) return 0;
mtr_t mtr;
btr_cur_t cur;
ulint sum_sizes;
@ -2469,6 +2476,7 @@ ibuf_merge_space(
/*=============*/
ulint space) /*!< in: tablespace id to merge */
{
if (UNIV_UNLIKELY(!ibuf.index)) return 0;
mtr_t mtr;
btr_pcur_t pcur;
@ -2934,13 +2942,14 @@ void
ibuf_update_max_tablespace_id(void)
/*===============================*/
{
if (UNIV_UNLIKELY(!ibuf.index)) return;
const rec_t* rec;
const byte* field;
ulint len;
btr_pcur_t pcur;
mtr_t mtr;
ut_a(!dict_table_is_comp(ibuf.index->table));
ut_ad(!ibuf.index->table->not_redundant());
ibuf_mtr_start(&mtr);
@ -4419,6 +4428,8 @@ in DISCARD TABLESPACE, IMPORT TABLESPACE, or read-ahead.
@param[in] space missing or to-be-discarded tablespace */
void ibuf_delete_for_discarded_space(uint32_t space)
{
if (UNIV_UNLIKELY(!ibuf.index)) return;
btr_pcur_t pcur;
const rec_t* ibuf_rec;
mtr_t mtr;
@ -4532,6 +4543,7 @@ ibuf_print(
/*=======*/
FILE* file) /*!< in: file where to print */
{
if (UNIV_UNLIKELY(!ibuf.index)) return;
mysql_mutex_lock(&ibuf_mutex);
fprintf(file,
@ -4571,8 +4583,6 @@ dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space)
mtr_t mtr;
mysql_mutex_lock(&ibuf_mutex);
/* The two bitmap pages (allocation bitmap and ibuf bitmap) repeat
every page_size pages. For example if page_size is 16 KiB, then the
two bitmap pages repeat every 16 KiB * 16384 = 256 MiB. In the loop
@ -4581,18 +4591,14 @@ dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space)
for (uint32_t page_no = 0; page_no < size; page_no += physical_size) {
if (trx_is_interrupted(trx)) {
mysql_mutex_unlock(&ibuf_mutex);
return(DB_INTERRUPTED);
}
mtr_start(&mtr);
ibuf_enter(&mtr);
buf_block_t* bitmap_page = ibuf_bitmap_get_map_page(
page_id_t(space->id, page_no), zip_size, &mtr);
if (!bitmap_page) {
mysql_mutex_unlock(&ibuf_mutex);
ibuf_exit(&mtr);
mtr.commit();
return DB_CORRUPTION;
}
@ -4615,7 +4621,6 @@ dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space)
physical_size)));
}
#endif /* UNIV_DEBUG */
ibuf_exit(&mtr);
mtr_commit(&mtr);
continue;
}
@ -4630,8 +4635,6 @@ dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space)
cur_page_id, zip_size,
IBUF_BITMAP_IBUF, &mtr)) {
mysql_mutex_unlock(&ibuf_mutex);
ibuf_exit(&mtr);
mtr_commit(&mtr);
ib_errf(trx->mysql_thd,
@ -4665,11 +4668,9 @@ dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space)
}
}
ibuf_exit(&mtr);
mtr_commit(&mtr);
}
mysql_mutex_unlock(&ibuf_mutex);
return(DB_SUCCESS);
}

View file

@ -445,7 +445,7 @@ Gets the root node of a tree and x- or s-latches it.
buf_block_t*
btr_root_block_get(
/*===============*/
const dict_index_t* index, /*!< in: index tree */
dict_index_t* index, /*!< in: index tree */
rw_lock_type_t mode, /*!< in: either RW_S_LATCH
or RW_X_LATCH */
mtr_t* mtr, /*!< in: mtr */

View file

@ -103,6 +103,9 @@ enum btr_latch_mode {
dict_index_t::lock is being held in non-exclusive mode. */
BTR_MODIFY_LEAF_ALREADY_LATCHED = BTR_MODIFY_LEAF
| BTR_ALREADY_S_LATCHED,
/** Attempt to modify records in an x-latched tree. */
BTR_MODIFY_TREE_ALREADY_LATCHED = BTR_MODIFY_TREE
| BTR_ALREADY_S_LATCHED,
/** U-latch root and X-latch a leaf page, assuming that
dict_index_t::lock is being held in U mode. */
BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED = BTR_MODIFY_ROOT_AND_LEAF

View file

@ -720,13 +720,14 @@ public:
ut_ad(s < REINIT);
}
void read_unfix(uint32_t s)
uint32_t read_unfix(uint32_t s)
{
ut_ad(lock.is_write_locked());
ut_ad(s == UNFIXED + 1 || s == IBUF_EXIST + 1 || s == REINIT + 1);
ut_d(auto old_state=) zip.fix.fetch_add(s - READ_FIX);
uint32_t old_state= zip.fix.fetch_add(s - READ_FIX);
ut_ad(old_state >= READ_FIX);
ut_ad(old_state < WRITE_FIX);
return old_state + (s - READ_FIX);
}
void set_freed(uint32_t prev_state, uint32_t count= 0)
@ -777,11 +778,11 @@ public:
it from buf_pool.flush_list */
inline void write_complete(bool temporary);
/** Write a flushable page to a file. buf_pool.mutex must be held.
@param lru true=buf_pool.LRU; false=buf_pool.flush_list
/** Write a flushable page to a file or free a freeable block.
@param evict whether to evict the page on write completion
@param space tablespace
@return whether the page was flushed and buf_pool.mutex was released */
inline bool flush(bool lru, fil_space_t *space);
@return whether a page write was initiated and buf_pool.mutex released */
bool flush(bool evict, fil_space_t *space);
/** Notify that a page in a temporary tablespace has been modified. */
void set_temp_modified()
@ -851,8 +852,6 @@ public:
/** @return whether the block is mapped to a data file */
bool in_file() const { return state() >= FREED; }
/** @return whether the block is modified and ready for flushing */
inline bool ready_for_flush() const;
/** @return whether the block can be relocated in memory.
The block can be dirty, but it must not be I/O-fixed or bufferfixed. */
inline bool can_relocate() const;
@ -1025,10 +1024,10 @@ Compute the hash fold value for blocks in buf_pool.zip_hash. */
#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
/* @} */
/** A "Hazard Pointer" class used to iterate over page lists
inside the buffer pool. A hazard pointer is a buf_page_t pointer
/** A "Hazard Pointer" class used to iterate over buf_pool.LRU or
buf_pool.flush_list. A hazard pointer is a buf_page_t pointer
which we intend to iterate over next and we want it to remain valid
even after we release the buffer pool mutex. */
even after we release the mutex that protects the list. */
class HazardPointer
{
public:
@ -1143,7 +1142,8 @@ struct buf_buddy_free_t {
/*!< Node of zip_free list */
};
/** @brief The buffer pool statistics structure. */
/** @brief The buffer pool statistics structure;
protected by buf_pool.mutex unless otherwise noted. */
struct buf_pool_stat_t{
/** Initialize the counters */
void init() { memset((void*) this, 0, sizeof *this); }
@ -1152,9 +1152,8 @@ struct buf_pool_stat_t{
/*!< number of page gets performed;
also successful searches through
the adaptive hash index are
counted as page gets; this field
is NOT protected by the buffer
pool mutex */
counted as page gets;
NOT protected by buf_pool.mutex */
ulint n_pages_read; /*!< number read operations */
ulint n_pages_written;/*!< number write operations */
ulint n_pages_created;/*!< number of pages created
@ -1172,10 +1171,9 @@ struct buf_pool_stat_t{
young because the first access
was not long enough ago, in
buf_page_peek_if_too_old() */
/** number of waits for eviction; writes protected by buf_pool.mutex */
/** number of waits for eviction */
ulint LRU_waits;
ulint LRU_bytes; /*!< LRU size in bytes */
ulint flush_list_bytes;/*!< flush_list size in bytes */
};
/** Statistics of buddy blocks of a given size. */
@ -1496,6 +1494,11 @@ public:
n_chunks_new / 4 * chunks->size;
}
/** Check whether the buffer pool has run out.
@return whether try_LRU_scan is cleared or the free list is empty */
TPOOL_SUPPRESS_TSAN
bool ran_out() const
{
  /* Equivalent to !try_LRU_scan || !UT_LIST_GET_LEN(free); the list
  length is only consulted when try_LRU_scan is set. */
  return UNIV_UNLIKELY(!(try_LRU_scan && UT_LIST_GET_LEN(free)));
}
/** @return whether the buffer pool is shrinking */
inline bool is_shrinking() const
{
@ -1533,17 +1536,10 @@ public:
/** Buffer pool mutex */
alignas(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t mutex;
/** Number of pending LRU flush; protected by mutex. */
ulint n_flush_LRU_;
/** broadcast when n_flush_LRU reaches 0; protected by mutex */
pthread_cond_t done_flush_LRU;
/** Number of pending flush_list flush; protected by mutex */
ulint n_flush_list_;
/** broadcast when n_flush_list reaches 0; protected by mutex */
pthread_cond_t done_flush_list;
TPOOL_SUPPRESS_TSAN ulint n_flush_LRU() const { return n_flush_LRU_; }
TPOOL_SUPPRESS_TSAN ulint n_flush_list() const { return n_flush_list_; }
/** current statistics; protected by mutex */
buf_pool_stat_t stat;
/** old statistics; protected by mutex */
buf_pool_stat_t old_stat;
/** @name General fields */
/* @{ */
@ -1704,11 +1700,12 @@ public:
buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
/*!< Statistics of buddy system,
indexed by block size */
buf_pool_stat_t stat; /*!< current statistics */
buf_pool_stat_t old_stat; /*!< old statistics */
/* @} */
/** number of index page splits */
Atomic_counter<ulint> pages_split;
/** @name Page flushing algorithm fields */
/* @{ */
@ -1717,7 +1714,10 @@ public:
alignas(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t flush_list_mutex;
/** "hazard pointer" for flush_list scans; protected by flush_list_mutex */
FlushHp flush_hp;
/** modified blocks (a subset of LRU) */
/** flush_list size in bytes; protected by flush_list_mutex */
ulint flush_list_bytes;
/** possibly modified persistent pages (a subset of LRU);
buf_dblwr.pending_writes() is approximately COUNT(is_write_fixed()) */
UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
/** number of blocks ever added to flush_list;
sometimes protected by flush_list_mutex */
@ -1726,28 +1726,70 @@ public:
TPOOL_SUPPRESS_TSAN void add_flush_list_requests(size_t size)
{ ut_ad(size); flush_list_requests+= size; }
private:
/** whether the page cleaner needs wakeup from indefinite sleep */
bool page_cleaner_is_idle;
static constexpr unsigned PAGE_CLEANER_IDLE= 1;
static constexpr unsigned FLUSH_LIST_ACTIVE= 2;
static constexpr unsigned LRU_FLUSH= 4;
/** Number of pending LRU flush * LRU_FLUSH +
PAGE_CLEANER_IDLE + FLUSH_LIST_ACTIVE flags */
unsigned page_cleaner_status;
/** track server activity count for signaling idle flushing */
ulint last_activity_count;
public:
/** signalled to wake up the page_cleaner; protected by flush_list_mutex */
pthread_cond_t do_flush_list;
/** broadcast when !n_flush(); protected by flush_list_mutex */
pthread_cond_t done_flush_LRU;
/** broadcast when a batch completes; protected by flush_list_mutex */
pthread_cond_t done_flush_list;
/** Get the number of pending LRU flush, which is kept in
page_cleaner_status as a multiple of LRU_FLUSH.
Ownership of flush_list_mutex is asserted.
@return number of pending LRU flush */
unsigned n_flush() const
{
  mysql_mutex_assert_owner(&flush_list_mutex);
  const unsigned pending= page_cleaner_status / LRU_FLUSH;
  return pending;
}
/** Increment the number of pending LRU flush */
inline void n_flush_inc();
/** Decrement the number of pending LRU flush */
inline void n_flush_dec();
/** Test the FLUSH_LIST_ACTIVE flag of page_cleaner_status.
Ownership of flush_list_mutex is asserted.
@return whether flush_list flushing is active */
bool flush_list_active() const
{
  mysql_mutex_assert_owner(&flush_list_mutex);
  return (page_cleaner_status & FLUSH_LIST_ACTIVE) != 0;
}
/** Set the FLUSH_LIST_ACTIVE flag in page_cleaner_status.
The flag must not be set already; this is checked with ut_ad() via
flush_list_active(), which also asserts ownership of flush_list_mutex. */
void flush_list_set_active()
{
  ut_ad(!flush_list_active());
  page_cleaner_status= page_cleaner_status + FLUSH_LIST_ACTIVE;
}
/** Clear the FLUSH_LIST_ACTIVE flag in page_cleaner_status.
The flag must currently be set; this is checked with ut_ad() via
flush_list_active(), which also asserts ownership of flush_list_mutex. */
void flush_list_set_inactive()
{
  ut_ad(flush_list_active());
  page_cleaner_status= page_cleaner_status - FLUSH_LIST_ACTIVE;
}
/** @return whether the page cleaner must sleep due to being idle */
bool page_cleaner_idle() const noexcept
{
mysql_mutex_assert_owner(&flush_list_mutex);
return page_cleaner_is_idle;
return page_cleaner_status & PAGE_CLEANER_IDLE;
}
/** Wake up the page cleaner if needed */
void page_cleaner_wakeup();
/** Wake up the page cleaner if needed.
@param for_LRU whether to wake up for LRU eviction */
void page_cleaner_wakeup(bool for_LRU= false);
/** Register whether an explicit wakeup of the page cleaner is needed */
void page_cleaner_set_idle(bool deep_sleep)
{
mysql_mutex_assert_owner(&flush_list_mutex);
page_cleaner_is_idle= deep_sleep;
page_cleaner_status= (page_cleaner_status & ~PAGE_CLEANER_IDLE) |
(PAGE_CLEANER_IDLE * deep_sleep);
}
/** Update server last activity count */
@ -1757,9 +1799,6 @@ public:
last_activity_count= activity_count;
}
// n_flush_LRU() + n_flush_list()
// is approximately COUNT(is_write_fixed()) in flush_list
unsigned freed_page_clock;/*!< a sequence number used
to count the number of buffer
blocks removed from the end of
@ -1769,16 +1808,10 @@ public:
to read this for heuristic
purposes without holding any
mutex or latch */
bool try_LRU_scan; /*!< Cleared when an LRU
scan for free block fails. This
flag is used to avoid repeated
scans of LRU list when we know
that there is no free block
available in the scan depth for
eviction. Set whenever
we flush a batch from the
buffer pool. Protected by the
buf_pool.mutex */
/** Cleared when buf_LRU_get_free_block() fails.
Set whenever the free list grows, along with a broadcast of done_free.
Protected by buf_pool.mutex. */
Atomic_relaxed<bool> try_LRU_scan;
/* @} */
/** @name LRU replacement algorithm fields */
@ -1787,7 +1820,8 @@ public:
UT_LIST_BASE_NODE_T(buf_page_t) free;
/*!< base node of the free
block list */
/** signaled each time when the free list grows; protected by mutex */
/** broadcast each time when the free list grows or try_LRU_scan is set;
protected by mutex */
pthread_cond_t done_free;
UT_LIST_BASE_NODE_T(buf_page_t) withdraw;
@ -1847,29 +1881,16 @@ public:
{
if (n_pend_reads)
return true;
mysql_mutex_lock(&mutex);
const bool any_pending{n_flush_LRU_ || n_flush_list_};
mysql_mutex_unlock(&mutex);
mysql_mutex_lock(&flush_list_mutex);
const bool any_pending= page_cleaner_status > PAGE_CLEANER_IDLE ||
buf_dblwr.pending_writes();
mysql_mutex_unlock(&flush_list_mutex);
return any_pending;
}
/** @return total amount of pending I/O */
ulint io_pending() const
{
return n_pend_reads + n_flush_LRU() + n_flush_list();
}
private:
/** Remove a block from the flush list. */
inline void delete_from_flush_list_low(buf_page_t *bpage) noexcept;
/** Remove a block from flush_list.
@param bpage buffer pool page
@param clear whether to invoke buf_page_t::clear_oldest_modification() */
void delete_from_flush_list(buf_page_t *bpage, bool clear) noexcept;
public:
/** Remove a block from flush_list.
@param bpage buffer pool page */
void delete_from_flush_list(buf_page_t *bpage) noexcept
{ delete_from_flush_list(bpage, true); }
void delete_from_flush_list(buf_page_t *bpage) noexcept;
/** Prepare to insert a modified block into flush_list.
@param lsn start LSN of the mini-transaction
@ -1884,7 +1905,7 @@ public:
lsn_t lsn) noexcept;
/** Free a page whose underlying file page has been freed. */
inline void release_freed_page(buf_page_t *bpage) noexcept;
ATTRIBUTE_COLD void release_freed_page(buf_page_t *bpage) noexcept;
private:
/** Temporary memory for page_compressed and encrypted I/O */
@ -1895,34 +1916,12 @@ private:
/** array of slots */
buf_tmp_buffer_t *slots;
void create(ulint n_slots)
{
this->n_slots= n_slots;
slots= static_cast<buf_tmp_buffer_t*>
(ut_malloc_nokey(n_slots * sizeof *slots));
memset((void*) slots, 0, n_slots * sizeof *slots);
}
void create(ulint n_slots);
void close()
{
for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
{
aligned_free(s->crypt_buf);
aligned_free(s->comp_buf);
}
ut_free(slots);
slots= nullptr;
n_slots= 0;
}
void close();
/** Reserve a buffer */
buf_tmp_buffer_t *reserve()
{
for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
if (s->acquire())
return s;
return nullptr;
}
buf_tmp_buffer_t *reserve();
} io_buf;
/** whether resize() is in the critical path */
@ -2011,7 +2010,10 @@ inline void buf_page_t::set_oldest_modification(lsn_t lsn)
/** Clear oldest_modification after removing from buf_pool.flush_list */
inline void buf_page_t::clear_oldest_modification()
{
mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
#ifdef SAFE_MUTEX
if (oldest_modification() != 2)
mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
#endif /* SAFE_MUTEX */
ut_d(const auto s= state());
ut_ad(s >= REMOVE_HASH);
ut_ad(oldest_modification());
@ -2023,17 +2025,6 @@ inline void buf_page_t::clear_oldest_modification()
oldest_modification_.store(0, std::memory_order_release);
}
/** @return whether the block is modified and ready for flushing */
inline bool buf_page_t::ready_for_flush() const
{
mysql_mutex_assert_owner(&buf_pool.mutex);
ut_ad(in_LRU_list);
const auto s= state();
ut_a(s >= FREED);
ut_ad(!fsp_is_system_temporary(id().space()) || oldest_modification() == 2);
return s < READ_FIX;
}
/** @return whether the block can be relocated in memory.
The block can be dirty, but it must not be I/O-fixed or bufferfixed. */
inline bool buf_page_t::can_relocate() const

View file

@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2020, MariaDB Corporation.
Copyright (c) 2017, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -54,9 +54,9 @@ class buf_dblwr_t
};
/** the page number of the first doublewrite block (block_size() pages) */
page_id_t block1= page_id_t(0, 0);
page_id_t block1{0, 0};
/** the page number of the second doublewrite block (block_size() pages) */
page_id_t block2= page_id_t(0, 0);
page_id_t block2{0, 0};
/** mutex protecting the data members below */
mysql_mutex_t mutex;
@ -72,11 +72,15 @@ class buf_dblwr_t
ulint writes_completed;
/** number of pages written by flush_buffered_writes_completed() */
ulint pages_written;
/** condition variable for !writes_pending */
pthread_cond_t write_cond;
/** number of pending page writes */
size_t writes_pending;
slot slots[2];
slot *active_slot= &slots[0];
slot *active_slot;
/** Initialize the doublewrite buffer data structure.
/** Initialise the persistent storage of the doublewrite buffer.
@param header doublewrite page header in the TRX_SYS page */
inline void init(const byte *header);
@ -84,6 +88,8 @@ class buf_dblwr_t
bool flush_buffered_writes(const ulint size);
public:
/** Initialise the doublewrite buffer data structures. */
void init();
/** Create or restore the doublewrite buffer in the TRX_SYS page.
@return whether the operation succeeded */
bool create();
@ -118,7 +124,7 @@ public:
void recover();
/** Update the doublewrite buffer on data page write completion. */
void write_completed();
void write_completed(bool with_doublewrite);
/** Flush possible buffered writes to persistent storage.
It is very important to call this function after a batch of writes has been
posted, and also when we may have to wait for a page latch!
@ -137,14 +143,14 @@ public:
@param size payload size in bytes */
void add_to_batch(const IORequest &request, size_t size);
/** Determine whether the doublewrite buffer is initialized */
bool is_initialised() const
/** Determine whether the doublewrite buffer has been created */
bool is_created() const
{ return UNIV_LIKELY(block1 != page_id_t(0, 0)); }
/** @return whether a page identifier is part of the doublewrite buffer */
bool is_inside(const page_id_t id) const
{
if (!is_initialised())
if (!is_created())
return false;
ut_ad(block1 < block2);
if (id < block1)
@ -156,13 +162,44 @@ public:
/** Wait for flush_buffered_writes() to be fully completed */
void wait_flush_buffered_writes()
{
if (is_initialised())
{
mysql_mutex_lock(&mutex);
while (batch_running)
my_cond_wait(&cond, &mutex.m_mutex);
mysql_mutex_unlock(&mutex);
}
mysql_mutex_lock(&mutex);
while (batch_running)
my_cond_wait(&cond, &mutex.m_mutex);
mysql_mutex_unlock(&mutex);
}
/** Register an unbuffered page write by incrementing writes_pending
while holding mutex. */
void add_unbuffered()
{
  mysql_mutex_lock(&mutex);
  ++writes_pending;
  mysql_mutex_unlock(&mutex);
}
/** Read writes_pending while holding mutex.
@return the number of pending page writes at the time of the call */
size_t pending_writes()
{
  mysql_mutex_lock(&mutex);
  const size_t snapshot= writes_pending;
  mysql_mutex_unlock(&mutex);
  return snapshot;
}
/** Block on write_cond, with mutex held, until writes_pending is 0. */
void wait_for_page_writes()
{
  mysql_mutex_lock(&mutex);
  for (;;)
  {
    if (!writes_pending)
      break;
    my_cond_wait(&write_cond, &mutex.m_mutex);
  }
  mysql_mutex_unlock(&mutex);
}
/** Wait for writes_pending to reach 0, but no longer than until abstime.
On return, writes_pending may still be nonzero if the wait ended early;
callers that need to know can check pending_writes().
@param abstime  absolute time at which to give up waiting */
void wait_for_page_writes(const timespec &abstime)
{
  mysql_mutex_lock(&mutex);
  /* Stop waiting as soon as the timed wait reports a nonzero status.
  Once abstime has passed, every further timed wait would return
  immediately, so retrying would turn this loop into a busy wait that
  keeps acquiring and releasing the mutex.
  NOTE(review): assumes my_cond_timedwait() returns nonzero on timeout,
  like pthread_cond_timedwait() - confirm. */
  while (writes_pending)
    if (my_cond_timedwait(&write_cond, &mutex.m_mutex, &abstime))
      break;
  mysql_mutex_unlock(&mutex);
}
};

View file

@ -30,10 +30,8 @@ Created 11/5/1995 Heikki Tuuri
#include "log0log.h"
#include "buf0buf.h"
/** Number of pages flushed. Protected by buf_pool.mutex. */
extern ulint buf_flush_page_count;
/** Number of pages flushed via LRU. Protected by buf_pool.mutex.
Also included in buf_flush_page_count. */
Also included in buf_pool.stat.n_pages_written. */
extern ulint buf_lru_flush_page_count;
/** Number of pages freed without flushing. Protected by buf_pool.mutex. */
extern ulint buf_lru_freed_page_count;
@ -86,15 +84,18 @@ buf_flush_init_for_writing(
bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed= nullptr)
MY_ATTRIBUTE((warn_unused_result));
/** Write out dirty blocks from buf_pool.LRU.
/** Write out dirty blocks from buf_pool.LRU,
and move clean blocks to buf_pool.free.
The caller must invoke buf_dblwr.flush_buffered_writes()
after releasing buf_pool.mutex.
@param max_n wished maximum number of blocks flushed
@return the number of processed pages
@param evict whether to evict pages after flushing
@return evict ? number of processed pages : number of pages written
@retval 0 if a buf_pool.LRU batch is already running */
ulint buf_flush_LRU(ulint max_n);
ulint buf_flush_LRU(ulint max_n, bool evict);
/** Wait until a flush batch ends.
@param lru true=buf_pool.LRU; false=buf_pool.flush_list */
void buf_flush_wait_batch_end(bool lru);
/** Wait until a LRU flush batch ends. */
void buf_flush_wait_LRU_batch_end();
/** Wait until all persistent pages are flushed up to a limit.
@param sync_lsn buf_pool.get_oldest_modification(LSN_MAX) to wait for */
ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn);
@ -106,9 +107,6 @@ ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious);
/** Initialize page_cleaner. */
ATTRIBUTE_COLD void buf_flush_page_cleaner_init();
/** Wait for pending flushes to complete. */
void buf_flush_wait_batch_end_acquiring_mutex(bool lru);
/** Flush the buffer pool on shutdown. */
ATTRIBUTE_COLD void buf_flush_buffer_pool();

View file

@ -33,10 +33,11 @@ Created 11/5/1995 Heikki Tuuri
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread.
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@retval DB_SUCCESS if the page was read and is not corrupted,
@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
@param page_id page id
@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
@retval DB_SUCCESS if the page was read and is not corrupted
@retval DB_SUCCESS_LOCKED_REC if the page was not read
@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted
@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
after decryption normal page checksum does not match.
@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */

View file

@ -898,11 +898,13 @@ public:
@param purpose tablespace purpose
@param crypt_data encryption information
@param mode encryption mode
@param opened true if space files are opened
@return pointer to created tablespace, to be filled in with add()
@retval nullptr on failure (such as when the same tablespace exists) */
static fil_space_t *create(uint32_t id, uint32_t flags,
fil_type_t purpose, fil_space_crypt_t *crypt_data,
fil_encryption_t mode= FIL_ENCRYPTION_DEFAULT);
fil_encryption_t mode= FIL_ENCRYPTION_DEFAULT,
bool opened= false);
MY_ATTRIBUTE((warn_unused_result))
/** Acquire a tablespace reference.
@ -1107,7 +1109,7 @@ private:
inline bool fil_space_t::use_doublewrite() const
{
return !UT_LIST_GET_FIRST(chain)->atomic_write && srv_use_doublewrite_buf &&
buf_dblwr.is_initialised();
buf_dblwr.is_created();
}
inline void fil_space_t::set_imported()
@ -1384,6 +1386,11 @@ struct fil_system_t
private:
bool m_initialised;
/** Points to the last opened space in space_list. Protected with
fil_system.mutex. */
fil_space_t *space_list_last_opened= nullptr;
#ifdef __linux__
/** available block devices that reside on non-rotational storage */
std::vector<dev_t> ssd;
@ -1425,7 +1432,8 @@ public:
/** nonzero if fil_node_open_file_low() should avoid moving the tablespace
to the end of space_list, for FIFO policy of try_to_close() */
ulint freeze_space_list;
/** list of all tablespaces */
/** List of all file spaces, opened spaces should be at the top of the list
to optimize try_to_close() execution. Protected with fil_system.mutex. */
ilist<fil_space_t, space_list_tag_t> space_list;
/** list of all tablespaces for which a FILE_MODIFY record has been written
since the latest redo log checkpoint.
@ -1440,6 +1448,49 @@ public:
potential space_id reuse */
bool space_id_reuse_warned;
/** Add the file to the end of opened spaces list in
fil_system.space_list, so that fil_space_t::try_to_close() should close
it as a last resort.
@param space space to add */
void add_opened_last_to_space_list(fil_space_t *space);
/** Reposition a tablespace at the end of the opened part of
fil_system.space_list, so that fil_space_t::try_to_close() would
close it as a last resort.
Nothing is done if freeze_space_list is nonzero or if the tablespace
already is space_list_last_opened.
@param space space to move */
inline void move_opened_last_to_space_list(fil_space_t *space)
{
  /* When several files of the same tablespace are added in a row
  (which can be the case for the system or temporary tablespace),
  the tablespace already is in the right position, and removing and
  re-adding it would be pointless. */
  const bool already_last= space_list_last_opened == space;

  if (!freeze_space_list && !already_last)
  {
    space_list.erase(space_list_t::iterator(space));
    add_opened_last_to_space_list(space);
  }
}
/** Move a closed tablespace to the very end of fil_system.space_list,
so that fil_space_t::try_to_close() iterates opened files first in
FIFO order, i.e. first opened, first closed.
Nothing is done if freeze_space_list is nonzero.
@param space space to move */
void move_closed_last_to_space_list(fil_space_t *space)
{
  if (UNIV_UNLIKELY(freeze_space_list))
    return;

  space_list_t::iterator pos= space_list_t::iterator(space);

  if (space == space_list_last_opened)
  {
    /* The tablespace being moved was the last opened one; its
    predecessor in the list takes over that role.
    NOTE(review): this presumes that space is not the first element
    of space_list in this case - confirm. */
    space_list_t::iterator before= pos;
    --before;
    space_list_last_opened= &*before;
  }

  space_list.erase(pos);
  space_list.push_back(*space);
}
/** Return the next tablespace from default_encrypt_tables list.
@param space previous tablespace (nullptr to start from the start)
@param recheck whether the removal condition needs to be rechecked after

View file

@ -342,6 +342,9 @@ public:
/** Upgrade U locks on a block to X */
void page_lock_upgrade(const buf_block_t &block);
/** Upgrade index U lock to X */
ATTRIBUTE_COLD void index_lock_upgrade();
/** Check if we are holding tablespace latch
@param space tablespace to search for
@return whether space.latch is being held */

View file

@ -85,11 +85,6 @@ struct srv_stats_t
/** Count the amount of data written in total (in bytes) */
ulint_ctr_1_t data_written;
/** Number of buffer pool reads that led to the reading of
a disk page */
ulint_ctr_1_t buf_pool_reads;
/** Number of bytes saved by page compression */
ulint_ctr_n_t page_compression_saved;
/* Number of pages compressed with page compression */
@ -649,23 +644,11 @@ struct export_var_t{
char innodb_buffer_pool_resize_status[512];/*!< Buf pool resize status */
my_bool innodb_buffer_pool_load_incomplete;/*!< Buf pool load incomplete */
ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */
ulint innodb_buffer_pool_pages_data; /*!< Data pages */
ulint innodb_buffer_pool_bytes_data; /*!< File bytes used */
ulint innodb_buffer_pool_pages_dirty; /*!< Dirty data pages */
ulint innodb_buffer_pool_bytes_dirty; /*!< File bytes modified */
ulint innodb_buffer_pool_pages_misc; /*!< Miscellanous pages */
ulint innodb_buffer_pool_pages_free; /*!< Free pages */
#ifdef UNIV_DEBUG
ulint innodb_buffer_pool_pages_latched; /*!< Latched pages */
#endif /* UNIV_DEBUG */
ulint innodb_buffer_pool_pages_made_not_young;
ulint innodb_buffer_pool_pages_made_young;
ulint innodb_buffer_pool_pages_old;
ulint innodb_buffer_pool_read_requests; /*!< buf_pool.stat.n_page_gets */
ulint innodb_buffer_pool_reads; /*!< srv_buf_pool_reads */
ulint innodb_buffer_pool_read_ahead_rnd;/*!< srv_read_ahead_rnd */
ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */
ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
ulint innodb_checkpoint_age;
ulint innodb_checkpoint_max_age;
ulint innodb_data_pending_reads; /*!< Pending reads */

View file

@ -924,14 +924,19 @@ public:
/**
Determine if the specified transaction or any older one might be active.
@param caller_trx used to get/set pins
@param trx current transaction
@param id transaction identifier
@return whether any transaction not newer than id might be active
*/
bool find_same_or_older(trx_t *caller_trx, trx_id_t id)
bool find_same_or_older(trx_t *trx, trx_id_t id)
{
return rw_trx_hash.iterate(caller_trx, find_same_or_older_callback, &id);
if (trx->max_inactive_id >= id)
return false;
bool found= rw_trx_hash.iterate(trx, find_same_or_older_callback, &id);
if (!found)
trx->max_inactive_id= id;
return found;
}

View file

@ -642,6 +642,10 @@ public:
Cleared in commit_in_memory() after commit_state(),
trx_sys_t::deregister_rw(), release_locks(). */
trx_id_t id;
/** The largest encountered transaction identifier for which no
transaction was observed to be active. This is a cache to speed up
trx_sys_t::find_same_or_older(). */
trx_id_t max_inactive_id;
private:
/** mutex protecting state and some of lock

View file

@ -1064,13 +1064,16 @@ lock_sec_rec_some_has_impl(
const trx_id_t max_trx_id= page_get_max_trx_id(page_align(rec));
if ((caller_trx->id > max_trx_id &&
!trx_sys.find_same_or_older(caller_trx, max_trx_id)) ||
/* Note: It is possible to have caller_trx->id == 0 in a locking read
if caller_trx has not modified any persistent tables. */
if (!trx_sys.find_same_or_older(caller_trx, max_trx_id) ||
!lock_check_trx_id_sanity(max_trx_id, rec, index, offsets))
return nullptr;
/* In this case it is possible that some transaction has an implicit
x-lock. We have to look in the clustered index. */
/* We checked above that some active (or XA PREPARE) transaction exists
that is older than PAGE_MAX_TRX_ID. That is, some transaction may be
holding an implicit lock on the record. We have to look up the
clustered index record to find if it is (or was) the case. */
return row_vers_impl_x_locked(caller_trx, rec, index, offsets);
}
@ -5157,20 +5160,24 @@ has an implicit lock on the record. The transaction instance must have a
reference count > 0 so that it can't be committed and freed before this
function has completed. */
static
void
bool
lock_rec_convert_impl_to_expl_for_trx(
/*==================================*/
trx_t* trx, /*!< in/out: active transaction */
const page_id_t id, /*!< in: page identifier */
const rec_t* rec, /*!< in: user record on page */
dict_index_t* index, /*!< in: index of record */
trx_t* trx, /*!< in/out: active transaction */
ulint heap_no)/*!< in: rec heap number to lock */
dict_index_t* index) /*!< in: index of record */
{
if (!trx)
return false;
ut_ad(trx->is_referenced());
ut_ad(page_rec_is_leaf(rec));
ut_ad(!rec_is_metadata(rec, *index));
DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx");
ulint heap_no= page_rec_get_heap_no(rec);
{
LockGuard g{lock_sys.rec_hash, id};
trx->mutex_lock();
@ -5187,6 +5194,7 @@ lock_rec_convert_impl_to_expl_for_trx(
trx->release_reference();
DEBUG_SYNC_C("after_lock_rec_convert_impl_to_expl_for_trx");
return false;
}
@ -5260,7 +5268,6 @@ static void lock_rec_other_trx_holds_expl(trx_t *caller_trx, trx_t *trx,
}
#endif /* UNIV_DEBUG */
/** If an implicit x-lock exists on a record, convert it to an explicit one.
Often, this is called by a transaction that is about to enter a lock wait
@ -5272,12 +5279,14 @@ This may also be called by the same transaction that is already holding
an implicit exclusive lock on the record. In this case, no explicit lock
should be created.
@tparam is_primary whether the index is the primary key
@param[in,out] caller_trx current transaction
@param[in] id index tree leaf page identifier
@param[in] rec record on the leaf page
@param[in] index the index of the record
@param[in] offsets rec_get_offsets(rec,index)
@return whether caller_trx already holds an exclusive lock on rec */
template<bool is_primary>
static
bool
lock_rec_convert_impl_to_expl(
@ -5295,8 +5304,9 @@ lock_rec_convert_impl_to_expl(
ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
ut_ad(page_rec_is_leaf(rec));
ut_ad(!rec_is_metadata(rec, *index));
ut_ad(index->is_primary() == is_primary);
if (dict_index_is_clust(index)) {
if (is_primary) {
trx_id_t trx_id;
trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
@ -5322,20 +5332,7 @@ lock_rec_convert_impl_to_expl(
ut_d(lock_rec_other_trx_holds_expl(caller_trx, trx, rec, id));
}
if (trx) {
ulint heap_no = page_rec_get_heap_no(rec);
ut_ad(trx->is_referenced());
/* If the transaction is still active and has no
explicit x-lock set on the record, set one for it.
trx cannot be committed until the ref count is zero. */
lock_rec_convert_impl_to_expl_for_trx(
id, rec, index, trx, heap_no);
}
return false;
return lock_rec_convert_impl_to_expl_for_trx(trx, id, rec, index);
}
/*********************************************************************//**
@ -5374,8 +5371,9 @@ lock_clust_rec_modify_check_and_lock(
/* If a transaction has no explicit x-lock set on the record, set one
for it */
if (lock_rec_convert_impl_to_expl(thr_get_trx(thr), block->page.id(),
rec, index, offsets)) {
if (lock_rec_convert_impl_to_expl<true>(thr_get_trx(thr),
block->page.id(),
rec, index, offsets)) {
/* We already hold an implicit exclusive lock. */
return DB_SUCCESS;
}
@ -5532,15 +5530,17 @@ lock_sec_rec_read_check_and_lock(
return(DB_SUCCESS);
}
const page_id_t id{block->page.id()};
ut_ad(!rec_is_metadata(rec, *index));
trx_t *trx = thr_get_trx(thr);
if (lock_table_has(trx, index->table, mode)) {
return DB_SUCCESS;
}
if (!page_rec_is_supremum(rec)
&& !lock_table_has(trx, index->table, LOCK_X)
&& lock_rec_convert_impl_to_expl(thr_get_trx(thr), id, rec,
index, offsets)
&& lock_rec_convert_impl_to_expl<false>(
trx, block->page.id(), rec, index, offsets)
&& gap_mode == LOCK_REC_NOT_GAP) {
/* We already hold an implicit exclusive lock. */
return DB_SUCCESS;
@ -5565,7 +5565,8 @@ lock_sec_rec_read_check_and_lock(
if (trx->wsrep == 3) trx->wsrep = 1;
#endif /* WITH_WSREP */
ut_ad(lock_rec_queue_validate(false, id, rec, index, offsets));
ut_ad(lock_rec_queue_validate(false, block->page.id(),
rec, index, offsets));
return(err);
}
@ -5622,7 +5623,8 @@ lock_clust_rec_read_check_and_lock(
trx_t *trx = thr_get_trx(thr);
if (!lock_table_has(trx, index->table, LOCK_X)
&& heap_no != PAGE_HEAP_NO_SUPREMUM
&& lock_rec_convert_impl_to_expl(trx, id, rec, index, offsets)
&& lock_rec_convert_impl_to_expl<true>(trx, id,
rec, index, offsets)
&& gap_mode == LOCK_REC_NOT_GAP) {
/* We already hold an implicit exclusive lock. */
return DB_SUCCESS;

View file

@ -954,14 +954,6 @@ wait_suspend_loop:
if (!buf_pool.is_initialised()) {
ut_ad(!srv_was_started);
} else if (ulint pending_io = buf_pool.io_pending()) {
if (srv_print_verbose_log && count > 600) {
ib::info() << "Waiting for " << pending_io << " buffer"
" page I/Os to complete";
count = 0;
}
goto loop;
} else {
buf_flush_buffer_pool();
}

View file

@ -3093,7 +3093,7 @@ set_start_lsn:
/* The following is adapted from
buf_pool_t::insert_into_flush_list() */
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_pool.stat.flush_list_bytes+= block->physical_size();
buf_pool.flush_list_bytes+= block->physical_size();
block->page.set_oldest_modification(start_lsn);
UT_LIST_ADD_FIRST(buf_pool.flush_list, &block->page);
buf_pool.page_cleaner_wakeup();

View file

@ -140,9 +140,9 @@ inline void buf_pool_t::insert_into_flush_list(buf_page_t *prev,
UT_LIST_REMOVE(flush_list, &block->page);
}
else
stat.flush_list_bytes+= block->physical_size();
flush_list_bytes+= block->physical_size();
ut_ad(stat.flush_list_bytes <= curr_pool_size);
ut_ad(flush_list_bytes <= curr_pool_size);
if (prev)
UT_LIST_INSERT_AFTER(flush_list, prev, &block->page);

View file

@ -217,14 +217,12 @@ rec_get_n_extern_new(
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the field is stored externally. */
if (DATA_BIG_COL(col)) {
if (len & 0x80) {
/* 1exxxxxxx xxxxxxxx */
if (len & 0x40) {
n_extern++;
}
lens--;
if (UNIV_UNLIKELY(len & 0x80) && DATA_BIG_COL(col)) {
/* 1exxxxxxx xxxxxxxx */
if (len & 0x40) {
n_extern++;
}
lens--;
}
}
} while (++i < n);
@ -244,6 +242,10 @@ enum rec_leaf_format {
REC_LEAF_INSTANT
};
/* Suppress -Wconversion on older GCC for the record offset code that
follows, which uses compound assignments such as "offs += ..." and
"len <<= 8" instead of explicit static_cast<rec_offs>() of each result.
The matching "#pragma GCC diagnostic pop" appears further down in this
file, guarded by the same compiler version condition. */
#if defined __GNUC__ && !defined __clang__ && __GNUC__ < 11
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wconversion" /* GCC 5 to 10 need this */
#endif
/** Determine the offset to each field in a leaf-page record
in ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED.
This is a special case of rec_init_offsets() and rec_get_offsets_func().
@ -361,8 +363,7 @@ start:
do {
if (mblob) {
if (i == index->first_user_field()) {
offs = static_cast<rec_offs>(offs
+ FIELD_REF_SIZE);
offs += FIELD_REF_SIZE;
len = combine(offs, STORED_OFFPAGE);
any |= REC_OFFS_EXTERNAL;
field--;
@ -433,27 +434,23 @@ start:
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the field is stored externally. */
if ((len & 0x80) && DATA_BIG_COL(col)) {
if (UNIV_UNLIKELY(len & 0x80) && DATA_BIG_COL(col)) {
/* 1exxxxxxx xxxxxxxx */
len = static_cast<rec_offs>(len << 8
| *lens--);
offs = static_cast<rec_offs>(offs
+ get_value(len));
if (UNIV_UNLIKELY(len & 0x4000)) {
ut_ad(index->is_primary());
any |= REC_OFFS_EXTERNAL;
len = combine(offs, STORED_OFFPAGE);
} else {
len = offs;
}
len <<= 8;
len |= *lens--;
static_assert(STORED_OFFPAGE == 0x4000, "");
static_assert(REC_OFFS_EXTERNAL == 0x4000, "");
const rec_offs ext = len & REC_OFFS_EXTERNAL;
offs += get_value(len);
len = offs | ext;
any |= ext;
ut_ad(!ext || index->is_primary());
continue;
}
len = offs = static_cast<rec_offs>(offs + len);
len = offs += len;
} else {
len = offs = static_cast<rec_offs>(offs
+ field->fixed_len);
len = offs += field->fixed_len;
}
} while (field++, rec_offs_base(offsets)[++i] = len,
i < rec_offs_n_fields(offsets));
@ -679,8 +676,7 @@ rec_init_offsets(
do {
rec_offs len;
if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
len = offs = static_cast<rec_offs>(
offs + REC_NODE_PTR_SIZE);
len = offs += REC_NODE_PTR_SIZE;
goto resolved;
}
@ -720,29 +716,25 @@ rec_init_offsets(
encoded in two bytes when it is 128 or
more, or when the field is stored
externally. */
if (DATA_BIG_COL(col)) {
if (len & 0x80) {
/* 1exxxxxxx xxxxxxxx */
len = static_cast<rec_offs>(
len << 8 | *lens--);
if (UNIV_UNLIKELY(len & 0x80)
&& DATA_BIG_COL(col)) {
/* 1exxxxxxx xxxxxxxx */
len <<= 8;
len |= *lens--;
/* B-tree node pointers
must not contain externally
stored columns. Thus
the "e" flag must be 0. */
ut_a(!(len & 0x4000));
offs = static_cast<rec_offs>(
offs + get_value(len));
len = offs;
goto resolved;
}
/* B-tree node pointers
must not contain externally
stored columns. Thus
the "e" flag must be 0. */
ut_a(!(len & 0x4000));
offs += len & 0x3fff;
len = offs;
goto resolved;
}
len = offs = static_cast<rec_offs>(offs + len);
len = offs += len;
} else {
len = offs = static_cast<rec_offs>(
offs + field->fixed_len);
len = offs += field->fixed_len;
}
resolved:
rec_offs_base(offsets)[i + 1] = len;
@ -759,35 +751,30 @@ resolved:
rec_offs any;
if (rec_get_1byte_offs_flag(rec)) {
offs = static_cast<rec_offs>(offs + n_fields);
offs += static_cast<rec_offs>(n_fields);
any = offs;
/* Determine offsets to fields */
do {
offs = rec_1_get_field_end_info(rec, i);
if (offs & REC_1BYTE_SQL_NULL_MASK) {
offs &= static_cast<rec_offs>(
~REC_1BYTE_SQL_NULL_MASK);
set_type(offs, SQL_NULL);
offs ^= REC_1BYTE_SQL_NULL_MASK
| SQL_NULL;
}
rec_offs_base(offsets)[1 + i] = offs;
} while (++i < n);
} else {
offs = static_cast<rec_offs>(offs + 2 * n_fields);
offs += static_cast<rec_offs>(2 * n_fields);
any = offs;
/* Determine offsets to fields */
do {
offs = rec_2_get_field_end_info(rec, i);
if (offs & REC_2BYTE_SQL_NULL_MASK) {
offs &= static_cast<rec_offs>(
~REC_2BYTE_SQL_NULL_MASK);
set_type(offs, SQL_NULL);
}
if (offs & REC_2BYTE_EXTERN_MASK) {
offs &= static_cast<rec_offs>(
~REC_2BYTE_EXTERN_MASK);
set_type(offs, STORED_OFFPAGE);
any |= REC_OFFS_EXTERNAL;
}
static_assert(REC_2BYTE_SQL_NULL_MASK
== SQL_NULL, "");
static_assert(REC_2BYTE_EXTERN_MASK
== STORED_OFFPAGE, "");
static_assert(REC_OFFS_EXTERNAL
== STORED_OFFPAGE, "");
any |= (offs & REC_OFFS_EXTERNAL);
rec_offs_base(offsets)[1 + i] = offs;
} while (++i < n);
}
@ -999,8 +986,7 @@ rec_get_offsets_reverse(
do {
rec_offs len;
if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
len = offs = static_cast<rec_offs>(
offs + REC_NODE_PTR_SIZE);
len = offs += REC_NODE_PTR_SIZE;
goto resolved;
}
@ -1037,30 +1023,23 @@ rec_get_offsets_reverse(
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the field is stored externally. */
if (DATA_BIG_COL(col)) {
if (len & 0x80) {
/* 1exxxxxxx xxxxxxxx */
len = static_cast<rec_offs>(
len << 8 | *lens++);
offs = static_cast<rec_offs>(
offs + get_value(len));
if (UNIV_UNLIKELY(len & 0x4000)) {
any_ext = REC_OFFS_EXTERNAL;
len = combine(offs,
STORED_OFFPAGE);
} else {
len = offs;
}
goto resolved;
}
if (UNIV_UNLIKELY(len & 0x80) && DATA_BIG_COL(col)) {
/* 1exxxxxxx xxxxxxxx */
len &= 0x7f;
len <<= 8;
len |= *lens++;
static_assert(STORED_OFFPAGE == 0x4000, "");
static_assert(REC_OFFS_EXTERNAL == 0x4000, "");
rec_offs ext = len & REC_OFFS_EXTERNAL;
offs += get_value(len);
len = offs | ext;
any_ext |= ext;
goto resolved;
}
len = offs = static_cast<rec_offs>(offs + len);
len = offs += len;
} else {
len = offs = static_cast<rec_offs>(offs
+ field->fixed_len);
len = offs += field->fixed_len;
}
resolved:
rec_offs_base(offsets)[i + 1] = len;
@ -1100,7 +1079,7 @@ rec_get_nth_field_offs_old(
return(os);
}
next_os = next_os & ~REC_1BYTE_SQL_NULL_MASK;
next_os &= ~REC_1BYTE_SQL_NULL_MASK;
} else {
os = rec_2_get_field_start_offs(rec, n);
@ -1112,8 +1091,7 @@ rec_get_nth_field_offs_old(
return(os);
}
next_os = next_os & ~(REC_2BYTE_SQL_NULL_MASK
| REC_2BYTE_EXTERN_MASK);
next_os &= ~(REC_2BYTE_SQL_NULL_MASK | REC_2BYTE_EXTERN_MASK);
}
*len = next_os - os;
@ -1266,7 +1244,8 @@ rec_get_converted_size_comp_prefix_low(
} else if (dfield_is_ext(dfield)) {
ut_ad(DATA_BIG_COL(field->col));
extra_size += 2;
} else if (len < 128 || !DATA_BIG_COL(field->col)) {
} else if (UNIV_LIKELY(len < 128)
|| !DATA_BIG_COL(field->col)) {
extra_size++;
} else {
/* For variable-length columns, we look up the
@ -1617,14 +1596,7 @@ start:
/* set the null flag if necessary */
if (dfield_is_null(field)) {
#if defined __GNUC__ && !defined __clang__ && __GNUC__ < 6
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wconversion" /* GCC 5 may need this here */
#endif
*nulls |= static_cast<byte>(null_mask);
#if defined __GNUC__ && !defined __clang__ && __GNUC__ < 6
# pragma GCC diagnostic pop
#endif
null_mask <<= 1;
continue;
}
@ -1733,6 +1705,9 @@ rec_convert_dtuple_to_rec_new(
REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
return buf;
}
#if defined __GNUC__ && !defined __clang__ && __GNUC__ < 11
# pragma GCC diagnostic pop /* ignored "-Wconversion" */
#endif
/*********************************************************//**
Builds a physical record out of a data tuple and
@ -2095,14 +2070,12 @@ rec_copy_prefix_to_buf(
stored in one byte for 0..127. The length
will be encoded in two bytes when it is 128 or
more, or when the column is stored externally. */
if (DATA_BIG_COL(col)) {
if (len & 0x80) {
/* 1exxxxxx */
len &= 0x3f;
len <<= 8;
len |= *lens--;
UNIV_PREFETCH_R(lens);
}
if (UNIV_UNLIKELY(len & 0x80) && DATA_BIG_COL(col)) {
/* 1exxxxxx */
len &= 0x3f;
len <<= 8;
len |= *lens--;
UNIV_PREFETCH_R(lens);
}
prefix_len += len;
}

View file

@ -3078,6 +3078,9 @@ row_log_apply_op_low(
mtr_start(&mtr);
index->set_modified(mtr);
cursor.page_cur.index = index;
/* When has_index_lock is set, call mtr_x_lock_index() on the index
within this mini-transaction before searching: in that case the search
below is requested with BTR_MODIFY_TREE_ALREADY_LATCHED, i.e. it
expects the index latch to be held already rather than acquiring it. */
if (has_index_lock) {
mtr_x_lock_index(index, &mtr);
}
/* We perform the pessimistic variant of the operations if we
already hold index->lock exclusively. First, search the
@ -3085,7 +3088,8 @@ row_log_apply_op_low(
depending on when the row in the clustered index was
scanned. */
*error = cursor.search_leaf(entry, PAGE_CUR_LE, has_index_lock
? BTR_MODIFY_TREE : BTR_MODIFY_LEAF, &mtr);
? BTR_MODIFY_TREE_ALREADY_LATCHED
: BTR_MODIFY_LEAF, &mtr);
if (UNIV_UNLIKELY(*error != DB_SUCCESS)) {
goto func_exit;
}

View file

@ -871,7 +871,7 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_DEFAULT_START, MONITOR_MODULE_INDEX},
{"index_page_splits", "index", "Number of index page splits",
MONITOR_NONE,
MONITOR_EXISTING,
MONITOR_DEFAULT_START, MONITOR_INDEX_SPLIT},
{"index_page_merge_attempts", "index",
@ -1374,10 +1374,12 @@ srv_mon_process_existing_counter(
/* Get the value from corresponding global variable */
switch (monitor_id) {
/* export_vars.innodb_buffer_pool_reads. Num Reads from
disk (page not in buffer) */
case MONITOR_INDEX_SPLIT:
value = buf_pool.pages_split;
break;
case MONITOR_OVLD_BUF_POOL_READS:
value = srv_stats.buf_pool_reads;
value = buf_pool.stat.n_pages_read;
break;
/* innodb_buffer_pool_read_requests, the number of logical
@ -1438,7 +1440,7 @@ srv_mon_process_existing_counter(
/* innodb_buffer_pool_bytes_dirty */
case MONITOR_OVLD_BUF_POOL_BYTES_DIRTY:
value = buf_pool.stat.flush_list_bytes;
value = buf_pool.flush_list_bytes;
break;
/* innodb_buffer_pool_pages_free */

View file

@ -669,6 +669,7 @@ void srv_boot()
if (transactional_lock_enabled())
sql_print_information("InnoDB: Using transactional memory");
#endif
buf_dblwr.init();
srv_thread_pool_init();
trx_pool_init();
srv_init();
@ -987,56 +988,19 @@ srv_export_innodb_status(void)
export_vars.innodb_data_writes = os_n_file_writes;
ulint dblwr = 0;
if (buf_dblwr.is_initialised()) {
buf_dblwr.lock();
dblwr = buf_dblwr.submitted();
export_vars.innodb_dblwr_pages_written = buf_dblwr.written();
export_vars.innodb_dblwr_writes = buf_dblwr.batches();
buf_dblwr.unlock();
}
buf_dblwr.lock();
ulint dblwr = buf_dblwr.submitted();
export_vars.innodb_dblwr_pages_written = buf_dblwr.written();
export_vars.innodb_dblwr_writes = buf_dblwr.batches();
buf_dblwr.unlock();
export_vars.innodb_data_written = srv_stats.data_written + dblwr;
export_vars.innodb_buffer_pool_read_requests
= buf_pool.stat.n_page_gets;
export_vars.innodb_buffer_pool_reads = srv_stats.buf_pool_reads;
export_vars.innodb_buffer_pool_read_ahead_rnd =
buf_pool.stat.n_ra_pages_read_rnd;
export_vars.innodb_buffer_pool_read_ahead =
buf_pool.stat.n_ra_pages_read;
export_vars.innodb_buffer_pool_read_ahead_evicted =
buf_pool.stat.n_ra_pages_evicted;
export_vars.innodb_buffer_pool_pages_data =
UT_LIST_GET_LEN(buf_pool.LRU);
export_vars.innodb_buffer_pool_bytes_data =
buf_pool.stat.LRU_bytes
+ (UT_LIST_GET_LEN(buf_pool.unzip_LRU)
<< srv_page_size_shift);
export_vars.innodb_buffer_pool_pages_dirty =
UT_LIST_GET_LEN(buf_pool.flush_list);
export_vars.innodb_buffer_pool_pages_made_young
= buf_pool.stat.n_pages_made_young;
export_vars.innodb_buffer_pool_pages_made_not_young
= buf_pool.stat.n_pages_not_made_young;
export_vars.innodb_buffer_pool_pages_old = buf_pool.LRU_old_len;
export_vars.innodb_buffer_pool_bytes_dirty =
buf_pool.stat.flush_list_bytes;
export_vars.innodb_buffer_pool_pages_free =
UT_LIST_GET_LEN(buf_pool.free);
#ifdef UNIV_DEBUG
export_vars.innodb_buffer_pool_pages_latched =
buf_get_latched_pages_number();

View file

@ -454,7 +454,8 @@ err_exit:
fil_set_max_space_id_if_bigger(space_id);
fil_space_t *space= fil_space_t::create(space_id, fsp_flags,
FIL_TYPE_TABLESPACE, NULL);
FIL_TYPE_TABLESPACE, nullptr,
FIL_ENCRYPTION_DEFAULT, true);
ut_a(fil_validate());
ut_a(space);
@ -800,9 +801,7 @@ static lsn_t srv_prepare_to_delete_redo_log_file()
{
DBUG_ENTER("srv_prepare_to_delete_redo_log_file");
/* Disable checkpoints in the page cleaner. */
ut_ad(!recv_sys.recovery_on);
recv_sys.recovery_on= true;
ut_ad(recv_sys.recovery_on);
/* Clean the buffer pool. */
buf_flush_sync();
@ -1342,8 +1341,6 @@ dberr_t srv_start(bool create_new_db)
}
}
recv_sys.debug_free();
if (srv_operation != SRV_OPERATION_NORMAL) {
ut_ad(srv_operation == SRV_OPERATION_RESTORE_EXPORT
|| srv_operation == SRV_OPERATION_RESTORE);
@ -1395,6 +1392,8 @@ dberr_t srv_start(bool create_new_db)
return(srv_init_abort(err));
}
}
recv_sys.debug_free();
}
ut_ad(err == DB_SUCCESS);
@ -1715,12 +1714,12 @@ void innodb_shutdown()
ut_ad(dict_sys.is_initialised() || !srv_was_started);
ut_ad(trx_sys.is_initialised() || !srv_was_started);
ut_ad(buf_dblwr.is_initialised() || !srv_was_started
ut_ad(buf_dblwr.is_created() || !srv_was_started
|| srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
ut_ad(lock_sys.is_initialised() || !srv_was_started);
ut_ad(log_sys.is_initialised() || !srv_was_started);
ut_ad(ibuf.index || !srv_was_started
ut_ad(ibuf.index || !innodb_change_buffering || !srv_was_started
|| srv_force_recovery >= SRV_FORCE_NO_DDL_UNDO);
dict_stats_deinit();

View file

@ -404,6 +404,7 @@ void trx_t::free()
sizeof skip_lock_inheritance_and_n_ref);
/* do not poison mutex */
MEM_NOACCESS(&id, sizeof id);
/* Size each range by the member being marked, not by a sibling member,
so that the range stays correct even if the member types ever diverge. */
MEM_NOACCESS(&max_inactive_id, sizeof max_inactive_id);
MEM_NOACCESS(&state, sizeof state);
MEM_NOACCESS(&is_recovered, sizeof is_recovered);
#ifdef WITH_WSREP

View file

@ -174,7 +174,7 @@ compress_pages_page_decompressed compression 0 NULL NULL NULL 0 NULL NULL NULL N
compress_pages_page_compression_error compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of page compression errors
compress_pages_encrypted compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of pages encrypted
compress_pages_decrypted compression 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of pages decrypted
index_page_splits index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of index page splits
index_page_splits index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 status_counter Number of index page splits
index_page_merge_attempts index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of index page merge attempts
index_page_merge_successful index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of successful index page merges
index_page_reorg_attempts index 0 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL 0 counter Number of index page reorganization attempts